diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..0cef9ae4 --- /dev/null +++ b/.clang-format @@ -0,0 +1,66 @@ +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: false +AlignOperands: true +AlignTrailingComments: true +AllowShortBlocksOnASingleLine: Empty +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: false +AllowShortLambdasOnASingleLine: Inline +AlwaysBreakTemplateDeclarations: true +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: Always + AfterEnum: true + AfterExternBlock: true + AfterFunction: true + AfterNamespace: true + AfterStruct: true + AfterUnion: true + BeforeElse: true + BeforeWhile: true + SplitEmptyFunction: false +BreakBeforeBraces: Custom +BreakBeforeTernaryOperators: true +ColumnLimit: 0 +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +IndentWidth: 4 +KeepEmptyLinesAtTheStartOfBlocks: false +Language: Cpp +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: All +ObjCSpaceAfterProperty: false +PointerAlignment: Left +ReflowComments: true +SortIncludes: false +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceBeforeCtorInitializerColon: true +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInCStyleCastParentheses: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Latest +TabWidth: 4 +UseTab: Always diff --git a/.github/ISSUE_TEMPLATE/bug-report-feature-request.md b/.github/ISSUE_TEMPLATE/bug-report-feature-request.md deleted file mode 100644 index 05eebe54..00000000 --- a/.github/ISSUE_TEMPLATE/bug-report-feature-request.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -name: Bug Report / Feature Request -about: Tech support does not belong here. You should only file an issue here if you think you have experienced an actual bug with Cemu or you are requesting a feature you believe would make Cemu better. -title: '' -labels: '' -assignees: '' - ---- - - diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index d33d87ed..d71e22d0 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -2,4 +2,4 @@ blank_issues_enabled: false contact_links: - name: Cemu Discord url: https://discord.com/invite/5psYsup - about: If you are experiencing an issue with Cemu, and you need tech support, or if you have a general question, try asking in the official Cemu Discord linked here. Piracy is not allowed. 
+ about: If you need technical support with Cemu or have other questions, the best place to ask is on the official Cemu Discord linked here diff --git a/.github/ISSUE_TEMPLATE/emulation_bug_report.yaml b/.github/ISSUE_TEMPLATE/emulation_bug_report.yaml new file mode 100644 index 00000000..75928607 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/emulation_bug_report.yaml @@ -0,0 +1,69 @@ +# Docs - https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema +name: Bug Report +description: Report an issue with Cemu emulator +title: "Enter a title for the bug report here" +labels: bug +body: + - type: markdown + id: md_readme + attributes: + value: | + ## Important: Read First + + If you discovered a bug, you can report it here. Please make sure of the following first: + - That you are using the latest version of Cemu + - Only report something if you are sure it's a bug and not any technical issue on your end. For troubleshooting help see the [links page](https://github.com/cemu-project/Cemu#links) + - Problems specific to a single game should be reported on the [compatibility wiki](https://wiki.cemu.info/wiki/Main_Page) instead + - Verify that your problem isn't already mentioned on the [issue tracker](https://github.com/cemu-project/Cemu/issues) + + Additionally, be aware that graphic packs can also cause issues. There is a separate issue tracker for graphic pack bugs over at the [graphic pack repository](https://github.com/cemu-project/cemu_graphic_packs) + - type: textarea + id: current_behavior + attributes: + label: Current Behavior + description: "What the bug is, in a brief description" + validations: + required: true + - type: textarea + id: expected_behavior + attributes: + label: Expected Behavior + description: "What did you expect to happen?" + validations: + required: true + + - type: textarea + id: steps_to_reproduce + attributes: + label: Steps to Reproduce + description: "How to reproduce the issue" + validations: + required: true + - type: textarea + id: sys_info + attributes: + label: System Info (Optional) + description: "Your PC specifications. Usually only the operating system and graphics card are important. But feel free to add more info." + placeholder: | + Info + OS: Windows 10 + GPU: NVIDIA GeForce RTX 4090 + value: | + OS: + GPU: + - type: textarea + id: emulation_settings + attributes: + label: Emulation Settings (Optional) + description: | + Any non-default settings. You can leave this empty if you didn't change anything other than input settings. + validations: + required: false + - type: textarea + id: logs_files + attributes: + label: "Logs (Optional)" + description: | + "Attach `log.txt` from your Cemu folder (*File > Open Cemu folder*)".
+ validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/feature_report.yaml b/.github/ISSUE_TEMPLATE/feature_report.yaml new file mode 100644 index 00000000..a5d8705c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_report.yaml @@ -0,0 +1,28 @@ +# Docs - https://docs.github.com/en/communities/using-templates-to-encourage-useful-issues-and-pull-requests/syntax-for-githubs-form-schema +name: Feature suggestion +description: Suggest a new feature +title: "Enter a title for the suggestion here" +labels: feature request +body: + - type: markdown + id: md_readme + attributes: + value: | + ## Important: Read First + + While we appreciate suggestions, it is important to note that we are a very small team and there are already many more ideas than we could ever implement in the near future. Therefore, please only suggest something if you believe it is a great addition and the idea is reasonably unique. + + *Avoid* creating suggestions for: + - Overly obvious features ("Game xyz does not work and should be fixed", "Wiimote support should be improved", "You should add an Android port", "Copy feature xyz from another emulator", "A button to pause/stop emulation") + - Niche features which are only interesting to a tiny percentage of users + - Large scale features ("Add a Metal backend for MacOS", "Add ARM support", "Add savestates") + + Note that this doesn't mean we aren't interested in these ideas, but rather that we likely have them planned anyway and it's mostly a matter of finding the time to implement them. + If you believe your idea is worthwhile even if it doesn't meet all the criteria above, you can still try suggesting it, but we might close it. + - type: textarea + id: idea_suggestion + attributes: + label: Your suggestion + description: "Describe what your suggestion is in as much detail as possible" + validations: + required: true diff --git a/.github/getversion.cpp b/.github/getversion.cpp deleted file mode 100644 index 469a796e..00000000 --- a/.github/getversion.cpp +++ /dev/null @@ -1,9 +0,0 @@ -#include -#include "./../src/Common/version.h" - -// output current Cemu version for CI workflow.
Do not modify -int main() -{ - printf("%d.%d", EMULATOR_VERSION_LEAD, EMULATOR_VERSION_MAJOR); - return 0; -} diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 362cf3bc..e798c1a7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -3,10 +3,10 @@ name: Build Cemu on: workflow_call: inputs: - deploymode: + next_version_major: required: false type: string - experimentalversion: + next_version_minor: required: false type: string @@ -16,40 +16,40 @@ env: jobs: build-ubuntu: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: "Checkout repo" - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: "recursive" - - - name: Setup release mode parameters (for deploy) - if: ${{ inputs.deploymode == 'release' }} + fetch-depth: 0 + + - name: Setup release mode parameters run: | echo "BUILD_MODE=release" >> $GITHUB_ENV - echo "BUILD_FLAGS=-DPUBLIC_RELEASE=ON" >> $GITHUB_ENV - echo "Build mode is release" - - name: Setup debug mode parameters (for continous build) - if: ${{ inputs.deploymode != 'release' }} - run: | - echo "BUILD_MODE=debug" >> $GITHUB_ENV echo "BUILD_FLAGS=" >> $GITHUB_ENV - echo "Build mode is debug" - - - name: Setup version for experimental - if: ${{ inputs.experimentalversion != '' }} + echo "Build mode is release" + + - name: Setup build flags for version + if: ${{ inputs.next_version_major != '' }} run: | - echo "[INFO] Experimental version ${{ inputs.experimentalversion }}" - echo "BUILD_FLAGS=${{ env.BUILD_FLAGS }} -DEXPERIMENTAL_VERSION=${{ inputs.experimentalversion }}" >> $GITHUB_ENV - + echo "[INFO] Version ${{ inputs.next_version_major }}.${{ inputs.next_version_minor }}" + echo "BUILD_FLAGS=${{ env.BUILD_FLAGS }} -DEMULATOR_VERSION_MAJOR=${{ inputs.next_version_major }} -DEMULATOR_VERSION_MINOR=${{ inputs.next_version_minor }}" >> $GITHUB_ENV + - name: "Install system dependencies" run: | sudo apt update -qq - sudo apt install -y ninja-build cmake libgtk-3-dev libsecret-1-dev libgcrypt20-dev libsystemd-dev freeglut3-dev clang-12 nasm + sudo apt install -y clang-15 cmake freeglut3-dev libgcrypt20-dev libglm-dev libgtk-3-dev libpulse-dev libsecret-1-dev libsystemd-dev libudev-dev nasm ninja-build libbluetooth-dev + + - name: "Setup cmake" + uses: jwlawson/actions-setup-cmake@v2 + with: + cmake-version: '3.29.0' + - name: "Bootstrap vcpkg" run: | bash ./dependencies/vcpkg/bootstrap-vcpkg.sh - + - name: 'Setup NuGet Credentials for vcpkg' shell: 'bash' run: | @@ -66,89 +66,74 @@ jobs: - name: "cmake" run: | - mkdir -p build - cd build - cmake .. ${{ env.BUILD_FLAGS }} -DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} -DCMAKE_C_COMPILER=/usr/bin/clang-12 -DCMAKE_CXX_COMPILER=/usr/bin/clang++-12 -G Ninja -DCMAKE_MAKE_PROGRAM=/usr/bin/ninja + cmake -S . 
-B build ${{ env.BUILD_FLAGS }} -DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} -DCMAKE_C_COMPILER=/usr/bin/clang-15 -DCMAKE_CXX_COMPILER=/usr/bin/clang++-15 -G Ninja -DCMAKE_MAKE_PROGRAM=/usr/bin/ninja - name: "Build Cemu" run: | - cd build - ninja - + cmake --build build + - name: Prepare artifact - if: ${{ inputs.deploymode == 'release' }} run: mv bin/Cemu_release bin/Cemu - name: Upload artifact - uses: actions/upload-artifact@v3 - if: ${{ inputs.deploymode == 'release' }} + uses: actions/upload-artifact@v4 with: name: cemu-bin-linux-x64 path: ./bin/Cemu - - + + build-appimage: + runs-on: ubuntu-22.04 + needs: build-ubuntu + steps: + - name: Checkout Upstream Repo + uses: actions/checkout@v4 + + - uses: actions/download-artifact@v4 + with: + name: cemu-bin-linux-x64 + path: bin + + - name: "Install system dependencies" + run: | + sudo apt update -qq + sudo apt install -y clang-15 cmake freeglut3-dev libgcrypt20-dev libglm-dev libgtk-3-dev libpulse-dev libsecret-1-dev libsystemd-dev nasm ninja-build appstream libbluetooth-dev + + - name: "Build AppImage" + run: | + export LD_LIBRARY_PATH="/usr/local/lib:$LD_LIBRARY_PATH" + export DEPLOY_GTK_VERSION=3 + dist/linux/appimage.sh + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: cemu-appimage-x64 + path: artifacts + build-windows: runs-on: windows-2022 steps: - name: "Checkout repo" - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: "recursive" - - - name: Setup release mode parameters (for deploy) - if: ${{ inputs.deploymode == 'release' }} + + - name: Setup release mode parameters run: | echo "BUILD_MODE=release" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append - echo "BUILD_FLAGS=-DPUBLIC_RELEASE=ON" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append - echo "Build mode is release" - - - name: Setup debug mode parameters (for continous build) - if: ${{ inputs.deploymode != 'release' }} - run: | - echo "BUILD_MODE=debug" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append echo "BUILD_FLAGS=" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append - echo "Build mode is debug" - - name: Setup version for experimental - if: ${{ inputs.experimentalversion != '' }} + echo "Build mode is release" + + - name: Setup build flags for version + if: ${{ inputs.next_version_major != '' }} run: | - echo "[INFO] Experimental version ${{ inputs.experimentalversion }}" - echo "BUILD_FLAGS=${{ env.BUILD_FLAGS }} -DEXPERIMENTAL_VERSION=${{ inputs.experimentalversion }}" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append - - - name: Workaround - run: | - Set-Location "C:\Program Files (x86)\Microsoft Visual Studio\Installer\" - $InstallPath = "C:\Program Files\Microsoft Visual Studio\2022\Enterprise" - $componentsToRemove= @( - "Microsoft.VisualStudio.Component.VC.14.32.17.2.ARM" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.ARM.Spectre" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.ARM64" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.ARM64.Spectre" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.x86.x64" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.x86.x64.Spectre" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.ATL" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.ATL.Spectre" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.ATL.ARM" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.ATL.ARM.Spectre" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.ATL.ARM64" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.ATL.ARM64.Spectre" - 
"Microsoft.VisualStudio.Component.VC.14.32.17.2.MFC" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.MFC.Spectre" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.MFC.ARM" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.MFC.ARM.Spectre" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.MFC.ARM64" - "Microsoft.VisualStudio.Component.VC.14.32.17.2.MFC.ARM64.Spectre" - ) - [string]$workloadArgs = $componentsToRemove | ForEach-Object {" --remove " + $_} - $Arguments = ('/c', "vs_installer.exe", 'modify', '--installPath', "`"$InstallPath`"",$workloadArgs, '--quiet', '--norestart', '--nocache') - # should be run twice - $process = Start-Process -FilePath cmd.exe -ArgumentList $Arguments -Wait -PassThru -WindowStyle Hidden - $process = Start-Process -FilePath cmd.exe -ArgumentList $Arguments -Wait -PassThru -WindowStyle Hidden - - name: Configure MSVC - uses: ilammy/msvc-dev-cmd@v1 + echo "[INFO] Version ${{ inputs.next_version_major }}.${{ inputs.next_version_minor }}" + echo "BUILD_FLAGS=${{ env.BUILD_FLAGS }} -DEMULATOR_VERSION_MAJOR=${{ inputs.next_version_major }} -DEMULATOR_VERSION_MINOR=${{ inputs.next_version_minor }}" | Out-File -FilePath $Env:GITHUB_ENV -Encoding utf8 -Append + + - name: "Setup cmake" + uses: jwlawson/actions-setup-cmake@v2 with: - arch: amd64 - toolset: 14.33.31629 - spectre: false + cmake-version: '3.29.0' - name: "Bootstrap vcpkg" run: | @@ -174,20 +159,108 @@ jobs: cd build echo "[INFO] BUILD_FLAGS: ${{ env.BUILD_FLAGS }}" echo "[INFO] BUILD_MODE: ${{ env.BUILD_MODE }}" - cmake .. ${{ env.BUILD_FLAGS }} -DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} + cmake .. ${{ env.BUILD_FLAGS }} -DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} -DVCPKG_INSTALL_OPTIONS="--clean-after-build" - name: "Build Cemu" run: | cd build - cmake --build . --config ${{ env.BUILD_MODE }} -j 2 + cmake --build . 
--config ${{ env.BUILD_MODE }} - name: Prepare artifact - if: ${{ inputs.deploymode == 'release' }} run: Rename-Item bin/Cemu_release.exe Cemu.exe - name: Upload artifact - uses: actions/upload-artifact@v3 - if: ${{ inputs.deploymode == 'release' }} + uses: actions/upload-artifact@v4 with: name: cemu-bin-windows-x64 path: ./bin/Cemu.exe + + build-macos: + runs-on: macos-14 + strategy: + matrix: + arch: [x86_64, arm64] + steps: + - name: "Checkout repo" + uses: actions/checkout@v4 + with: + submodules: "recursive" + + - name: Setup release mode parameters + run: | + echo "BUILD_MODE=release" >> $GITHUB_ENV + echo "BUILD_FLAGS=" >> $GITHUB_ENV + echo "Build mode is release" + + - name: Setup build flags for version + if: ${{ inputs.next_version_major != '' }} + run: | + echo "[INFO] Version ${{ inputs.next_version_major }}.${{ inputs.next_version_minor }}" + echo "BUILD_FLAGS=${{ env.BUILD_FLAGS }} -DEMULATOR_VERSION_MAJOR=${{ inputs.next_version_major }} -DEMULATOR_VERSION_MINOR=${{ inputs.next_version_minor }}" >> $GITHUB_ENV + + - name: "Install system dependencies" + run: | + brew update + brew install ninja nasm automake libtool + + - name: "Install molten-vk" + run: | + curl -L -O https://github.com/KhronosGroup/MoltenVK/releases/download/v1.3.0/MoltenVK-macos.tar + tar xf MoltenVK-macos.tar + sudo mkdir -p /usr/local/lib + sudo cp MoltenVK/MoltenVK/dynamic/dylib/macOS/libMoltenVK.dylib /usr/local/lib + + - name: "Setup cmake" + uses: jwlawson/actions-setup-cmake@v2 + with: + cmake-version: '3.29.0' + + - name: "Bootstrap vcpkg" + run: | + bash ./dependencies/vcpkg/bootstrap-vcpkg.sh + + - name: 'Setup NuGet Credentials for vcpkg' + shell: 'bash' + run: | + mono `./dependencies/vcpkg/vcpkg fetch nuget | tail -n 1` \ + sources add \ + -source "https://nuget.pkg.github.com/${{ github.repository_owner }}/index.json" \ + -storepasswordincleartext \ + -name "GitHub" \ + -username "${{ github.repository_owner }}" \ + -password "${{ secrets.GITHUB_TOKEN }}" + mono `./dependencies/vcpkg/vcpkg fetch nuget | tail -n 1` \ + setapikey "${{ secrets.GITHUB_TOKEN }}" \ + -source "https://nuget.pkg.github.com/${{ github.repository_owner }}/index.json" + + - name: "cmake" + run: | + mkdir build + cd build + cmake .. 
${{ env.BUILD_FLAGS }} \ + -DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} \ + -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} \ + -DMACOS_BUNDLE=ON \ + -G Ninja + + - name: "Build Cemu" + run: | + cmake --build build + + - name: Prepare artifact + run: | + mkdir bin/Cemu_app + mv bin/Cemu_release.app bin/Cemu_app/Cemu.app + mv bin/Cemu_app/Cemu.app/Contents/MacOS/Cemu_release bin/Cemu_app/Cemu.app/Contents/MacOS/Cemu + sed -i '' 's/Cemu_release/Cemu/g' bin/Cemu_app/Cemu.app/Contents/Info.plist + chmod a+x bin/Cemu_app/Cemu.app/Contents/MacOS/{Cemu,update.sh} + ln -s /Applications bin/Cemu_app/Applications + hdiutil create ./bin/tmp.dmg -ov -volname "Cemu" -fs HFS+ -srcfolder "./bin/Cemu_app" + hdiutil convert ./bin/tmp.dmg -format UDZO -o bin/Cemu.dmg + rm bin/tmp.dmg + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + name: cemu-bin-macos-${{ matrix.arch }} + path: ./bin/Cemu.dmg diff --git a/.github/workflows/build_check.yml b/.github/workflows/build_check.yml index 49ef79e9..5d24b0c6 100644 --- a/.github/workflows/build_check.yml +++ b/.github/workflows/build_check.yml @@ -16,6 +16,3 @@ on: jobs: build: uses: ./.github/workflows/build.yml - with: - deploymode: release - experimentalversion: 999999 diff --git a/.github/workflows/deploy_experimental_release.yml b/.github/workflows/deploy_experimental_release.yml deleted file mode 100644 index afe3dee7..00000000 --- a/.github/workflows/deploy_experimental_release.yml +++ /dev/null @@ -1,66 +0,0 @@ -name: Deploy experimental release -on: - workflow_dispatch: - -jobs: - call-release-build: - uses: ./.github/workflows/build.yml - with: - deploymode: release - experimentalversion: ${{ github.run_number }} - deploy: - name: Deploy experimental release - runs-on: ubuntu-20.04 - needs: call-release-build - steps: - - uses: actions/checkout@v2 - - - uses: actions/download-artifact@v3 - with: - name: cemu-bin-linux-x64 - path: cemu-bin-linux-x64 - - - uses: actions/download-artifact@v3 - with: - name: cemu-bin-windows-x64 - path: cemu-bin-windows-x64 - - - name: Initialize - run: | - mkdir upload - sudo apt install zip - - - name: Get version - run: | - echo "Experimental version: ${{ github.run_number }}" - ls - gcc -o getversion .github/getversion.cpp - ./getversion - echo "Cemu CI version: $(./getversion)" - echo "CEMU_FOLDER_NAME=Cemu_$(./getversion)-${{ github.run_number }}" >> $GITHUB_ENV - echo "CEMU_VERSION=$(./getversion)-${{ github.run_number }}" >> $GITHUB_ENV - - - name: Create release from windows-bin - run: | - ls ./ - ls ./bin/ - cp -R ./bin ./${{ env.CEMU_FOLDER_NAME }} - mv cemu-bin-windows-x64/Cemu.exe ./${{ env.CEMU_FOLDER_NAME }}/Cemu.exe - zip -9 -r upload/cemu-${{ env.CEMU_VERSION }}-windows-x64.zip ${{ env.CEMU_FOLDER_NAME }} - rm -r ./${{ env.CEMU_FOLDER_NAME }} - - - name: Create release from linux-bin - run: | - ls ./ - ls ./bin/ - cp -R ./bin ./${{ env.CEMU_FOLDER_NAME }} - mv cemu-bin-linux-x64/Cemu ./${{ env.CEMU_FOLDER_NAME }}/Cemu - zip -9 -r upload/cemu-${{ env.CEMU_VERSION }}-ubuntu-20.04-x64.zip ${{ env.CEMU_FOLDER_NAME }} - rm -r ./${{ env.CEMU_FOLDER_NAME }} - - - name: Create release - run: | - wget -O ghr.tar.gz https://github.com/tcnksm/ghr/releases/download/v0.15.0/ghr_v0.15.0_linux_amd64.tar.gz - tar xvzf ghr.tar.gz; rm ghr.tar.gz - echo "[INFO] Release tag: v${{ env.CEMU_VERSION }}" - ghr_v0.15.0_linux_amd64/ghr -prerelease -t ${{ secrets.GITHUB_TOKEN }} -n "Cemu ${{ env.CEMU_VERSION }} (Experimental)" -b "Cemu experimental release - [changelog](https://cemu.info/changelog.html)" "v${{ 
env.CEMU_VERSION }}" ./upload diff --git a/.github/workflows/deploy_release.yml b/.github/workflows/deploy_release.yml new file mode 100644 index 00000000..2b9ee491 --- /dev/null +++ b/.github/workflows/deploy_release.yml @@ -0,0 +1,151 @@ +name: Deploy release +on: + workflow_dispatch: + inputs: + changelog0: + description: 'Enter the changelog lines for this release. Each line is a feature / bullet point. Do not use dash.' + required: true + type: string + changelog1: + description: 'Feature 2' + required: false + type: string + changelog2: + description: 'Feature 3' + required: false + type: string + changelog3: + description: 'Feature 4' + required: false + type: string + changelog4: + description: 'Feature 5' + required: false + type: string + changelog5: + description: 'Feature 6' + required: false + type: string + changelog6: + description: 'Feature 7' + required: false + type: string + changelog7: + description: 'Feature 8' + required: false + type: string + changelog8: + description: 'Feature 9' + required: false + type: string + changelog9: + description: 'Feature 10' + required: false + type: string + +jobs: + calculate-version: + name: Calculate Version + uses: ./.github/workflows/determine_release_version.yml + call-release-build: + uses: ./.github/workflows/build.yml + needs: calculate-version + with: + next_version_major: ${{ needs.calculate-version.outputs.next_version_major }} + next_version_minor: ${{ needs.calculate-version.outputs.next_version_minor }} + deploy: + name: Deploy release + runs-on: ubuntu-22.04 + needs: [call-release-build, calculate-version] + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Generate changelog + id: generate_changelog + run: | + CHANGELOG="" + if [ -n "${{ github.event.inputs.changelog0 }}" ]; then CHANGELOG="$CHANGELOG- ${{ github.event.inputs.changelog0 }}\n"; fi + if [ -n "${{ github.event.inputs.changelog1 }}" ]; then CHANGELOG="$CHANGELOG- ${{ github.event.inputs.changelog1 }}\n"; fi + if [ -n "${{ github.event.inputs.changelog2 }}" ]; then CHANGELOG="$CHANGELOG- ${{ github.event.inputs.changelog2 }}\n"; fi + if [ -n "${{ github.event.inputs.changelog3 }}" ]; then CHANGELOG="$CHANGELOG- ${{ github.event.inputs.changelog3 }}\n"; fi + if [ -n "${{ github.event.inputs.changelog4 }}" ]; then CHANGELOG="$CHANGELOG- ${{ github.event.inputs.changelog4 }}\n"; fi + if [ -n "${{ github.event.inputs.changelog5 }}" ]; then CHANGELOG="$CHANGELOG- ${{ github.event.inputs.changelog5 }}\n"; fi + if [ -n "${{ github.event.inputs.changelog6 }}" ]; then CHANGELOG="$CHANGELOG- ${{ github.event.inputs.changelog6 }}\n"; fi + if [ -n "${{ github.event.inputs.changelog7 }}" ]; then CHANGELOG="$CHANGELOG- ${{ github.event.inputs.changelog7 }}\n"; fi + if [ -n "${{ github.event.inputs.changelog8 }}" ]; then CHANGELOG="$CHANGELOG- ${{ github.event.inputs.changelog8 }}\n"; fi + if [ -n "${{ github.event.inputs.changelog9 }}" ]; then CHANGELOG="$CHANGELOG- ${{ github.event.inputs.changelog9 }}\n"; fi + echo -e "$CHANGELOG" + echo "RELEASE_BODY=$CHANGELOG" >> $GITHUB_ENV + - uses: actions/download-artifact@v4 + with: + name: cemu-bin-linux-x64 + path: cemu-bin-linux-x64 + + - uses: actions/download-artifact@v4 + with: + name: cemu-appimage-x64 + path: cemu-appimage-x64 + + - uses: actions/download-artifact@v4 + with: + name: cemu-bin-windows-x64 + path: cemu-bin-windows-x64 + + - uses: actions/download-artifact@v4 + with: + name: cemu-bin-macos-x64 + path: cemu-bin-macos-x64 + + - name: Initialize + run: | + mkdir upload + sudo apt 
install zip + + - name: Set version dependent vars + run: | + echo "Version: ${{ needs.calculate-version.outputs.next_version }}" + echo "CEMU_FOLDER_NAME=Cemu_${{ needs.calculate-version.outputs.next_version }}" + echo "CEMU_VERSION=${{ needs.calculate-version.outputs.next_version }}" + echo "CEMU_FOLDER_NAME=Cemu_${{ needs.calculate-version.outputs.next_version }}" >> $GITHUB_ENV + echo "CEMU_VERSION=${{ needs.calculate-version.outputs.next_version }}" >> $GITHUB_ENV + + - name: Create release from windows-bin + run: | + ls ./ + ls ./bin/ + cp -R ./bin ./${{ env.CEMU_FOLDER_NAME }} + mv cemu-bin-windows-x64/Cemu.exe ./${{ env.CEMU_FOLDER_NAME }}/Cemu.exe + zip -9 -r upload/cemu-${{ env.CEMU_VERSION }}-windows-x64.zip ${{ env.CEMU_FOLDER_NAME }} + rm -r ./${{ env.CEMU_FOLDER_NAME }} + + - name: Create appimage + run: | + VERSION=${{ env.CEMU_VERSION }} + echo "Cemu Version is $VERSION" + ls cemu-appimage-x64 + mv cemu-appimage-x64/Cemu-*-x86_64.AppImage upload/Cemu-$VERSION-x86_64.AppImage + + - name: Create release from linux-bin + run: | + ls ./ + ls ./bin/ + cp -R ./bin ./${{ env.CEMU_FOLDER_NAME }} + mv cemu-bin-linux-x64/Cemu ./${{ env.CEMU_FOLDER_NAME }}/Cemu + zip -9 -r upload/cemu-${{ env.CEMU_VERSION }}-ubuntu-22.04-x64.zip ${{ env.CEMU_FOLDER_NAME }} + rm -r ./${{ env.CEMU_FOLDER_NAME }} + + - name: Create release from macos-bin + run: cp cemu-bin-macos-x64/Cemu.dmg upload/cemu-${{ env.CEMU_VERSION }}-macos-12-x64.dmg + + - name: Create release + run: | + wget -O ghr.tar.gz https://github.com/tcnksm/ghr/releases/download/v0.15.0/ghr_v0.15.0_linux_amd64.tar.gz + tar xvzf ghr.tar.gz; rm ghr.tar.gz + echo "[INFO] Release tag: v${{ env.CEMU_VERSION }}" + CHANGELOG_UNESCAPED=$(printf "%s\n" "${{ env.RELEASE_BODY }}" | sed 's/\\n/\n/g') + RELEASE_BODY=$(printf "%s\n%s" \ + "**Changelog:**" \ + "$CHANGELOG_UNESCAPED") + ghr_v0.15.0_linux_amd64/ghr -draft -t ${{ secrets.GITHUB_TOKEN }} -n "Cemu ${{ env.CEMU_VERSION }}" -b "$RELEASE_BODY" "v${{ env.CEMU_VERSION }}" ./upload diff --git a/.github/workflows/deploy_stable_release.yml b/.github/workflows/deploy_stable_release.yml deleted file mode 100644 index 7167dcbe..00000000 --- a/.github/workflows/deploy_stable_release.yml +++ /dev/null @@ -1,69 +0,0 @@ -name: Create new release -on: - workflow_dispatch: - inputs: - PlaceholderInput: - description: PlaceholderInput - required: false -jobs: - call-release-build: - uses: ./.github/workflows/build.yml - with: - deploymode: release - deploy: - name: Deploy release - runs-on: ubuntu-20.04 - needs: call-release-build - steps: - - uses: actions/checkout@v2 - - - uses: actions/download-artifact@v3 - with: - name: cemu-bin-linux-x64 - path: cemu-bin-linux-x64 - - - uses: actions/download-artifact@v3 - with: - name: cemu-bin-windows-x64 - path: cemu-bin-windows-x64 - - - name: Initialize - run: | - mkdir upload - sudo apt update -qq - sudo apt install -y zip - - - name: Get Cemu release version - run: | - gcc -o getversion .github/getversion.cpp - echo "Cemu CI version: $(./getversion)" - echo "CEMU_FOLDER_NAME=Cemu_$(./getversion)" >> $GITHUB_ENV - echo "CEMU_VERSION=$(./getversion)" >> $GITHUB_ENV - - - name: Create appimage - run: | - echo "to do" - - - name: Create release from windows-bin - run: | - ls ./ - ls ./bin/ - cp -R ./bin ./${{ env.CEMU_FOLDER_NAME }} - mv cemu-bin-windows-x64/Cemu.exe ./${{ env.CEMU_FOLDER_NAME }}/Cemu.exe - zip -9 -r upload/cemu-${{ env.CEMU_VERSION }}-windows-x64.zip ${{ env.CEMU_FOLDER_NAME }} - rm -r ./${{ env.CEMU_FOLDER_NAME }} - - - name: Create release from 
ubuntu-bin - run: | - ls ./ - ls ./bin/ - cp -R ./bin ./${{ env.CEMU_FOLDER_NAME }} - mv cemu-bin-linux-x64/Cemu ./${{ env.CEMU_FOLDER_NAME }}/Cemu - zip -9 -r upload/cemu-${{ env.CEMU_VERSION }}-ubuntu-20.04-x64.zip ${{ env.CEMU_FOLDER_NAME }} - rm -r ./${{ env.CEMU_FOLDER_NAME }} - - - name: Create release - run: | - wget -O ghr.tar.gz https://github.com/tcnksm/ghr/releases/download/v0.15.0/ghr_v0.15.0_linux_amd64.tar.gz - tar xvzf ghr.tar.gz; rm ghr.tar.gz - ghr_v0.15.0_linux_amd64/ghr -t ${{ secrets.GITHUB_TOKEN }} -n "Cemu ${{ env.CEMU_VERSION }}" -b "Changelog:" v${{ env.CEMU_VERSION }} ./upload diff --git a/.github/workflows/determine_release_version.yml b/.github/workflows/determine_release_version.yml new file mode 100644 index 00000000..be606941 --- /dev/null +++ b/.github/workflows/determine_release_version.yml @@ -0,0 +1,74 @@ +name: Calculate Next Version from release history + +on: + workflow_dispatch: + workflow_call: + outputs: + next_version: + description: "The next semantic version" + value: ${{ jobs.calculate-version.outputs.next_version }} + next_version_major: + description: "The next semantic version (major)" + value: ${{ jobs.calculate-version.outputs.next_version_major }} + next_version_minor: + description: "The next semantic version (minor)" + value: ${{ jobs.calculate-version.outputs.next_version_minor }} + +jobs: + calculate-version: + runs-on: ubuntu-latest + outputs: + next_version: ${{ steps.calculate_next_version.outputs.next_version }} + next_version_major: ${{ steps.calculate_next_version.outputs.next_version_major }} + next_version_minor: ${{ steps.calculate_next_version.outputs.next_version_minor }} + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Get all releases + id: get_all_releases + run: | + # Fetch all releases and check for API errors + RESPONSE=$(curl -s -o response.json -w "%{http_code}" "https://api.github.com/repos/${{ github.repository }}/releases?per_page=100") + if [ "$RESPONSE" -ne 200 ]; then + echo "Failed to fetch releases. HTTP status: $RESPONSE" + cat response.json + exit 1 + fi + + # Extract and sort tags + ALL_TAGS=$(jq -r '.[].tag_name' response.json | grep -E '^v[0-9]+\.[0-9]+(-[0-9]+)?$' | sed 's/-.*//' | sort -V | tail -n 1) + + # Exit if no tags were found + if [ -z "$ALL_TAGS" ]; then + echo "No valid tags found." + exit 1 + fi + + echo "::set-output name=tag::$ALL_TAGS" + # echo "tag=$ALL_TAGS" >> $GITHUB_STATE + + - name: Calculate next semver minor + id: calculate_next_version + run: | + LATEST_VERSION=${{ steps.get_all_releases.outputs.tag }} + + # strip 'v' prefix and split into major.minor + LATEST_VERSION=${LATEST_VERSION//v/} + IFS='.' 
read -r -a VERSION_PARTS <<< "$LATEST_VERSION" + + MAJOR=${VERSION_PARTS[0]} + MINOR=${VERSION_PARTS[1]} + + # increment the minor version + MINOR=$((MINOR + 1)) + + NEXT_VERSION="${MAJOR}.${MINOR}" + + echo "Major: $MAJOR" + echo "Minor: $MINOR" + + echo "Next version: $NEXT_VERSION" + echo "::set-output name=next_version::$NEXT_VERSION" + echo "::set-output name=next_version_major::$MAJOR" + echo "::set-output name=next_version_minor::$MINOR" \ No newline at end of file diff --git a/.github/workflows/generate_pot.yml b/.github/workflows/generate_pot.yml index f2675574..b057d441 100644 --- a/.github/workflows/generate_pot.yml +++ b/.github/workflows/generate_pot.yml @@ -29,13 +29,13 @@ jobs: - name: "Generate POT file using xgettext" run: > find src -name *.cpp -o -name *.hpp -o -name *.h | - xargs xgettext --from-code=utf-8 - -k_ -kwxTRANSLATE -w 100 + xargs xgettext --from-code=utf-8 -w 100 + --keyword="_" --keyword="wxTRANSLATE" --keyword="wxPLURAL:1,2" --check=space-ellipsis --omit-header -o cemu.pot - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: POT file path: ./cemu.pot diff --git a/.gitignore b/.gitignore index 293864a8..67a268aa 100644 --- a/.gitignore +++ b/.gitignore @@ -13,9 +13,11 @@ *.out *.app .vs +.vscode .idea/ build/ +cmake-build-*/ out/ .cache/ bin/Cemu_* @@ -30,9 +32,15 @@ bin/Cemu_*.ilk bin/Cemu.exe.backup bin/mlc01/* bin/settings.xml +bin/network_services.xml bin/title_list_cache.xml bin/debugger/* bin/sdcard/* +bin/screenshots/* +bin/dump/* +bin/cafeLibs/* +bin/portable/* +bin/keys.txt !bin/shaderCache/info.txt bin/shaderCache/* @@ -43,3 +51,6 @@ bin/controllerProfiles/* bin/gameProfiles/* bin/graphicPacks/* + +# Ignore Finder view option files created by OS X +.DS_Store \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index dd32088b..8f9772d3 100644 --- a/.gitmodules +++ b/.gitmodules @@ -9,7 +9,15 @@ [submodule "dependencies/vcpkg"] path = dependencies/vcpkg url = https://github.com/microsoft/vcpkg - shallow = true + shallow = false [submodule "dependencies/Vulkan-Headers"] path = dependencies/Vulkan-Headers url = https://github.com/KhronosGroup/Vulkan-Headers + shallow = true +[submodule "dependencies/imgui"] + path = dependencies/imgui + url = https://github.com/ocornut/imgui + shallow = true +[submodule "dependencies/xbyak_aarch64"] + path = dependencies/xbyak_aarch64 + url = https://github.com/fujitsu/xbyak_aarch64 diff --git a/BUILD.md b/BUILD.md index 67f55a94..31c26531 100644 --- a/BUILD.md +++ b/BUILD.md @@ -1,59 +1,232 @@ -# Build instructions +# Build Instructions + +## Table of Contents + +- [Windows](#windows) +- [Linux](#linux) + - [Dependencies](#dependencies) + - [For Arch and derivatives:](#for-arch-and-derivatives) + - [For Debian, Ubuntu and derivatives](#for-debian-ubuntu-and-derivatives) + - [For Fedora and derivatives:](#for-fedora-and-derivatives) + - [Build Cemu](#build-cemu) + - [CMake and Clang](#cmake-and-clang) + - [GCC](#gcc) + - [Debug Build](#debug-build) + - [Troubleshooting Steps](#troubleshooting-steps) + - [Compiling Errors](#compiling-errors) + - [Building Errors](#building-errors) +- [macOS](#macos) + - [Installing brew](#installing-brew) + - [Installing Tool Dependencies](#installing-tool-dependencies) + - [Installing Library Dependencies](#installing-library-dependencies) + - [Build Cemu using CMake](#build-cemu-using-cmake) +- [Updating Cemu and source code](#updating-cemu-and-source-code) ## Windows Prerequisites: -- A recent version of Visual Studio 
2022 (recommended but not required) with the following additional components: - - C++ CMake tools for Windows - - Windows 10/11 SDK - git +- A recent version of Visual Studio 2022 with the following additional components: + - C++ CMake tools for Windows + - Windows 10/11 SDK -Instructions: +Instructions for Visual Studio 2022: 1. Run `git clone --recursive https://github.com/cemu-project/Cemu` -2. Launch `Cemu/generate_vs_solution.bat`. - - If you installed VS to a custom location or use VS 2019, you may need to manually change the path inside the .bat file -3. Wait until it's done, then open `Cemu/build/Cemu.sln` in Visual Studio -4. Then build the solution and once finished you can run and debug it, or build it and check the /bin folder for the final Cemu_release.exe. +2. Open the newly created Cemu directory in Visual Studio using the "Open a local folder" option +3. In the menu select Project -> Configure CMake. Wait until it is done; this may take a long time +4. You can now build, run and debug Cemu -You can also skip steps 3-5 and open the root folder of the cloned repo directly in Visual Studio (as a folder) and use the built-in cmake support but be warned that cmake support in VS can be a bit finicky. +Any other IDE should also work as long as it has CMake and MSVC support. CLion and Visual Studio Code have been confirmed to work. ## Linux -To compile Cemu, a recent enough compiler and STL with C++20 support is required! clang-12 or higher is what we recommend. +To compile Cemu, a recent enough compiler and STL with C++20 support is required! Clang-15 or higher is what we recommend. -### Installing dependencies - -#### For Ubuntu and derivatives: -`sudo apt install -y git curl cmake ninja-build nasm libgtk-3-dev libsecret-1-dev libgcrypt20-dev libsystemd-dev freeglut3-dev libpulse-dev` -Additionally, for ubuntu 20.04 only: - - `sudo apt install -y clang-12` - - At step 3 while building, use - `cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DCMAKE_C_COMPILER=/usr/bin/clang-12 -DCMAKE_CXX_COMPILER=/usr/bin/clang++-12 -G Ninja -DCMAKE_MAKE_PROGRAM=/usr/bin/ninja` +### Dependencies #### For Arch and derivatives: -`sudo pacman -S git cmake clang ninja nasm base-devel linux-headers gtk3 libsecret libgcrypt systemd freeglut zip libpulse` +`sudo pacman -S --needed base-devel bluez-libs clang cmake freeglut git glm gtk3 libgcrypt libpulse libsecret linux-headers llvm nasm ninja systemd unzip zip` + +#### For Debian, Ubuntu and derivatives: +`sudo apt install -y cmake curl clang-15 freeglut3-dev git libbluetooth-dev libgcrypt20-dev libglm-dev libgtk-3-dev libpulse-dev libsecret-1-dev libsystemd-dev libtool nasm ninja-build` + +You may also need to install `libusb-1.0-0-dev` as a workaround for an issue with the vcpkg hidapi package. + +At Step 3 in [CMake and Clang](#cmake-and-clang), use the following command instead: + `cmake -S .
-B build -DCMAKE_BUILD_TYPE=release -DCMAKE_C_COMPILER=/usr/bin/clang-15 -DCMAKE_CXX_COMPILER=/usr/bin/clang++-15 -G Ninja -DCMAKE_MAKE_PROGRAM=/usr/bin/ninja` #### For Fedora and derivatives: -`sudo dnf install git cmake clang ninja-build nasm kernel-headers gtk3-devel libsecret-devel libgcrypt-devel systemd-devel freeglut-devel perl-core zlib-devel cubeb-devel` +`sudo dnf install bluez-libs-devel clang cmake cubeb-devel freeglut-devel git glm-devel gtk3-devel kernel-headers libgcrypt-devel libsecret-devel libtool libusb1-devel llvm nasm ninja-build perl-core systemd-devel wayland-protocols-devel zlib-devel zlib-static` + +### Build Cemu + +#### CMake and Clang + +``` +git clone --recursive https://github.com/cemu-project/Cemu +cd Cemu +cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DCMAKE_C_COMPILER=/usr/bin/clang -DCMAKE_CXX_COMPILER=/usr/bin/clang++ -G Ninja +cmake --build build +``` + +#### GCC + +If you are building using GCC, make sure you have g++ installed: +- Installation for Arch and derivatives: `sudo pacman -S gcc` +- Installation for Debian, Ubuntu and derivatives: `sudo apt install g++` +- Installation for Fedora and derivatives: `sudo dnf install gcc-c++` + +``` +git clone --recursive https://github.com/cemu-project/Cemu +cd Cemu +cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_CXX_COMPILER=/usr/bin/g++ -G Ninja +cmake --build build +``` + +#### Debug Build + +``` +git clone --recursive https://github.com/cemu-project/Cemu +cd Cemu +cmake -S . -B build -DCMAKE_BUILD_TYPE=debug -DCMAKE_C_COMPILER=/usr/bin/clang -DCMAKE_CXX_COMPILER=/usr/bin/clang++ -G Ninja +cmake --build build +``` + +If you are using GCC, replace `cmake -S . -B build -DCMAKE_BUILD_TYPE=debug -DCMAKE_C_COMPILER=/usr/bin/clang -DCMAKE_CXX_COMPILER=/usr/bin/clang++ -G Ninja` with `cmake -S . -B build -DCMAKE_BUILD_TYPE=debug -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_CXX_COMPILER=/usr/bin/g++ -G Ninja` + +#### Troubleshooting Steps + +##### Compiling Errors + +This section refers to running `cmake -S...` (truncated). + +* `vcpkg install failed` + * Run the following in the root directory and try running the command again (don't forget to change directories afterwards): + * `cd dependencies/vcpkg && git fetch --unshallow` +* `Please ensure you're using the latest port files with git pull and vcpkg update.` + * Either: + * Update vcpkg by running the following command: + * `git submodule update --remote dependencies/vcpkg` + * If you are sure vcpkg is up to date, check the following logs: + * `Cemu/dependencies/vcpkg/buildtrees/wxwidgets/config-x64-linux-out.log` + * `Cemu/dependencies/vcpkg/buildtrees/libsystemd/config-x64-linux-dbg-meson-log.txt.log` + * `Cemu/dependencies/vcpkg/buildtrees/libsystemd/config-x64-linux-dbg-out.log` +* Not able to find Ninja. + * Add the following and try running the command again: + * `-DCMAKE_MAKE_PROGRAM=/usr/bin/ninja` +* Compiling failed during the boost-build dependency. + * It means you don't have a working/good standard library installation. Check the integrity of your system headers and make sure that C++ related packages are installed and intact. +* Compiling failed during rebuild after `git pull` with an error that mentions RPATH + * Add the following and try running the command again: + * `-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON` +* Environment variable `VCPKG_FORCE_SYSTEM_BINARIES` must be set.
+ * Execute the following and then try running the command again: + * `export VCPKG_FORCE_SYSTEM_BINARIES=1` +* If you are getting a random error, read the [package-name-and-platform]-out.log and [package-name-and-platform]-err.log for the actual reason to see if you might be lacking the headers from a dependency. + + +If you are getting a different error than any of the errors listed above, you may either open an issue in this repo or try using [GCC](#gcc). Make sure your standard library and compilers are updated since Cemu uses a lot of modern features! + + +##### Building Errors + +This section refers to running `cmake --build build`. + +* `main.cpp.o: in function 'std::__cxx11::basic_string...` + * You likely are experiencing a clang-14 issue. This can only be fixed by either lowering the clang version or using GCC, see [GCC](#gcc). +* `fatal error: 'span' file not found` + * You're either missing `libstdc++` or are using a version that's too old. Install at least v10 with your package manager, e.g. `sudo apt install libstdc++-10-dev`. See [#644](https://github.com/cemu-project/Cemu/issues/644). +* `undefined libdecor_xx` + * You are likely experiencing an issue with the sdl2 package that comes with vcpkg. Delete sdl2 from the vcpkg.json in the source tree and recompile. + +If you are getting a different error than any of the errors listed above, you may either open an issue in this repo or try using [GCC](#gcc). Make sure your standard library and compilers are updated since Cemu uses a lot of modern features! + +## macOS + +To compile Cemu, a recent enough compiler and STL with C++20 support is required! LLVM 13 and below +don't support the C++20 feature set required, so either install LLVM from Homebrew or make sure that +you have a recent enough version of Xcode. Xcode 15 is known to work. The OpenGL graphics API isn't +supported on macOS, so Vulkan must be used through the Molten-VK compatibility layer. + +### Installing brew + +1. `/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"` +2. Set up the Homebrew shell environment: + 1. **On an Intel Mac:** `eval "$(/usr/local/Homebrew/bin/brew shellenv)"` + 2. **On an Apple Silicon Mac:** `eval "$(/opt/homebrew/bin/brew shellenv)"` + +### Installing Tool Dependencies + +The native versions of these can be used regardless of what type of Mac you have. + +`brew install git cmake ninja nasm automake libtool` + +### Installing Library Dependencies + +**On Apple Silicon Macs, Rosetta 2 and the x86_64 version of Homebrew must be used to install these dependencies:** +1. `softwareupdate --install-rosetta` # Install Rosetta 2 if you don't have it. This only has to be done once +2. `arch -x86_64 zsh` # run an x64 shell +3. `/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"` +4. `eval "$(/usr/local/Homebrew/bin/brew shellenv)"` + +Then install the dependencies: + +`brew install boost molten-vk` + +### Build Cemu using CMake -### Build Cemu using cmake and clang 1. `git clone --recursive https://github.com/cemu-project/Cemu` 2. `cd Cemu` -3. `cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DCMAKE_C_COMPILER=/usr/bin/clang -DCMAKE_CXX_COMPILER=/usr/bin/clang++ -G Ninja` +3. `cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DCMAKE_OSX_ARCHITECTURES=x86_64 -G Ninja` 4. `cmake --build build` 5. You should now have a Cemu executable file in the /bin folder, which you can run using `./bin/Cemu_release`.
-#### Using GCC -While we use and test Cemu using clang, using GCC might work better with your distro (they should be fairly similar performance/issues wise and should only be considered if compilation is the issue). -You can use it by replacing the step 3 with the following: -`cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_CXX_COMPILER=/usr/bin/g++ -G Ninja` - #### Troubleshooting steps - - If step 3 gives you an error about not being able to find ninja, try appending `-DCMAKE_MAKE_PROGRAM=/usr/bin/ninja` to the command and running it again. - - If step 3 fails while compiling the boost-build dependency, it means you don't have a working/good standard library installation. Check the integrity of your system headers and making sure that C++ related packages are installed and intact. - - If step 3 gives a random error, read the `[package-name-and-platform]-out.log` and `[package-name-and-platform]-err.log` for the actual reason to see if you might be lacking the headers from a dependency. - - If step 3 is still failing or if you're not able to find the cause, please make an issue on our Github about it! - - If step 4 gives you an error that contains something like `main.cpp.o: in function 'std::__cxx11::basic_string...`, you likely are experiencing a clang-14 issue. This can only be fixed by either lowering the clang version or using GCC, see below. - - If step 4 gives you a different error, you could report it to this repo or try using GCC. Just make sure your standard library and compilers are updated since Cemu uses a lot of modern features! -- If step 4 gives you undefined libdecor_xx, you are likely experiencing an issue with sdl2 package that comes with vcpkg. Delete sdl2 from vcpkg.json in source file and recompile +- If step 3 gives you an error about not being able to find ninja, try appending `-DCMAKE_MAKE_PROGRAM=/usr/local/bin/ninja` to the command and running it again. + +## Updating Cemu and source code +1. To update your Cemu local repository, use the command `git pull --recurse-submodules` (run this command on the Cemu root). + - This should update your local copy of Cemu and all of its dependencies. +2. Then, you can rebuild Cemu using the steps listed above, according to whether you use Linux or Windows. + +If CMake complains about Cemu already being compiled or another similar error, try deleting the `CMakeCache.txt` file inside the `build` folder and retry building. + +## CMake configure flags +Some flags can be passed during CMake configure to customise which features are enabled on build. + +Example usage: `cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DENABLE_SDL=ON -DENABLE_VULKAN=OFF` + +### All platforms +| Flag | | Description | Default | Note | +|--------------------|:--|-----------------------------------------------------------------------------|---------|--------------------| +| ALLOW_PORTABLE | | Allow Cemu to use the `portable` directory to store configs and data | ON | | +| CEMU_CXX_FLAGS | | Flags passed straight to the compiler, e.g. 
`-march=native`, `-Wall`, `/W3` | "" | | +| ENABLE_CUBEB | | Enable cubeb audio backend | ON | | +| ENABLE_DISCORD_RPC | | Enable Discord Rich presence support | ON | | +| ENABLE_OPENGL | | Enable OpenGL graphics backend | ON | Currently required | +| ENABLE_HIDAPI | | Enable HIDAPI (used for Wiimote controller API) | ON | | +| ENABLE_SDL | | Enable SDLController controller API | ON | Currently required | +| ENABLE_VCPKG | | Use VCPKG package manager to obtain dependencies | ON | | +| ENABLE_VULKAN | | Enable the Vulkan graphics backend | ON | | +| ENABLE_WXWIDGETS | | Enable wxWidgets UI | ON | Currently required | + +### Windows +| Flag | Description | Default | Note | +|--------------------|-----------------------------------|---------|--------------------| +| ENABLE_DIRECTAUDIO | Enable DirectAudio audio backend | ON | Currently required | +| ENABLE_DIRECTINPUT | Enable DirectInput controller API | ON | Currently required | +| ENABLE_XAUDIO | Enable XAudio audio backend | ON | | +| ENABLE_XINPUT | Enable XInput controller API | ON | | + +### Linux +| Flag | Description | Default | +|-----------------------|----------------------------------------------------|---------| +| ENABLE_BLUEZ | Build with Bluez (used for Wiimote controller API) | ON | +| ENABLE_FERAL_GAMEMODE | Enable Feral Interactive GameMode support | ON | +| ENABLE_WAYLAND | Enable Wayland support | ON | + +### macOS +| Flag | Description | Default | +|--------------|------------------------------------------------|---------| +| MACOS_BUNDLE | MacOS executable will be an application bundle | OFF | diff --git a/CMakeLists.txt b/CMakeLists.txt index e9f7458e..aa491b9e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,15 +1,48 @@ cmake_minimum_required(VERSION 3.21.1) -option(PUBLIC_RELEASE "Compile with debug asserts disabled and no console" OFF) option(ENABLE_VCPKG "Enable the vcpkg package manager" ON) -set(EXPERIMENTAL_VERSION "" CACHE STRING "") # used by CI script to set experimental version +option(MACOS_BUNDLE "The executable when built on macOS will be created as an application bundle" OFF) +option(ALLOW_PORTABLE "Allow Cemu to be run in portable mode" ON) -if (EXPERIMENTAL_VERSION) - add_definitions(-DEMULATOR_VERSION_MINOR=${EXPERIMENTAL_VERSION}) -endif() +# used by CI script to set version: +set(EMULATOR_VERSION_MAJOR "0" CACHE STRING "") +set(EMULATOR_VERSION_MINOR "0" CACHE STRING "") +set(EMULATOR_VERSION_PATCH "0" CACHE STRING "") + +execute_process( + COMMAND git log --format=%h -1 + WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + OUTPUT_VARIABLE GIT_HASH + OUTPUT_STRIP_TRAILING_WHITESPACE +) +add_definitions(-DEMULATOR_HASH=${GIT_HASH}) if (ENABLE_VCPKG) - set(VCPKG_OVERLAY_PORTS "${CMAKE_CURRENT_LIST_DIR}/dependencies/vcpkg_overlay_ports") + # check if vcpkg is shallow and unshallow it if necessary + execute_process( + COMMAND git rev-parse --is-shallow-repository + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/dependencies/vcpkg + OUTPUT_VARIABLE is_vcpkg_shallow + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + if(is_vcpkg_shallow STREQUAL "true") + message(STATUS "vcpkg is shallow. 
Unshallowing it now...") + execute_process( + COMMAND git fetch --unshallow + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/dependencies/vcpkg" + RESULT_VARIABLE result + OUTPUT_VARIABLE output + ) + endif() + + if(UNIX AND NOT APPLE) + set(VCPKG_OVERLAY_PORTS "${CMAKE_CURRENT_LIST_DIR}/dependencies/vcpkg_overlay_ports_linux") + elseif(APPLE) + set(VCPKG_OVERLAY_PORTS "${CMAKE_CURRENT_LIST_DIR}/dependencies/vcpkg_overlay_ports_mac") + else() + set(VCPKG_OVERLAY_PORTS "${CMAKE_CURRENT_LIST_DIR}/dependencies/vcpkg_overlay_ports") + endif() set(CMAKE_TOOLCHAIN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/dependencies/vcpkg/scripts/buildsystems/vcpkg.cmake" CACHE STRING "Vcpkg toolchain file") # Set this so that all the various find_package() calls don't need an explicit @@ -20,7 +53,7 @@ if (ENABLE_VCPKG) endif() endif() -project(Cemu VERSION 2.0) +project(Cemu VERSION 2.0.0) list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") @@ -29,13 +62,18 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -if (PUBLIC_RELEASE) - add_compile_definitions(PUBLIC_RELEASE) - set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) # enable LTO -endif() +add_compile_definitions($<$<CONFIG:Debug>:CEMU_DEBUG_ASSERT>) # if build type is debug, set CEMU_DEBUG_ASSERT + +add_definitions(-DEMULATOR_VERSION_MAJOR=${EMULATOR_VERSION_MAJOR}) +add_definitions(-DEMULATOR_VERSION_MINOR=${EMULATOR_VERSION_MINOR}) +add_definitions(-DEMULATOR_VERSION_PATCH=${EMULATOR_VERSION_PATCH}) set_property(GLOBAL PROPERTY USE_FOLDERS ON) +# enable link time optimization for release builds +set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELEASE ON) +set(CMAKE_INTERPROCEDURAL_OPTIMIZATION_RELWITHDEBINFO ON) + if (MSVC) set_property(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT CemuBin) # floating point model: precise, fiber safe optimizations @@ -48,22 +86,35 @@ if (MSVC) else() add_compile_options(/GT) endif() - if (PUBLIC_RELEASE) - message(STATUS "Using additional optimization flags for MSVC") - add_compile_options(/Oi /Ot) # enable intrinsic functions, favor speed - endif() + # enable additional optimization flags for release builds + add_compile_options($<$<CONFIG:Release>:/Oi>) # enable intrinsic functions + add_compile_options($<$<CONFIG:Release>:/Ot>) # favor speed +endif() + +if (APPLE) + enable_language(OBJC OBJCXX) + set(CMAKE_OSX_DEPLOYMENT_TARGET "12.0") +endif() + +if (UNIX AND NOT APPLE) + option(ENABLE_WAYLAND "Build with Wayland support" ON) + option(ENABLE_FERAL_GAMEMODE "Enables Feral Interactive GameMode Support" ON) + option(ENABLE_BLUEZ "Build with Bluez support" ON) endif() option(ENABLE_OPENGL "Enables the OpenGL backend" ON) option(ENABLE_VULKAN "Enables the Vulkan backend" ON) option(ENABLE_DISCORD_RPC "Enables the Discord Rich Presence feature" ON) + # input backends if (WIN32) option(ENABLE_XINPUT "Enables the usage of XInput" ON) option(ENABLE_DIRECTINPUT "Enables the usage of DirectInput" ON) add_compile_definitions(HAS_DIRECTINPUT) endif() + +option(ENABLE_HIDAPI "Build with HIDAPI" ON) option(ENABLE_SDL "Enables the SDLController backend" ON) # audio backends @@ -80,7 +131,6 @@ find_package(Threads REQUIRED) find_package(SDL2 REQUIRED) find_package(CURL REQUIRED) find_package(pugixml REQUIRED) -find_package(imgui REQUIRED) find_package(RapidJSON REQUIRED) find_package(Boost COMPONENTS program_options filesystem nowide REQUIRED) find_package(libzip REQUIRED) @@ -89,7 +139,7 @@ find_package(ZLIB REQUIRED) find_package(zstd MODULE REQUIRED) # MODULE so that zstd::zstd is available find_package(OpenSSL COMPONENTS Crypto SSL REQUIRED)
find_package(glm REQUIRED) -find_package(fmt 9.1.0 REQUIRED) +find_package(fmt 9 REQUIRED) find_package(PNG REQUIRED) # glslang versions older than 11.11.0 define targets without a namespace @@ -99,6 +149,27 @@ endif() if (UNIX AND NOT APPLE) find_package(X11 REQUIRED) + if (ENABLE_WAYLAND) + find_package(Wayland REQUIRED Client) + find_package(WaylandScanner REQUIRED) + find_package(WaylandProtocols 1.15 REQUIRED) + + ecm_add_wayland_client_protocol(WAYLAND_PROTOCOL_SRCS + PROTOCOL "${WaylandProtocols_DATADIR}/stable/viewporter/viewporter.xml" + BASENAME viewporter) + add_library(CemuWaylandProtocols STATIC ${WAYLAND_PROTOCOL_SRCS}) + target_include_directories(CemuWaylandProtocols PUBLIC "${CMAKE_CURRENT_BINARY_DIR}") + + add_compile_definitions(HAS_WAYLAND) + endif() + find_package(GTK3 REQUIRED) + + if(ENABLE_BLUEZ) + find_package(bluez REQUIRED) + set(SUPPORTS_WIIMOTE ON) + add_compile_definitions(HAS_BLUEZ) + endif() + endif() if (ENABLE_VULKAN) @@ -115,18 +186,34 @@ if (ENABLE_DISCORD_RPC) target_include_directories(discord-rpc INTERFACE ./dependencies/discord-rpc/include) endif() +if (ENABLE_HIDAPI) + find_package(hidapi REQUIRED) + set(SUPPORTS_WIIMOTE ON) + add_compile_definitions(HAS_HIDAPI) +endif () + +if(UNIX AND NOT APPLE) + if(ENABLE_FERAL_GAMEMODE) + add_compile_definitions(ENABLE_FERAL_GAMEMODE) + add_subdirectory(dependencies/gamemode EXCLUDE_FROM_ALL) + target_include_directories(gamemode INTERFACE ./dependencies/gamemode/lib) + endif() +endif() + if (ENABLE_WXWIDGETS) find_package(wxWidgets 3.2 REQUIRED COMPONENTS base core gl propgrid xrc) endif() if (ENABLE_CUBEB) + if (NOT ENABLE_VCPKG) find_package(cubeb) + endif() if (NOT cubeb_FOUND) option(BUILD_TESTS "" OFF) option(BUILD_TOOLS "" OFF) option(BUNDLE_SPEEX "" OFF) set(USE_WINMM OFF CACHE BOOL "") - add_subdirectory("dependencies/cubeb" EXCLUDE_FROM_ALL) + add_subdirectory("dependencies/cubeb" EXCLUDE_FROM_ALL SYSTEM) set_property(TARGET cubeb PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") add_library(cubeb::cubeb ALIAS cubeb) endif() @@ -135,9 +222,18 @@ endif() add_subdirectory("dependencies/ih264d" EXCLUDE_FROM_ALL) +if (CMAKE_OSX_ARCHITECTURES) + set(CEMU_ARCHITECTURE ${CMAKE_OSX_ARCHITECTURES}) +else() + set(CEMU_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR}) +endif() +if(CEMU_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)") + add_subdirectory("dependencies/xbyak_aarch64" EXCLUDE_FROM_ALL) +endif() + find_package(ZArchive) if (NOT ZArchive_FOUND) add_subdirectory("dependencies/ZArchive" EXCLUDE_FROM_ALL) endif() -add_subdirectory(src) +add_subdirectory(src) \ No newline at end of file diff --git a/CMakeSettings.json b/CMakeSettings.json index f7f9fe6e..0927e98b 100644 --- a/CMakeSettings.json +++ b/CMakeSettings.json @@ -1,7 +1,7 @@ { "configurations": [ { - "name": "Release", + "name": "RelWithDebInfo", "configurationType": "RelWithDebInfo", "generator": "Ninja", "inheritEnvironments": [ "msvc_x64_x64" ], @@ -9,13 +9,12 @@ "installRoot": "${projectDir}\\out\\install\\${name}" }, { - "name": "Public Release", - "configurationType": "RelWithDebInfo", + "name": "Release", + "configurationType": "Release", "generator": "Ninja", "inheritEnvironments": [ "msvc_x64_x64" ], "buildRoot": "${projectDir}\\out\\build\\${name}", - "installRoot": "${projectDir}\\out\\install\\${name}", - "cmakeCommandArgs": "-DPUBLIC_RELEASE=ON" + "installRoot": "${projectDir}\\out\\install\\${name}" }, { "name": "Debug", diff --git a/CODING_STYLE.md b/CODING_STYLE.md new file mode 100644 index 00000000..39e1d342 ---
/dev/null +++ b/CODING_STYLE.md @@ -0,0 +1,99 @@ + +# Coding style guidelines for Cemu + +This document describes the latest version of our coding-style guidelines. Since we did not use this style from the beginning, older code may not adhere to these guidelines. Nevertheless, use these rules even if the surrounding code does not match. + +Cemu comes with a `.clang-format` file which is supported by most IDEs for formatting. Avoid auto-reformatting whole files; PRs with a lot of formatting changes are difficult to review. + +## Names for variables, functions and classes + +- Always prefix class member variables with `m_` +- Always prefix static class variables with `s_` +- For variable names: Camel case, starting with a lower case letter after the prefix. Examples: `m_option`, `s_audioVolume` +- For functions/class names: Use camel case starting with a capital letter. Examples: `MyClass`, `SetActive` +- Avoid underscores in variable names after the prefix. Use `m_myVariable` instead of `m_my_variable` + +## About types + +Cemu provides its own set of basic fixed-width types. They are: +`uint8`, `sint8`, `uint16`, `sint16`, `uint32`, `sint32`, `uint64`, `sint64`. Always use these types over something like `uint32_t`. Using `size_t` is also acceptable where suitable. Avoid C types like `int` or `long`. The only exception is when interacting with external libraries which expect these types as parameters. + +## When and where to put brackets + +Always put curly-brackets (`{ }`) on their own line. Example: + +``` +void FooBar() +{ + if (m_hasFoo) + { + ... + } +} +``` +As an exception, you can put short lambdas onto the same line: +``` +SomeFunc([]() { .... }); +``` +You can skip brackets for single-statement `if`. Example: +``` +if (cond) + action(); +``` + +## Printing + +Avoid sprintf and similar C-style formatting APIs. Use `fmt::format()`. +In UI related code you can use `formatWxString`, but be aware that number formatting with this function will be locale dependent! + +## Strings and encoding + +We use UTF-8 encoded `std::string` where possible. Some conversions need special handling and we have helper functions for those: +```cpp +// std::filesystem::path <-> std::string (in precompiled.h) +std::string _pathToUtf8(const fs::path& path); +fs::path _utf8ToPath(std::string_view input); + +// wxString <-> std::string +wxString wxString::FromUTF8(const std::string& s) +wxString to_wxString(std::string_view str); // in gui/helpers.h +std::string wxString::utf8_string(); + +``` + +## Logging + +If you want to write to log.txt use `cemuLog_log()`. The log type parameter should be mostly self-explanatory. Use `LogType::Force` if you always want to log something. For example: +`cemuLog_log(LogType::Force, "The value is {}", 123);` + +## HLE and endianness + +A pretty large part of Cemu's code base consists of re-implementations of various Cafe OS modules (e.g. `coreinit.rpl`, `gx2.rpl`...). These generally run in the context of the emulated process; thus special care has to be taken to use types with the correct size and endianness when interacting with memory. + +Keep in mind that the emulated Espresso CPU is 32bit big-endian, while the host architectures targeted by Cemu are 64bit little-endian! + +To keep code simple and remove the need for manual endian-swapping, Cemu has templates and aliases of the basic types with explicit endianness. +For big-endian types add the suffix `be`. Example: `uint32be` + +When you need to store a pointer in the guest's memory, use `MEMPTR`. 
It will automatically store any pointer as 32bit big-endian. The pointer you store must point to memory that is within the guest address space. + +## HLE interfaces + +The implementation for each HLE module is inside a namespace with a matching name. E.g. `coreinit.rpl` functions go into `coreinit` namespace. + +To expose a new function as callable from within the emulated machine, use `cafeExportRegister` or `cafeExportRegisterFunc`. Here is a short example: +```cpp +namespace coreinit +{ + uint32 OSGetCoreCount() + { + return Espresso::CORE_COUNT; + } + + void Init() + { + cafeExportRegister("coreinit", OSGetCoreCount, LogType::CoreinitThread); + } +} +``` +You may also see some code which uses `osLib_addFunction` directly. This is a deprecated way of registering functions. \ No newline at end of file diff --git a/README.md b/README.md index 14682b41..dfd35791 100644 --- a/README.md +++ b/README.md @@ -2,38 +2,39 @@ [![Build Process](https://github.com/cemu-project/Cemu/actions/workflows/build.yml/badge.svg)](https://github.com/cemu-project/Cemu/actions/workflows/build.yml) [![Discord](https://img.shields.io/discord/286429969104764928?label=Cemu&logo=discord&logoColor=FFFFFF)](https://discord.gg/5psYsup) -[![Matrix Server](https://img.shields.io/matrix/dev:cemu.info?server_fqdn=matrix.cemu.info&label=dev:cemu.info&logo=matrix&logoColor=FFFFFF)](https://matrix.to/#/#dev:cemu.info) +[![Matrix Server](https://img.shields.io/matrix/cemu:cemu.info?server_fqdn=matrix.cemu.info&label=cemu:cemu.info&logo=matrix&logoColor=FFFFFF)](https://matrix.to/#/#cemu:cemu.info) This is the code repository of Cemu, a Wii U emulator that is able to run most Wii U games and homebrew in a playable state. -It's written in C/C++ and is being actively developed with new features and fixes to increase compatibility, convenience and usability. +It's written in C/C++ and is being actively developed with new features and fixes. -Cemu is currently only available for 64-bit Windows and Linux devices. +Cemu is currently only available for 64-bit Windows, Linux & macOS devices. ### Links: - - [Original 2.0 announcement post](https://www.reddit.com/r/cemu/comments/wwa22c/cemu_20_announcement_linux_builds_opensource_and/) + - [Open Source Announcement](https://www.reddit.com/r/cemu/comments/wwa22c/cemu_20_announcement_linux_builds_opensource_and/) - [Official Website](https://cemu.info) - [Compatibility List/Wiki](https://wiki.cemu.info/wiki/Main_Page) - [Official Subreddit](https://reddit.com/r/Cemu) - [Official Discord](https://discord.gg/5psYsup) - - [Unofficial Setup Guide](https://cemu.cfw.guide) + - [Official Matrix Server](https://matrix.to/#/#cemu:cemu.info) + - [Setup Guide](https://cemu.cfw.guide) #### Other relevant repositories: - [Cemu-Language](https://github.com/cemu-project/Cemu-Language) - - [Cemu's Community Graphic Packs](https://github.com/ActualMandM/cemu_graphic_packs) + - [Cemu's Community Graphic Packs](https://github.com/cemu-project/cemu_graphic_packs) ## Download -You can download the latest Cemu releases from the [GitHub Releases](https://github.com/cemu-project/Cemu/releases/) or from [Cemu's website](https://cemu.info). +You can download the latest Cemu releases for Windows, Linux and Mac from the [GitHub Releases](https://github.com/cemu-project/Cemu/releases/). For Linux you can also find Cemu on [flathub](https://flathub.org/apps/info.cemu.Cemu). -Cemu is currently only available in a portable format so no installation is required besides extracting it in a safe place. 
+On Windows Cemu is currently only available in a portable format so no installation is required besides extracting it in a safe place. -See [Current State Of Linux builds](https://github.com/cemu-project/Cemu/issues/1) for information on using Cemu natively on Linux. +The native macOS build is currently purely experimental and should not be considered stable or ready for issue-free gameplay. There are also known issues with degraded performance due to the use of MoltenVK and Rosetta for ARM Macs. We appreciate your patience while we improve Cemu for macOS. Pre-2.0 releases can be found on Cemu's [changelog page](https://cemu.info/changelog.html). ## Build Instructions -To compile Cemu yourself on Windows or Linux, view the [BUILD.md file](/BUILD.md). +To compile Cemu yourself on Windows, Linux or macOS, view [BUILD.md](/BUILD.md). ## Issues @@ -42,11 +43,12 @@ The old bug tracker can be found at [bugs.cemu.info](https://bugs.cemu.info) and ## Contributing -Pull requests are very welcome. For easier coordination you can visit the developer discussion channel on Discord: [https://discord.gg/5psYsup](https://discord.gg/5psYsup). +Pull requests are very welcome. For easier coordination you can visit the developer discussion channel on [Discord](https://discord.gg/5psYsup) or alternatively the [Matrix Server](https://matrix.to/#/#cemu:cemu.info). +Before submitting a pull request, please read and follow our code style guidelines listed in [CODING_STYLE.md](/CODING_STYLE.md). If coding isn't your thing, testing games and making detailed bug reports or updating the (usually outdated) compatibility wiki is also appreciated! -Questions about Cemu's software architecture can also be answered on Discord. Alternative communication channels (like IRC) are being considered. +Questions about Cemu's software architecture can also be answered on Discord (or through the Matrix bridge). ## License Cemu is licensed under [Mozilla Public License 2.0](/LICENSE.txt). Exempt from this are all files in the dependencies directory for which the licenses of the original code apply as well as some individual files in the src folder, as specified in those file headers respectively. diff --git a/bin/gameProfiles/default/0005000010101a00.ini b/bin/gameProfiles/default/0005000010101a00.ini index dc63ccb5..1123343a 100644 --- a/bin/gameProfiles/default/0005000010101a00.ini +++ b/bin/gameProfiles/default/0005000010101a00.ini @@ -1,7 +1 @@ -# LEGO City Undercover (USA) - -[General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# LEGO City Undercover (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010101b00.ini b/bin/gameProfiles/default/0005000010101b00.ini index 21aded1d..f3749d47 100644 --- a/bin/gameProfiles/default/0005000010101b00.ini +++ b/bin/gameProfiles/default/0005000010101b00.ini @@ -1,7 +1 @@ -# LEGO City Undercover (EUR) - -[General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# LEGO City Undercover (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010104d00.ini b/bin/gameProfiles/default/0005000010104d00.ini index 50a9fa48..dc8bebaf 100644 --- a/bin/gameProfiles/default/0005000010104d00.ini +++ b/bin/gameProfiles/default/0005000010104d00.ini @@ -1,5 +1,4 @@ # Monster Hunter 3(tri-)GHD Ver. 
(JPN) [Graphics] -GPUBufferCacheAccuracy = 1 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010106100.ini b/bin/gameProfiles/default/0005000010106100.ini index 77e7c399..9aa38072 100644 --- a/bin/gameProfiles/default/0005000010106100.ini +++ b/bin/gameProfiles/default/0005000010106100.ini @@ -1,7 +1,4 @@ # Super Mario 3D World (JPN) -[CPU] - [Graphics] accurateShaderMul = false -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010EB00.ini b/bin/gameProfiles/default/000500001010EB00.ini index e4b30b16..782398b5 100644 --- a/bin/gameProfiles/default/000500001010EB00.ini +++ b/bin/gameProfiles/default/000500001010EB00.ini @@ -1,7 +1 @@ -# Mario Kart 8 (JPN) - -[CPU] -cpuMode = Singlecore-Recompiler - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of file +# Mario Kart 8 (JPN) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010F900.ini b/bin/gameProfiles/default/000500001010F900.ini index 4772db75..e22770bf 100644 --- a/bin/gameProfiles/default/000500001010F900.ini +++ b/bin/gameProfiles/default/000500001010F900.ini @@ -1,7 +1 @@ -# Scribblenauts Unlimited (EUR) - -[General] -loadSharedLibraries = true - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Scribblenauts Unlimited (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010ac00.ini b/bin/gameProfiles/default/000500001010ac00.ini index 7df76e2f..9852538c 100644 --- a/bin/gameProfiles/default/000500001010ac00.ini +++ b/bin/gameProfiles/default/000500001010ac00.ini @@ -1,7 +1 @@ -# Ben 10 Omniverse (USA) - -[General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Ben 10 Omniverse (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010b100.ini b/bin/gameProfiles/default/000500001010b100.ini index e2215587..0741b39f 100644 --- a/bin/gameProfiles/default/000500001010b100.ini +++ b/bin/gameProfiles/default/000500001010b100.ini @@ -1,4 +1 @@ -# Rayman Legends (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Rayman Legends (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010b200.ini b/bin/gameProfiles/default/000500001010b200.ini index d47b4060..22d51c39 100644 --- a/bin/gameProfiles/default/000500001010b200.ini +++ b/bin/gameProfiles/default/000500001010b200.ini @@ -1,7 +1 @@ -# Scribblenauts Unlimited (US) - -[General] -loadSharedLibraries = true - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Scribblenauts Unlimited (US) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010dc00.ini b/bin/gameProfiles/default/000500001010dc00.ini index 11d0fd68..ffed75ea 100644 --- a/bin/gameProfiles/default/000500001010dc00.ini +++ b/bin/gameProfiles/default/000500001010dc00.ini @@ -1,4 +1 @@ -# Mass Effect 3 Special Edition (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Mass Effect 3 Special Edition (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010dd00.ini b/bin/gameProfiles/default/000500001010dd00.ini index 5a0620da..2e74d7e0 100644 --- a/bin/gameProfiles/default/000500001010dd00.ini +++ b/bin/gameProfiles/default/000500001010dd00.ini @@ -1,4 +1 @@ -# ZombiU (US) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# ZombiU (US) \ No newline at end of file diff --git 
a/bin/gameProfiles/default/000500001010e600.ini b/bin/gameProfiles/default/000500001010e600.ini index dc351608..01dc0e7a 100644 --- a/bin/gameProfiles/default/000500001010e600.ini +++ b/bin/gameProfiles/default/000500001010e600.ini @@ -1,4 +1 @@ -# 007 Legends (US) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# 007 Legends (US) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010e700.ini b/bin/gameProfiles/default/000500001010e700.ini index ea118074..a926c9d4 100644 --- a/bin/gameProfiles/default/000500001010e700.ini +++ b/bin/gameProfiles/default/000500001010e700.ini @@ -1,7 +1 @@ -# Cabela's Dangerous Hunts 2013 (USA) - -[General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Cabela's Dangerous Hunts 2013 (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010ec00.ini b/bin/gameProfiles/default/000500001010ec00.ini index babaf249..5d2dc9d8 100644 --- a/bin/gameProfiles/default/000500001010ec00.ini +++ b/bin/gameProfiles/default/000500001010ec00.ini @@ -1,7 +1 @@ -# Mario Kart 8 (USA) - -[CPU] -cpuMode = Singlecore-Recompiler - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of file +# Mario Kart 8 (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010ed00.ini b/bin/gameProfiles/default/000500001010ed00.ini index 7935ea37..dc1f4d52 100644 --- a/bin/gameProfiles/default/000500001010ed00.ini +++ b/bin/gameProfiles/default/000500001010ed00.ini @@ -1,7 +1 @@ -# Mario Kart 8 (EUR) - -[CPU] -cpuMode = Singlecore-Recompiler - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of file +# Mario Kart 8 (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010ef00.ini b/bin/gameProfiles/default/000500001010ef00.ini index 005ba84b..dbadd2b1 100644 --- a/bin/gameProfiles/default/000500001010ef00.ini +++ b/bin/gameProfiles/default/000500001010ef00.ini @@ -1,4 +1 @@ -# ZombiU (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# ZombiU (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010f100.ini b/bin/gameProfiles/default/000500001010f100.ini index c557498c..025e3e70 100644 --- a/bin/gameProfiles/default/000500001010f100.ini +++ b/bin/gameProfiles/default/000500001010f100.ini @@ -1,7 +1 @@ -# Rise of the Guardians: The Video Game (EUR) - -[General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Rise of the Guardians: The Video Game (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010f200.ini b/bin/gameProfiles/default/000500001010f200.ini index 055fda32..507b374b 100644 --- a/bin/gameProfiles/default/000500001010f200.ini +++ b/bin/gameProfiles/default/000500001010f200.ini @@ -1,7 +1 @@ -# Rise of the Guardians: The Video Game (USA) - -[General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Rise of the Guardians: The Video Game (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001010f500.ini b/bin/gameProfiles/default/000500001010f500.ini index e1a86417..e598f517 100644 --- a/bin/gameProfiles/default/000500001010f500.ini +++ b/bin/gameProfiles/default/000500001010f500.ini @@ -1,4 +1 @@ -# Mass Effect 3 Special Edition (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Mass Effect 3 Special Edition (EUR) \ No newline at end of file diff --git 
a/bin/gameProfiles/default/000500001011000.ini b/bin/gameProfiles/default/000500001011000.ini deleted file mode 100644 index 85ebf4c6..00000000 --- a/bin/gameProfiles/default/000500001011000.ini +++ /dev/null @@ -1,7 +0,0 @@ -# Ben 10 Omniverse (EUR) - -[General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010110100.ini b/bin/gameProfiles/default/0005000010110100.ini index 69b988cf..d46267b0 100644 --- a/bin/gameProfiles/default/0005000010110100.ini +++ b/bin/gameProfiles/default/0005000010110100.ini @@ -1,4 +1 @@ -# Nano Assault Neo (USA) - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of file +# Nano Assault Neo (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010110600.ini b/bin/gameProfiles/default/0005000010110600.ini index fb19b607..bc0a0552 100644 --- a/bin/gameProfiles/default/0005000010110600.ini +++ b/bin/gameProfiles/default/0005000010110600.ini @@ -1,4 +1 @@ -# Nano Assault Neo (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of file +# Nano Assault Neo (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010110700.ini b/bin/gameProfiles/default/0005000010110700.ini index e2d361c4..4e88d406 100644 --- a/bin/gameProfiles/default/0005000010110700.ini +++ b/bin/gameProfiles/default/0005000010110700.ini @@ -1,4 +1 @@ -# 007 Legends (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# 007 Legends (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010110E00.ini b/bin/gameProfiles/default/0005000010110E00.ini index 3064350f..6c56910f 100644 --- a/bin/gameProfiles/default/0005000010110E00.ini +++ b/bin/gameProfiles/default/0005000010110E00.ini @@ -5,4 +5,3 @@ cpuMode = Singlecore-Recompiler [Graphics] streamoutBufferCacheSize = 48 -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010111000.ini b/bin/gameProfiles/default/0005000010111000.ini new file mode 100644 index 00000000..8c34cbab --- /dev/null +++ b/bin/gameProfiles/default/0005000010111000.ini @@ -0,0 +1 @@ +# Ben 10 Omniverse (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010111400.ini b/bin/gameProfiles/default/0005000010111400.ini index 7fee0ed1..baa50914 100644 --- a/bin/gameProfiles/default/0005000010111400.ini +++ b/bin/gameProfiles/default/0005000010111400.ini @@ -1,4 +1 @@ -# Rayman Legends (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Rayman Legends (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010111600.ini b/bin/gameProfiles/default/0005000010111600.ini index 6b433160..f1c13f22 100644 --- a/bin/gameProfiles/default/0005000010111600.ini +++ b/bin/gameProfiles/default/0005000010111600.ini @@ -1,7 +1 @@ -# Fast And Furious Showdown (USA) - -[General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Fast And Furious Showdown (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010112000.ini b/bin/gameProfiles/default/0005000010112000.ini index 853fc36b..3c7710d9 100644 --- a/bin/gameProfiles/default/0005000010112000.ini +++ b/bin/gameProfiles/default/0005000010112000.ini @@ -1,4 +1 @@ -# The Croods: Prehistoric Party! (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# The Croods: Prehistoric Party! 
(USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010112300.ini b/bin/gameProfiles/default/0005000010112300.ini index 5fe50f24..37d8050f 100644 --- a/bin/gameProfiles/default/0005000010112300.ini +++ b/bin/gameProfiles/default/0005000010112300.ini @@ -1,4 +1 @@ -# ZombiU (JPN) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# ZombiU (JPN) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010113000.ini b/bin/gameProfiles/default/0005000010113000.ini index 30330b89..6c023330 100644 --- a/bin/gameProfiles/default/0005000010113000.ini +++ b/bin/gameProfiles/default/0005000010113000.ini @@ -1,4 +1 @@ -# Mass Effect 3 Special Edition (JPN) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Mass Effect 3 Special Edition (JPN) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010113300.ini b/bin/gameProfiles/default/0005000010113300.ini index af55851d..5118d05f 100644 --- a/bin/gameProfiles/default/0005000010113300.ini +++ b/bin/gameProfiles/default/0005000010113300.ini @@ -1,4 +1 @@ -# The Smurfs 2 (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# The Smurfs 2 (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010113d00.ini b/bin/gameProfiles/default/0005000010113d00.ini index 51369705..afb42bf8 100644 --- a/bin/gameProfiles/default/0005000010113d00.ini +++ b/bin/gameProfiles/default/0005000010113d00.ini @@ -1,4 +1 @@ -# Rapala Pro Bass Fishing (US) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Rapala Pro Bass Fishing (US) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010115d00.ini b/bin/gameProfiles/default/0005000010115d00.ini index 5a00554b..84bb9fad 100644 --- a/bin/gameProfiles/default/0005000010115d00.ini +++ b/bin/gameProfiles/default/0005000010115d00.ini @@ -1,4 +1 @@ -# The Smurfs 2 (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# The Smurfs 2 (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010116100.ini b/bin/gameProfiles/default/0005000010116100.ini index 8f01e5fa..465d3dc3 100644 --- a/bin/gameProfiles/default/0005000010116100.ini +++ b/bin/gameProfiles/default/0005000010116100.ini @@ -5,4 +5,3 @@ cpuMode = Singlecore-Recompiler [Graphics] accurateShaderMul = false -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010117200.ini b/bin/gameProfiles/default/0005000010117200.ini index e802c38e..4000f79f 100644 --- a/bin/gameProfiles/default/0005000010117200.ini +++ b/bin/gameProfiles/default/0005000010117200.ini @@ -1,5 +1,4 @@ # Monster Hunter 3 Ultimate (EUR) [Graphics] -GPUBufferCacheAccuracy = 1 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010118300.ini b/bin/gameProfiles/default/0005000010118300.ini index 4ba8d937..e69a6b95 100644 --- a/bin/gameProfiles/default/0005000010118300.ini +++ b/bin/gameProfiles/default/0005000010118300.ini @@ -1,5 +1,4 @@ # Monster Hunter 3 Ultimate (USA) [Graphics] -GPUBufferCacheAccuracy = 1 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001011a600.ini b/bin/gameProfiles/default/000500001011a600.ini index 7c673d05..454aaf08 100644 --- a/bin/gameProfiles/default/000500001011a600.ini +++ b/bin/gameProfiles/default/000500001011a600.ini @@ -1,7 +1 @@ -# Cabela's Dangerous Hunts 2013 (EUR) - -[General] 
-loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Cabela's Dangerous Hunts 2013 (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001011af00.ini b/bin/gameProfiles/default/000500001011af00.ini index c7bc4826..73c8f567 100644 --- a/bin/gameProfiles/default/000500001011af00.ini +++ b/bin/gameProfiles/default/000500001011af00.ini @@ -1,4 +1 @@ -# BIT.TRIP Presents... Runner2: Future Legend of Rhythm Alien (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# BIT.TRIP Presents... Runner2: Future Legend of Rhythm Alien (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001011b200.ini b/bin/gameProfiles/default/000500001011b200.ini index 64edf0af..4a576434 100644 --- a/bin/gameProfiles/default/000500001011b200.ini +++ b/bin/gameProfiles/default/000500001011b200.ini @@ -1,7 +1,4 @@ # Little Inferno (US) -[CPU] -extendedTextureReadback = true - [Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +extendedTextureReadback = true \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010128600.ini b/bin/gameProfiles/default/0005000010128600.ini index 467f0014..cd6c1b18 100644 --- a/bin/gameProfiles/default/0005000010128600.ini +++ b/bin/gameProfiles/default/0005000010128600.ini @@ -1,5 +1,4 @@ # Little Inferno (EUR) -[CPU] +[Graphics] extendedTextureReadback = true -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010129000.ini b/bin/gameProfiles/default/0005000010129000.ini index e1a9d9c5..93403c3c 100644 --- a/bin/gameProfiles/default/0005000010129000.ini +++ b/bin/gameProfiles/default/0005000010129000.ini @@ -4,5 +4,4 @@ cpuMode = Singlecore-Recompiler [Graphics] -GPUBufferCacheAccuracy = 0 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010129200.ini b/bin/gameProfiles/default/0005000010129200.ini index 33c2f21a..8705ce00 100644 --- a/bin/gameProfiles/default/0005000010129200.ini +++ b/bin/gameProfiles/default/0005000010129200.ini @@ -4,5 +4,4 @@ cpuMode = Singlecore-Recompiler [Graphics] -GPUBufferCacheAccuracy = 0 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001012BC00.ini b/bin/gameProfiles/default/000500001012BC00.ini index c7adb846..0b4f6b6d 100644 --- a/bin/gameProfiles/default/000500001012BC00.ini +++ b/bin/gameProfiles/default/000500001012BC00.ini @@ -1,8 +1,4 @@ # Pikmin 3 (JPN) - - [Graphics] - extendedTextureReadback = true -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001012BD00.ini b/bin/gameProfiles/default/000500001012BD00.ini index 8ab10175..cd27ff77 100644 --- a/bin/gameProfiles/default/000500001012BD00.ini +++ b/bin/gameProfiles/default/000500001012BD00.ini @@ -1,8 +1,4 @@ # Pikmin 3 (USA) - - [Graphics] - extendedTextureReadback = true -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001012F000.ini b/bin/gameProfiles/default/000500001012F000.ini index af1229e6..6b1a3f95 100644 --- a/bin/gameProfiles/default/000500001012F000.ini +++ b/bin/gameProfiles/default/000500001012F000.ini @@ -2,6 +2,3 @@ [CPU] cpuMode = Singlecore-Recompiler - -[Graphics] -GPUBufferCacheAccuracy = 1 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001012b200.ini b/bin/gameProfiles/default/000500001012b200.ini index b00c1a04..9f3733ff 100644 --- 
a/bin/gameProfiles/default/000500001012b200.ini +++ b/bin/gameProfiles/default/000500001012b200.ini @@ -4,6 +4,5 @@ loadSharedLibraries = false [Graphics] -GPUBufferCacheAccuracy = 0 extendedTextureReadback = true streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001012ba00.ini b/bin/gameProfiles/default/000500001012ba00.ini index 7e94a7fd..efab660e 100644 --- a/bin/gameProfiles/default/000500001012ba00.ini +++ b/bin/gameProfiles/default/000500001012ba00.ini @@ -4,6 +4,5 @@ loadSharedLibraries = false [Graphics] -GPUBufferCacheAccuracy = 0 extendedTextureReadback = true streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001012be00.ini b/bin/gameProfiles/default/000500001012be00.ini index 2987dd1e..64a38b0d 100644 --- a/bin/gameProfiles/default/000500001012be00.ini +++ b/bin/gameProfiles/default/000500001012be00.ini @@ -1,7 +1,4 @@ # Pikmin 3 (EU) - - [Graphics] - extendedTextureReadback = true \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001012c500.ini b/bin/gameProfiles/default/000500001012c500.ini index 9badd49e..e4cc6690 100644 --- a/bin/gameProfiles/default/000500001012c500.ini +++ b/bin/gameProfiles/default/000500001012c500.ini @@ -1,4 +1 @@ -# The Croods: Prehistoric Party! (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# The Croods: Prehistoric Party! (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001012da00.ini b/bin/gameProfiles/default/000500001012da00.ini index 4f478ace..8812e297 100644 --- a/bin/gameProfiles/default/000500001012da00.ini +++ b/bin/gameProfiles/default/000500001012da00.ini @@ -1,7 +1 @@ -# Fast And Furious Showdown (EUR) - -[General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Fast And Furious Showdown (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010131F00.ini b/bin/gameProfiles/default/0005000010131F00.ini index 9b2dde57..b484aa82 100644 --- a/bin/gameProfiles/default/0005000010131F00.ini +++ b/bin/gameProfiles/default/0005000010131F00.ini @@ -1,4 +1 @@ -# Yoshi's Woolly World (JPN) - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of file +# Yoshi's Woolly World (JPN) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010132c00.ini b/bin/gameProfiles/default/0005000010132c00.ini index 00e83b4c..ff554b6e 100644 --- a/bin/gameProfiles/default/0005000010132c00.ini +++ b/bin/gameProfiles/default/0005000010132c00.ini @@ -1,4 +1 @@ -# Scribblenauts Unmasked: A DC Comics Adventure (US) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Scribblenauts Unmasked: A DC Comics Adventure (US) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010132d00.ini b/bin/gameProfiles/default/0005000010132d00.ini index d516cd8f..3655b674 100644 --- a/bin/gameProfiles/default/0005000010132d00.ini +++ b/bin/gameProfiles/default/0005000010132d00.ini @@ -1,4 +1 @@ -# Scribblenauts Unmasked: A DC Comics Adventure (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Scribblenauts Unmasked: A DC Comics Adventure (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010133b00.ini b/bin/gameProfiles/default/0005000010133b00.ini index 465ef364..ae2c8a48 100644 --- a/bin/gameProfiles/default/0005000010133b00.ini +++ b/bin/gameProfiles/default/0005000010133b00.ini @@ -1,4 +1 @@ -# Sniper 
Elite V2 (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 1 \ No newline at end of file +# Sniper Elite V2 (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010134e00.ini b/bin/gameProfiles/default/0005000010134e00.ini index e04cdd74..99ff8b41 100644 --- a/bin/gameProfiles/default/0005000010134e00.ini +++ b/bin/gameProfiles/default/0005000010134e00.ini @@ -1,4 +1 @@ -# Sniper Elite V2 (USA) - -[Graphics] -GPUBufferCacheAccuracy = 1 \ No newline at end of file +# Sniper Elite V2 (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010135500.ini b/bin/gameProfiles/default/0005000010135500.ini index cc008930..71f1c152 100644 --- a/bin/gameProfiles/default/0005000010135500.ini +++ b/bin/gameProfiles/default/0005000010135500.ini @@ -1,7 +1 @@ -# LEGO Batman 2: DC Super Heroes (EUR) - -[General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# LEGO Batman 2: DC Super Heroes (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010135e00.ini b/bin/gameProfiles/default/0005000010135e00.ini index ea135acc..1319dfe5 100644 --- a/bin/gameProfiles/default/0005000010135e00.ini +++ b/bin/gameProfiles/default/0005000010135e00.ini @@ -1,5 +1 @@ -# LEGO Batman 2: DC Super Heroes (USA) - -[General] -loadSharedLibraries = false -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# LEGO Batman 2: DC Super Heroes (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010136300.ini b/bin/gameProfiles/default/0005000010136300.ini index 19e3176f..b37053e0 100644 --- a/bin/gameProfiles/default/0005000010136300.ini +++ b/bin/gameProfiles/default/0005000010136300.ini @@ -1,4 +1 @@ -# BIT.TRIP Presents... Runner2: Future Legend of Rhythm Alien (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# BIT.TRIP Presents... 
Runner2: Future Legend of Rhythm Alien (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010136c00.ini b/bin/gameProfiles/default/0005000010136c00.ini index 8d9d5448..427a45fe 100644 --- a/bin/gameProfiles/default/0005000010136c00.ini +++ b/bin/gameProfiles/default/0005000010136c00.ini @@ -4,5 +4,4 @@ cpuMode = Singlecore-Recompiler [Graphics] -GPUBufferCacheAccuracy = 0 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010137F00.ini b/bin/gameProfiles/default/0005000010137F00.ini index c9cddecc..151d5394 100644 --- a/bin/gameProfiles/default/0005000010137F00.ini +++ b/bin/gameProfiles/default/0005000010137F00.ini @@ -1,4 +1 @@ -# DKC: Tropical Freeze (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# DKC: Tropical Freeze (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010137c00.ini b/bin/gameProfiles/default/0005000010137c00.ini index db36989b..88fff6e7 100644 --- a/bin/gameProfiles/default/0005000010137c00.ini +++ b/bin/gameProfiles/default/0005000010137c00.ini @@ -4,5 +4,4 @@ cpuMode = Singlecore-Recompiler [Graphics] -GPUBufferCacheAccuracy = 0 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010138300.ini b/bin/gameProfiles/default/0005000010138300.ini index 4385df2e..d992cb24 100644 --- a/bin/gameProfiles/default/0005000010138300.ini +++ b/bin/gameProfiles/default/0005000010138300.ini @@ -1,4 +1 @@ -# DKC: Tropical Freeze (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# DKC: Tropical Freeze (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010138a00.ini b/bin/gameProfiles/default/0005000010138a00.ini index 8a4f621e..26318009 100644 --- a/bin/gameProfiles/default/0005000010138a00.ini +++ b/bin/gameProfiles/default/0005000010138a00.ini @@ -1,4 +1 @@ -# Angry Birds Trilogy (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Angry Birds Trilogy (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010138f00.ini b/bin/gameProfiles/default/0005000010138f00.ini index df1f79ea..1f46d11d 100644 --- a/bin/gameProfiles/default/0005000010138f00.ini +++ b/bin/gameProfiles/default/0005000010138f00.ini @@ -1,5 +1,4 @@ # Devil's Third (JPN) [Graphics] -GPUBufferCacheAccuracy = 0 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010140000.ini b/bin/gameProfiles/default/0005000010140000.ini index 9afff7f4..347a4481 100644 --- a/bin/gameProfiles/default/0005000010140000.ini +++ b/bin/gameProfiles/default/0005000010140000.ini @@ -1,4 +1 @@ -# Angry Birds Trilogy (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Angry Birds Trilogy (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010143200.ini b/bin/gameProfiles/default/0005000010143200.ini new file mode 100644 index 00000000..7085a558 --- /dev/null +++ b/bin/gameProfiles/default/0005000010143200.ini @@ -0,0 +1 @@ +# Cabela's Big Game Hunter: Pro Hunts (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010143500.ini b/bin/gameProfiles/default/0005000010143500.ini index a9266121..c8b93998 100644 --- a/bin/gameProfiles/default/0005000010143500.ini +++ b/bin/gameProfiles/default/0005000010143500.ini @@ -1,6 +1 @@ -# TLoZ: Wind Waker HD (USA) - -[CPU] - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of file 
+# TLoZ: Wind Waker HD (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010143600.ini b/bin/gameProfiles/default/0005000010143600.ini index 6ba5a4c0..6414c25d 100644 --- a/bin/gameProfiles/default/0005000010143600.ini +++ b/bin/gameProfiles/default/0005000010143600.ini @@ -1,6 +1 @@ -# TLoZ: Wind Waker HD (EUR) - -[CPU] - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of file +# TLoZ: Wind Waker HD (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010144800.ini b/bin/gameProfiles/default/0005000010144800.ini index 55a4e962..80721cd0 100644 --- a/bin/gameProfiles/default/0005000010144800.ini +++ b/bin/gameProfiles/default/0005000010144800.ini @@ -1,4 +1 @@ -# DKC: Tropical Freeze (JPN) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# DKC: Tropical Freeze (JPN) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010144f00.ini b/bin/gameProfiles/default/0005000010144f00.ini index 6bbc9e28..c2fce59a 100644 --- a/bin/gameProfiles/default/0005000010144f00.ini +++ b/bin/gameProfiles/default/0005000010144f00.ini @@ -5,4 +5,3 @@ cpuMode = Singlecore-Recompiler [Graphics] streamoutBufferCacheSize = 48 -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010145000.ini b/bin/gameProfiles/default/0005000010145000.ini index f286269f..ace52dba 100644 --- a/bin/gameProfiles/default/0005000010145000.ini +++ b/bin/gameProfiles/default/0005000010145000.ini @@ -5,4 +5,3 @@ cpuMode = Singlecore-Recompiler [Graphics] streamoutBufferCacheSize = 48 -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010145c00.ini b/bin/gameProfiles/default/0005000010145c00.ini index b82867e4..0be65fcc 100644 --- a/bin/gameProfiles/default/0005000010145c00.ini +++ b/bin/gameProfiles/default/0005000010145c00.ini @@ -1,7 +1,4 @@ # Super Mario 3D World (USA) -[CPU] - [Graphics] -accurateShaderMul = false -GPUBufferCacheAccuracy = 2 \ No newline at end of file +accurateShaderMul = false \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010145d00.ini b/bin/gameProfiles/default/0005000010145d00.ini index 99c70dfe..3525edd8 100644 --- a/bin/gameProfiles/default/0005000010145d00.ini +++ b/bin/gameProfiles/default/0005000010145d00.ini @@ -1,7 +1,4 @@ # Super Mario 3D World (EUR) -[CPU] - [Graphics] -accurateShaderMul = false -GPUBufferCacheAccuracy = 2 \ No newline at end of file +accurateShaderMul = false \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010147e00.ini b/bin/gameProfiles/default/0005000010147e00.ini index f5f73b9c..62109cc9 100644 --- a/bin/gameProfiles/default/0005000010147e00.ini +++ b/bin/gameProfiles/default/0005000010147e00.ini @@ -1,4 +1 @@ -# Hello Kitty Kruisers (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Hello Kitty Kruisers (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001014c100.ini b/bin/gameProfiles/default/000500001014c100.ini index da5f4d3a..64add666 100644 --- a/bin/gameProfiles/default/000500001014c100.ini +++ b/bin/gameProfiles/default/000500001014c100.ini @@ -1,4 +1 @@ -# Transformers: Rise of the Dark Spark (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Transformers: Rise of the Dark Spark (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001014c600.ini b/bin/gameProfiles/default/000500001014c600.ini index 
d83ea1d8..ff80f137 100644 --- a/bin/gameProfiles/default/000500001014c600.ini +++ b/bin/gameProfiles/default/000500001014c600.ini @@ -1,4 +1 @@ -# Giana Sisters Twisted Dreams (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Giana Sisters Twisted Dreams (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001014cb00.ini b/bin/gameProfiles/default/000500001014cb00.ini index 24e6e8d9..3ab3829e 100644 --- a/bin/gameProfiles/default/000500001014cb00.ini +++ b/bin/gameProfiles/default/000500001014cb00.ini @@ -1,4 +1 @@ -# Giana Sisters Twisted Dreams (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Giana Sisters Twisted Dreams (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001014d900.ini b/bin/gameProfiles/default/000500001014d900.ini index b3fae051..d234ff8a 100644 --- a/bin/gameProfiles/default/000500001014d900.ini +++ b/bin/gameProfiles/default/000500001014d900.ini @@ -1,4 +1 @@ -# PUYO PUYO TETRIS (JPN) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# PUYO PUYO TETRIS (JPN) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010154600.ini b/bin/gameProfiles/default/0005000010154600.ini index 8bdd8b98..d5af6dc0 100644 --- a/bin/gameProfiles/default/0005000010154600.ini +++ b/bin/gameProfiles/default/0005000010154600.ini @@ -4,5 +4,4 @@ cpuMode = Singlecore-Recompiler [Graphics] -GPUBufferCacheAccuracy = 0 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001015b200.ini b/bin/gameProfiles/default/000500001015b200.ini index fd7aad16..a3872d6f 100644 --- a/bin/gameProfiles/default/000500001015b200.ini +++ b/bin/gameProfiles/default/000500001015b200.ini @@ -1,4 +1 @@ -# Child of Light (USA) - -[Graphics] -GPUBufferCacheAccuracy = 1 \ No newline at end of file +# Child of Light (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010161a00.ini b/bin/gameProfiles/default/0005000010161a00.ini index 8bd28664..88e02890 100644 --- a/bin/gameProfiles/default/0005000010161a00.ini +++ b/bin/gameProfiles/default/0005000010161a00.ini @@ -1,4 +1 @@ -# How to Train Your Dragon 2 (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# How to Train Your Dragon 2 (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010162200.ini b/bin/gameProfiles/default/0005000010162200.ini index 70e5edf4..66363dfe 100644 --- a/bin/gameProfiles/default/0005000010162200.ini +++ b/bin/gameProfiles/default/0005000010162200.ini @@ -1,4 +1 @@ -# Monkey Pirates (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Monkey Pirates (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010162a00.ini b/bin/gameProfiles/default/0005000010162a00.ini index e20011d0..3df87ebb 100644 --- a/bin/gameProfiles/default/0005000010162a00.ini +++ b/bin/gameProfiles/default/0005000010162a00.ini @@ -1,4 +1 @@ -# How to Train Your Dragon 2 (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# How to Train Your Dragon 2 (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010162b00.ini b/bin/gameProfiles/default/0005000010162b00.ini index 9802ca1d..8a50f6f6 100644 --- a/bin/gameProfiles/default/0005000010162b00.ini +++ b/bin/gameProfiles/default/0005000010162b00.ini @@ -1,6 +1 @@ -# Splatoon (JPN) - -[CPU] - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of 
file +# Splatoon (JPN) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001016a200.ini b/bin/gameProfiles/default/000500001016a200.ini index da6a7805..655bc980 100644 --- a/bin/gameProfiles/default/000500001016a200.ini +++ b/bin/gameProfiles/default/000500001016a200.ini @@ -1,4 +1 @@ -# Bombing Bastards (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Bombing Bastards (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001016ab00.ini b/bin/gameProfiles/default/000500001016ab00.ini index 0090ca58..594d783c 100644 --- a/bin/gameProfiles/default/000500001016ab00.ini +++ b/bin/gameProfiles/default/000500001016ab00.ini @@ -1,4 +1 @@ -# Bombing Bastards (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Bombing Bastards (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001016b200.ini b/bin/gameProfiles/default/000500001016b200.ini index 9dd87948..4db2bc79 100644 --- a/bin/gameProfiles/default/000500001016b200.ini +++ b/bin/gameProfiles/default/000500001016b200.ini @@ -6,6 +6,3 @@ useRDTSC = false [CPU] cpuTimer = cycleCounter cpuMode = Singlecore-Interpreter - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001016d400.ini b/bin/gameProfiles/default/000500001016d400.ini index 74f031ec..56c1c0f8 100644 --- a/bin/gameProfiles/default/000500001016d400.ini +++ b/bin/gameProfiles/default/000500001016d400.ini @@ -1,4 +1 @@ -# Stick It to the Man (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Stick It to the Man (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001016d800.ini b/bin/gameProfiles/default/000500001016d800.ini index 3cd2b3d0..6a8a95b8 100644 --- a/bin/gameProfiles/default/000500001016d800.ini +++ b/bin/gameProfiles/default/000500001016d800.ini @@ -1,4 +1 @@ -# Child of Light (JPN) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Child of Light (JPN) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001016da00.ini b/bin/gameProfiles/default/000500001016da00.ini index 105b4406..d54f4f50 100644 --- a/bin/gameProfiles/default/000500001016da00.ini +++ b/bin/gameProfiles/default/000500001016da00.ini @@ -2,6 +2,3 @@ [CPU] cpuMode = Singlecore-Interpreter - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001016e000.ini b/bin/gameProfiles/default/000500001016e000.ini index 871da06e..c9ef12e2 100644 --- a/bin/gameProfiles/default/000500001016e000.ini +++ b/bin/gameProfiles/default/000500001016e000.ini @@ -1,4 +1 @@ -# Stick It to the Man (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Stick It to the Man (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001016e800.ini b/bin/gameProfiles/default/000500001016e800.ini index 13482456..4e578c17 100644 --- a/bin/gameProfiles/default/000500001016e800.ini +++ b/bin/gameProfiles/default/000500001016e800.ini @@ -6,6 +6,3 @@ useRDTSC = false [CPU] cpuTimer = cycleCounter cpuMode = Singlecore-Interpreter - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001016ea00.ini b/bin/gameProfiles/default/000500001016ea00.ini index 6d72c2a2..50723312 100644 --- a/bin/gameProfiles/default/000500001016ea00.ini +++ b/bin/gameProfiles/default/000500001016ea00.ini @@ -1,4 +1 @@ -# Child of Light 
(EUR) - -[Graphics] -GPUBufferCacheAccuracy = 1 \ No newline at end of file +# Child of Light (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001016fc00.ini b/bin/gameProfiles/default/000500001016fc00.ini index edd44103..9ac3ba4b 100644 --- a/bin/gameProfiles/default/000500001016fc00.ini +++ b/bin/gameProfiles/default/000500001016fc00.ini @@ -1,5 +1,4 @@ # Aqua Moto Racing Utopia (USA) -[GPU] -GPUBufferCacheAccuracy = 0 +[Graphics] streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010170100.ini b/bin/gameProfiles/default/0005000010170100.ini index 848f53ff..3de1dd34 100644 --- a/bin/gameProfiles/default/0005000010170100.ini +++ b/bin/gameProfiles/default/0005000010170100.ini @@ -1,4 +1 @@ -# Monkey Pirates (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Monkey Pirates (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010172900.ini b/bin/gameProfiles/default/0005000010172900.ini index 0414258a..5ed40dd0 100644 --- a/bin/gameProfiles/default/0005000010172900.ini +++ b/bin/gameProfiles/default/0005000010172900.ini @@ -1,5 +1,4 @@ # Aqua Moto Racing Utopia (EUR) -[GPU] -GPUBufferCacheAccuracy = 0 +[Graphics] streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010173400.ini b/bin/gameProfiles/default/0005000010173400.ini index 99995af4..c4062977 100644 --- a/bin/gameProfiles/default/0005000010173400.ini +++ b/bin/gameProfiles/default/0005000010173400.ini @@ -1,4 +1 @@ -# Transformers: Rise of the Dark Spark (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Transformers: Rise of the Dark Spark (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010176300.ini b/bin/gameProfiles/default/0005000010176300.ini index c8e0861c..30305418 100644 --- a/bin/gameProfiles/default/0005000010176300.ini +++ b/bin/gameProfiles/default/0005000010176300.ini @@ -1,5 +1,4 @@ # Little Inferno (JPN) -[CPU] +[Graphics] extendedTextureReadback = true -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010176900.ini b/bin/gameProfiles/default/0005000010176900.ini index d6d36fb8..852c383a 100644 --- a/bin/gameProfiles/default/0005000010176900.ini +++ b/bin/gameProfiles/default/0005000010176900.ini @@ -1,6 +1 @@ -# Splatoon (USA) - -[CPU] - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of file +# Splatoon (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010176a00.ini b/bin/gameProfiles/default/0005000010176a00.ini index f52d1658..e1dc631b 100644 --- a/bin/gameProfiles/default/0005000010176a00.ini +++ b/bin/gameProfiles/default/0005000010176a00.ini @@ -1,6 +1 @@ -# Splatoon (EUR) - -[CPU] - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of file +# Splatoon (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010177000.ini b/bin/gameProfiles/default/0005000010177000.ini index 1c47b1dc..bc7eebf4 100644 --- a/bin/gameProfiles/default/0005000010177000.ini +++ b/bin/gameProfiles/default/0005000010177000.ini @@ -1,4 +1 @@ -# Hello Kitty Kruisers (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Hello Kitty Kruisers (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010177500.ini b/bin/gameProfiles/default/0005000010177500.ini index 74a0a58e..90be1c63 100644 --- 
a/bin/gameProfiles/default/0005000010177500.ini +++ b/bin/gameProfiles/default/0005000010177500.ini @@ -2,6 +2,3 @@ [CPU] cpuMode = Singlecore-Interpreter - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010177600.ini b/bin/gameProfiles/default/0005000010177600.ini index 05792513..d1b955f1 100644 --- a/bin/gameProfiles/default/0005000010177600.ini +++ b/bin/gameProfiles/default/0005000010177600.ini @@ -1,5 +1,4 @@ # Devil's Third (USA) [Graphics] -GPUBufferCacheAccuracy = 0 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010177700.ini b/bin/gameProfiles/default/0005000010177700.ini index 072db711..e311f0ba 100644 --- a/bin/gameProfiles/default/0005000010177700.ini +++ b/bin/gameProfiles/default/0005000010177700.ini @@ -1,5 +1,4 @@ # Devil's Third (EUR) [Graphics] -GPUBufferCacheAccuracy = 0 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001017da00.ini b/bin/gameProfiles/default/000500001017da00.ini index 6fc7282a..09b07927 100644 --- a/bin/gameProfiles/default/000500001017da00.ini +++ b/bin/gameProfiles/default/000500001017da00.ini @@ -1,4 +1 @@ -# Costume Quest 2 (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Costume Quest 2 (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001017e400.ini b/bin/gameProfiles/default/000500001017e400.ini index 562aa77a..723a374c 100644 --- a/bin/gameProfiles/default/000500001017e400.ini +++ b/bin/gameProfiles/default/000500001017e400.ini @@ -1,4 +1 @@ -# Chasing Dead (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 +# Chasing Dead (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010180500.ini b/bin/gameProfiles/default/0005000010180500.ini index d9258613..87ea51b5 100644 --- a/bin/gameProfiles/default/0005000010180500.ini +++ b/bin/gameProfiles/default/0005000010180500.ini @@ -1,7 +1,4 @@ # Captain Toad: Treasure Tracker (JPN) -[CPU] - [Graphics] accurateShaderMul = false -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010180600.ini b/bin/gameProfiles/default/0005000010180600.ini index e7d7e325..8b2528e3 100644 --- a/bin/gameProfiles/default/0005000010180600.ini +++ b/bin/gameProfiles/default/0005000010180600.ini @@ -1,7 +1,4 @@ # Captain Toad: Treasure Tracker (USA) -[CPU] - [Graphics] accurateShaderMul = false -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010180700.ini b/bin/gameProfiles/default/0005000010180700.ini index d0b7d792..3a7a8fe6 100644 --- a/bin/gameProfiles/default/0005000010180700.ini +++ b/bin/gameProfiles/default/0005000010180700.ini @@ -1,7 +1,4 @@ # Captain Toad: Treasure Tracker (EUR) -[CPU] - [Graphics] accurateShaderMul = false -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010183000.ini b/bin/gameProfiles/default/0005000010183000.ini index 5b79f8a9..4b1c07cf 100644 --- a/bin/gameProfiles/default/0005000010183000.ini +++ b/bin/gameProfiles/default/0005000010183000.ini @@ -1,7 +1 @@ -# Runbow (USA) - - - -[Graphics] - -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Runbow (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010184900.ini b/bin/gameProfiles/default/0005000010184900.ini index 8c4d5b59..c3aac8e8 100644 --- a/bin/gameProfiles/default/0005000010184900.ini +++ 
b/bin/gameProfiles/default/0005000010184900.ini @@ -1,4 +1 @@ -# Slender: The Arrival (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Slender: The Arrival (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010184E00.ini b/bin/gameProfiles/default/0005000010184E00.ini index 3ca24eda..4b282824 100644 --- a/bin/gameProfiles/default/0005000010184E00.ini +++ b/bin/gameProfiles/default/0005000010184E00.ini @@ -1,6 +1 @@ -# Yoshi's Woolly World (EUR) - -[CPU] - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of file +# Yoshi's Woolly World (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010184d00.ini b/bin/gameProfiles/default/0005000010184d00.ini index 83a64a5e..b1bb76a7 100644 --- a/bin/gameProfiles/default/0005000010184d00.ini +++ b/bin/gameProfiles/default/0005000010184d00.ini @@ -1,6 +1 @@ -# Yoshi's Woolly World (USA) - -[CPU] - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Yoshi's Woolly World (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010185400.ini b/bin/gameProfiles/default/0005000010185400.ini index c69416ee..efd26cbb 100644 --- a/bin/gameProfiles/default/0005000010185400.ini +++ b/bin/gameProfiles/default/0005000010185400.ini @@ -1,6 +1 @@ -# TLoZ: Wind Waker HD (JPN) - -[CPU] - -[Graphics] -GPUBufferCacheAccuracy = 2 \ No newline at end of file +# TLoZ: Wind Waker HD (JPN) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010189f00.ini b/bin/gameProfiles/default/0005000010189f00.ini index a88c6891..ae998084 100644 --- a/bin/gameProfiles/default/0005000010189f00.ini +++ b/bin/gameProfiles/default/0005000010189f00.ini @@ -2,6 +2,3 @@ [CPU] cpuTimer = hostBased - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001018ab00.ini b/bin/gameProfiles/default/000500001018ab00.ini index 9eeef2ff..cd66a1a3 100644 --- a/bin/gameProfiles/default/000500001018ab00.ini +++ b/bin/gameProfiles/default/000500001018ab00.ini @@ -1,4 +1 @@ -# Affordable Space Adventures (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 +# Affordable Space Adventures (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001018d800.ini b/bin/gameProfiles/default/000500001018d800.ini index 0e311245..196b4aba 100644 --- a/bin/gameProfiles/default/000500001018d800.ini +++ b/bin/gameProfiles/default/000500001018d800.ini @@ -1,4 +1 @@ -# SteamWorld Dig (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# SteamWorld Dig (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001018d900.ini b/bin/gameProfiles/default/000500001018d900.ini index 4e8d8ead..779035e1 100644 --- a/bin/gameProfiles/default/000500001018d900.ini +++ b/bin/gameProfiles/default/000500001018d900.ini @@ -2,6 +2,3 @@ [General] loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001018f100.ini b/bin/gameProfiles/default/000500001018f100.ini index 1a3c5fba..46ffcf0e 100644 --- a/bin/gameProfiles/default/000500001018f100.ini +++ b/bin/gameProfiles/default/000500001018f100.ini @@ -1,4 +1 @@ -# SteamWorld Dig (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# SteamWorld Dig (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001018f400.ini b/bin/gameProfiles/default/000500001018f400.ini index ecf014cf..a22052e4 
100644 --- a/bin/gameProfiles/default/000500001018f400.ini +++ b/bin/gameProfiles/default/000500001018f400.ini @@ -1,4 +1 @@ -# Angry Video Game Nerd Adventures (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Angry Video Game Nerd Adventures (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001018fd00.ini b/bin/gameProfiles/default/000500001018fd00.ini index 119e00c4..eaef68fe 100644 --- a/bin/gameProfiles/default/000500001018fd00.ini +++ b/bin/gameProfiles/default/000500001018fd00.ini @@ -1,4 +1 @@ -# The Penguins of Madagascar (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# The Penguins of Madagascar (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010190300.ini b/bin/gameProfiles/default/0005000010190300.ini index b9a41d4f..fe197295 100644 --- a/bin/gameProfiles/default/0005000010190300.ini +++ b/bin/gameProfiles/default/0005000010190300.ini @@ -5,7 +5,4 @@ useRDTSC = false [CPU] cpuMode = Singlecore-Recompiler -cpuTimer = cycleCounter - -[Graphics] -GPUBufferCacheAccuracy = 1 \ No newline at end of file +cpuTimer = cycleCounter \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010193f00.ini b/bin/gameProfiles/default/0005000010193f00.ini index 9d7f8a63..104a88ca 100644 --- a/bin/gameProfiles/default/0005000010193f00.ini +++ b/bin/gameProfiles/default/0005000010193f00.ini @@ -1,4 +1 @@ -# The Penguins of Madagascar (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# The Penguins of Madagascar (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010195e00.ini b/bin/gameProfiles/default/0005000010195e00.ini index 23d72923..23585737 100644 --- a/bin/gameProfiles/default/0005000010195e00.ini +++ b/bin/gameProfiles/default/0005000010195e00.ini @@ -1,4 +1 @@ -# Rock Zombie (US) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Rock Zombie (US) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010197300.ini b/bin/gameProfiles/default/0005000010197300.ini index e15f1c7d..6f2dfa48 100644 --- a/bin/gameProfiles/default/0005000010197300.ini +++ b/bin/gameProfiles/default/0005000010197300.ini @@ -1,4 +1 @@ -# Electronic Super Joy Groove City (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Electronic Super Joy Groove City (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010197800.ini b/bin/gameProfiles/default/0005000010197800.ini index 92839df2..2af2ef33 100644 --- a/bin/gameProfiles/default/0005000010197800.ini +++ b/bin/gameProfiles/default/0005000010197800.ini @@ -1,4 +1 @@ -# Costume Quest 2 (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Costume Quest 2 (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010199e00.ini b/bin/gameProfiles/default/0005000010199e00.ini index 12c31235..01e1f2d5 100644 --- a/bin/gameProfiles/default/0005000010199e00.ini +++ b/bin/gameProfiles/default/0005000010199e00.ini @@ -1,4 +1 @@ -# High Strangeness (US) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# High Strangeness (US) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001019ab00.ini b/bin/gameProfiles/default/000500001019ab00.ini index 12c31235..01e1f2d5 100644 --- a/bin/gameProfiles/default/000500001019ab00.ini +++ b/bin/gameProfiles/default/000500001019ab00.ini @@ -1,4 +1 @@ -# High Strangeness (US) - 
-[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# High Strangeness (US) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001019b000.ini b/bin/gameProfiles/default/000500001019b000.ini index 23e77a87..c458883f 100644 --- a/bin/gameProfiles/default/000500001019b000.ini +++ b/bin/gameProfiles/default/000500001019b000.ini @@ -1,4 +1 @@ -# Tachyon Project (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Tachyon Project (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001019b200.ini b/bin/gameProfiles/default/000500001019b200.ini index 6790a314..3e893f12 100644 --- a/bin/gameProfiles/default/000500001019b200.ini +++ b/bin/gameProfiles/default/000500001019b200.ini @@ -1,4 +1 @@ -# Tachyon Project (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Tachyon Project (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001019ca00.ini b/bin/gameProfiles/default/000500001019ca00.ini index 9187ed88..f79dd190 100644 --- a/bin/gameProfiles/default/000500001019ca00.ini +++ b/bin/gameProfiles/default/000500001019ca00.ini @@ -1,4 +1 @@ -# Rock Zombie (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Rock Zombie (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001019e500.ini b/bin/gameProfiles/default/000500001019e500.ini index 0ee89ea1..eec28dc9 100644 --- a/bin/gameProfiles/default/000500001019e500.ini +++ b/bin/gameProfiles/default/000500001019e500.ini @@ -1,6 +1 @@ -# TLoZ: Twilight Princess (US) - -[CPU] - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# TLoZ: Twilight Princess (US) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001019e600.ini b/bin/gameProfiles/default/000500001019e600.ini index 4f3aeecb..47380cbe 100644 --- a/bin/gameProfiles/default/000500001019e600.ini +++ b/bin/gameProfiles/default/000500001019e600.ini @@ -1,6 +1 @@ -# TLoZ: Twilight Princess (EU) - -[CPU] - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# TLoZ: Twilight Princess (EU) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001019ea00.ini b/bin/gameProfiles/default/000500001019ea00.ini index f69c7740..49250a19 100644 --- a/bin/gameProfiles/default/000500001019ea00.ini +++ b/bin/gameProfiles/default/000500001019ea00.ini @@ -1,4 +1 @@ -# Zombeer (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Zombeer (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001019ee00.ini b/bin/gameProfiles/default/000500001019ee00.ini index accf6993..958d2f84 100644 --- a/bin/gameProfiles/default/000500001019ee00.ini +++ b/bin/gameProfiles/default/000500001019ee00.ini @@ -1,4 +1 @@ -# Zombie Defense (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Zombie Defense (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101C9300.ini b/bin/gameProfiles/default/00050000101C9300.ini index f3c94e8f..09f0706d 100644 --- a/bin/gameProfiles/default/00050000101C9300.ini +++ b/bin/gameProfiles/default/00050000101C9300.ini @@ -3,4 +3,3 @@ [Graphics] disableGPUFence = false accurateShaderMul = true -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101E4100.ini b/bin/gameProfiles/default/00050000101E4100.ini index 71176e83..8b8165b1 100644 --- a/bin/gameProfiles/default/00050000101E4100.ini +++ 
b/bin/gameProfiles/default/00050000101E4100.ini @@ -2,6 +2,3 @@ [CPU] cpuMode = Singlecore-Recompiler - -[Graphics] -GPUBufferCacheAccuracy = 1 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101a1200.ini b/bin/gameProfiles/default/00050000101a1200.ini index 76b1207a..4ff51799 100644 --- a/bin/gameProfiles/default/00050000101a1200.ini +++ b/bin/gameProfiles/default/00050000101a1200.ini @@ -1,5 +1 @@ -# Affordable Space Adventures (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 - +# Affordable Space Adventures (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101a1800.ini b/bin/gameProfiles/default/00050000101a1800.ini index 94c50e45..56dd856c 100644 --- a/bin/gameProfiles/default/00050000101a1800.ini +++ b/bin/gameProfiles/default/00050000101a1800.ini @@ -1,4 +1 @@ -# Zombie Defense (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Zombie Defense (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101a3b00.ini b/bin/gameProfiles/default/00050000101a3b00.ini index 2efa5016..44ec4fcb 100644 --- a/bin/gameProfiles/default/00050000101a3b00.ini +++ b/bin/gameProfiles/default/00050000101a3b00.ini @@ -1,4 +1 @@ -# Life of Pixel (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Life of Pixel (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101a4300.ini b/bin/gameProfiles/default/00050000101a4300.ini index 1c49a9ab..80c72ed3 100644 --- a/bin/gameProfiles/default/00050000101a4300.ini +++ b/bin/gameProfiles/default/00050000101a4300.ini @@ -1,4 +1 @@ -# Beatbuddy (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Beatbuddy (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101a4800.ini b/bin/gameProfiles/default/00050000101a4800.ini index bb446c86..64e552e9 100644 --- a/bin/gameProfiles/default/00050000101a4800.ini +++ b/bin/gameProfiles/default/00050000101a4800.ini @@ -2,6 +2,3 @@ [General] loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101a4900.ini b/bin/gameProfiles/default/00050000101a4900.ini index 74fedef3..3bb54df4 100644 --- a/bin/gameProfiles/default/00050000101a4900.ini +++ b/bin/gameProfiles/default/00050000101a4900.ini @@ -1,4 +1 @@ -# Life of Pixel (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Life of Pixel (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101a7f00.ini b/bin/gameProfiles/default/00050000101a7f00.ini index 35f91d55..be575b52 100644 --- a/bin/gameProfiles/default/00050000101a7f00.ini +++ b/bin/gameProfiles/default/00050000101a7f00.ini @@ -1,4 +1 @@ -# Shiftlings (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Shiftlings (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101a9c00.ini b/bin/gameProfiles/default/00050000101a9c00.ini index 651c8c01..6df3d13a 100644 --- a/bin/gameProfiles/default/00050000101a9c00.ini +++ b/bin/gameProfiles/default/00050000101a9c00.ini @@ -1,4 +1 @@ -# Chompy Chomp Chomp Party (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Chompy Chomp Chomp Party (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101a9e00.ini b/bin/gameProfiles/default/00050000101a9e00.ini index 5fbd157c..a8853119 100644 --- a/bin/gameProfiles/default/00050000101a9e00.ini +++ 
b/bin/gameProfiles/default/00050000101a9e00.ini @@ -1,4 +1 @@ -# Chompy Chomp Chomp Party (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Chompy Chomp Chomp Party (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101acc00.ini b/bin/gameProfiles/default/00050000101acc00.ini index d94071ac..6dae9607 100644 --- a/bin/gameProfiles/default/00050000101acc00.ini +++ b/bin/gameProfiles/default/00050000101acc00.ini @@ -1,4 +1 @@ -# Shiftlings (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Shiftlings (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101b0100.ini b/bin/gameProfiles/default/00050000101b0100.ini index ed2f7bf9..1599daed 100644 --- a/bin/gameProfiles/default/00050000101b0100.ini +++ b/bin/gameProfiles/default/00050000101b0100.ini @@ -1,4 +1 @@ -# Funk of Titans (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Funk of Titans (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101b4e00.ini b/bin/gameProfiles/default/00050000101b4e00.ini index 38642cbe..a0b90655 100644 --- a/bin/gameProfiles/default/00050000101b4e00.ini +++ b/bin/gameProfiles/default/00050000101b4e00.ini @@ -1,4 +1 @@ -# Funk of Titans (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Funk of Titans (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101b9900.ini b/bin/gameProfiles/default/00050000101b9900.ini index ae410226..2e83fe40 100644 --- a/bin/gameProfiles/default/00050000101b9900.ini +++ b/bin/gameProfiles/default/00050000101b9900.ini @@ -2,6 +2,3 @@ [General] loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101bc300.ini b/bin/gameProfiles/default/00050000101bc300.ini index c7ec1da5..e880a0f1 100644 --- a/bin/gameProfiles/default/00050000101bc300.ini +++ b/bin/gameProfiles/default/00050000101bc300.ini @@ -1,4 +1 @@ -# Beatbuddy (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Beatbuddy (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101c3100.ini b/bin/gameProfiles/default/00050000101c3100.ini index d7c2e293..c6f5c569 100644 --- a/bin/gameProfiles/default/00050000101c3100.ini +++ b/bin/gameProfiles/default/00050000101c3100.ini @@ -1,4 +1 @@ -# Freedom Planet (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Freedom Planet (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101c4200.ini b/bin/gameProfiles/default/00050000101c4200.ini index 203ce9a7..c12944a3 100644 --- a/bin/gameProfiles/default/00050000101c4200.ini +++ b/bin/gameProfiles/default/00050000101c4200.ini @@ -1,4 +1 @@ -# The Peanuts Movie: Snoopy's Grand Adventure (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# The Peanuts Movie: Snoopy's Grand Adventure (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101c4300.ini b/bin/gameProfiles/default/00050000101c4300.ini index a2a79f38..549984d4 100644 --- a/bin/gameProfiles/default/00050000101c4300.ini +++ b/bin/gameProfiles/default/00050000101c4300.ini @@ -2,6 +2,3 @@ [CPU] cpuMode = Singlecore-Recompiler - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101c4c00.ini b/bin/gameProfiles/default/00050000101c4c00.ini index 6e8fd71f..08ad5e5b 
100644 --- a/bin/gameProfiles/default/00050000101c4c00.ini +++ b/bin/gameProfiles/default/00050000101c4c00.ini @@ -5,4 +5,3 @@ cpuMode = Singlecore-Recompiler [Graphics] accurateShaderMul = false -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101c4d00.ini b/bin/gameProfiles/default/00050000101c4d00.ini index 83301c0d..a1db58b0 100644 --- a/bin/gameProfiles/default/00050000101c4d00.ini +++ b/bin/gameProfiles/default/00050000101c4d00.ini @@ -5,4 +5,3 @@ cpuMode = Singlecore-Recompiler [Graphics] accurateShaderMul = false -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101c6c00.ini b/bin/gameProfiles/default/00050000101c6c00.ini index da9dd712..ff16d037 100644 --- a/bin/gameProfiles/default/00050000101c6c00.ini +++ b/bin/gameProfiles/default/00050000101c6c00.ini @@ -1,4 +1 @@ -# Typoman (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Typoman (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101c7600.ini b/bin/gameProfiles/default/00050000101c7600.ini index 14128c7b..599520e4 100644 --- a/bin/gameProfiles/default/00050000101c7600.ini +++ b/bin/gameProfiles/default/00050000101c7600.ini @@ -1,4 +1 @@ -# Pumped BMX + (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Pumped BMX + (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101c7b00.ini b/bin/gameProfiles/default/00050000101c7b00.ini index 7350d0b5..9b764f96 100644 --- a/bin/gameProfiles/default/00050000101c7b00.ini +++ b/bin/gameProfiles/default/00050000101c7b00.ini @@ -1,4 +1 @@ -# Chronicles of Teddy: Harmony of Exidus (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Chronicles of Teddy: Harmony of Exidus (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101c7d00.ini b/bin/gameProfiles/default/00050000101c7d00.ini index 45ce4448..60a0dd23 100644 --- a/bin/gameProfiles/default/00050000101c7d00.ini +++ b/bin/gameProfiles/default/00050000101c7d00.ini @@ -1,4 +1 @@ -# Pumped BMX + (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Pumped BMX + (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101c9400.ini b/bin/gameProfiles/default/00050000101c9400.ini index 86c1b839..585a9e6f 100644 --- a/bin/gameProfiles/default/00050000101c9400.ini +++ b/bin/gameProfiles/default/00050000101c9400.ini @@ -3,4 +3,3 @@ [Graphics] disableGPUFence = false accurateShaderMul = true -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101c9500.ini b/bin/gameProfiles/default/00050000101c9500.ini index 4f326b32..71485002 100644 --- a/bin/gameProfiles/default/00050000101c9500.ini +++ b/bin/gameProfiles/default/00050000101c9500.ini @@ -3,4 +3,3 @@ [Graphics] disableGPUFence = false accurateShaderMul = true -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101c9a00.ini b/bin/gameProfiles/default/00050000101c9a00.ini index 0cc7525b..026e593f 100644 --- a/bin/gameProfiles/default/00050000101c9a00.ini +++ b/bin/gameProfiles/default/00050000101c9a00.ini @@ -2,6 +2,3 @@ [CPU] cpuMode = Singlecore-Recompiler - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101cc900.ini b/bin/gameProfiles/default/00050000101cc900.ini index dded48b2..ceaa7e67 100644 --- 
a/bin/gameProfiles/default/00050000101cc900.ini +++ b/bin/gameProfiles/default/00050000101cc900.ini @@ -1,4 +1 @@ -# Freedom Planet (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Freedom Planet (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101ccf00.ini b/bin/gameProfiles/default/00050000101ccf00.ini index e71dfd8f..d3d497f1 100644 --- a/bin/gameProfiles/default/00050000101ccf00.ini +++ b/bin/gameProfiles/default/00050000101ccf00.ini @@ -1,4 +1 @@ -# Never Alone (Kisima Ingitchuna) (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Never Alone (Kisima Ingitchuna) (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101ce000.ini b/bin/gameProfiles/default/00050000101ce000.ini index 69aa9dae..30a271e8 100644 --- a/bin/gameProfiles/default/00050000101ce000.ini +++ b/bin/gameProfiles/default/00050000101ce000.ini @@ -1,4 +1 @@ -# Typoman (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Typoman (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101ce800.ini b/bin/gameProfiles/default/00050000101ce800.ini index 2cc75f8e..902e8e23 100644 --- a/bin/gameProfiles/default/00050000101ce800.ini +++ b/bin/gameProfiles/default/00050000101ce800.ini @@ -1,4 +1 @@ -# Never Alone (Kisima Ingitchuna) (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Never Alone (Kisima Ingitchuna) (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101d2100.ini b/bin/gameProfiles/default/00050000101d2100.ini index caac5cdc..830f5f81 100644 --- a/bin/gameProfiles/default/00050000101d2100.ini +++ b/bin/gameProfiles/default/00050000101d2100.ini @@ -1,5 +1,4 @@ # Little Inferno (US) -[CPU] +[Graphics] extendedTextureReadback = true -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101d4500.ini b/bin/gameProfiles/default/00050000101d4500.ini index 8ae9fc73..c8244324 100644 --- a/bin/gameProfiles/default/00050000101d4500.ini +++ b/bin/gameProfiles/default/00050000101d4500.ini @@ -1,4 +1 @@ -# Grumpy Reaper (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Grumpy Reaper (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101d4d00.ini b/bin/gameProfiles/default/00050000101d4d00.ini index 5df2ddbb..23887c1f 100644 --- a/bin/gameProfiles/default/00050000101d4d00.ini +++ b/bin/gameProfiles/default/00050000101d4d00.ini @@ -1,4 +1 @@ -# Joe's Diner (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Joe's Diner (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101d5000.ini b/bin/gameProfiles/default/00050000101d5000.ini index 2e1b8356..4cff3852 100644 --- a/bin/gameProfiles/default/00050000101d5000.ini +++ b/bin/gameProfiles/default/00050000101d5000.ini @@ -1,4 +1 @@ -# The Peanuts Movie: Snoopy's Grand Adventure (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# The Peanuts Movie: Snoopy's Grand Adventure (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101d5500.ini b/bin/gameProfiles/default/00050000101d5500.ini index 81bcef30..2e1f7542 100644 --- a/bin/gameProfiles/default/00050000101d5500.ini +++ b/bin/gameProfiles/default/00050000101d5500.ini @@ -1,4 +1 @@ -# Joe's Diner (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Joe's Diner (USA) \ No newline at 
end of file diff --git a/bin/gameProfiles/default/00050000101d6000.ini b/bin/gameProfiles/default/00050000101d6000.ini index 129477c2..09ff9c3c 100644 --- a/bin/gameProfiles/default/00050000101d6000.ini +++ b/bin/gameProfiles/default/00050000101d6000.ini @@ -2,6 +2,3 @@ [CPU] cpuMode = Singlecore-Recompiler - -[Graphics] -GPUBufferCacheAccuracy = 1 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101d6d00.ini b/bin/gameProfiles/default/00050000101d6d00.ini index 0bf677ba..a04e39c7 100644 --- a/bin/gameProfiles/default/00050000101d6d00.ini +++ b/bin/gameProfiles/default/00050000101d6d00.ini @@ -1,7 +1 @@ -# Runbow (EUR) - - - -[Graphics] - -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Runbow (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101d7500.ini b/bin/gameProfiles/default/00050000101d7500.ini index 8707efcd..992b00cc 100644 --- a/bin/gameProfiles/default/00050000101d7500.ini +++ b/bin/gameProfiles/default/00050000101d7500.ini @@ -2,6 +2,3 @@ [General] loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101d8900.ini b/bin/gameProfiles/default/00050000101d8900.ini index 29b9698f..aaf7d122 100644 --- a/bin/gameProfiles/default/00050000101d8900.ini +++ b/bin/gameProfiles/default/00050000101d8900.ini @@ -1,4 +1 @@ -# Slender: The Arrival (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Slender: The Arrival (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101d8d00.ini b/bin/gameProfiles/default/00050000101d8d00.ini index 864c446b..0b61b998 100644 --- a/bin/gameProfiles/default/00050000101d8d00.ini +++ b/bin/gameProfiles/default/00050000101d8d00.ini @@ -1,4 +1 @@ -# Rock 'N Racing Off Road DX (USA) - -[Graphics] -GPUBufferCacheAccuracy = 1 \ No newline at end of file +# Rock 'N Racing Off Road DX (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101d9600.ini b/bin/gameProfiles/default/00050000101d9600.ini index c2c62b6a..aa4f9a10 100644 --- a/bin/gameProfiles/default/00050000101d9600.ini +++ b/bin/gameProfiles/default/00050000101d9600.ini @@ -1,4 +1 @@ -# Rock 'N Racing Off Road DX (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 1 \ No newline at end of file +# Rock 'N Racing Off Road DX (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101d9d00.ini b/bin/gameProfiles/default/00050000101d9d00.ini index dc54dea2..30d07833 100644 --- a/bin/gameProfiles/default/00050000101d9d00.ini +++ b/bin/gameProfiles/default/00050000101d9d00.ini @@ -2,6 +2,3 @@ [General] loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101dac00.ini b/bin/gameProfiles/default/00050000101dac00.ini index 1780613f..bb988dac 100644 --- a/bin/gameProfiles/default/00050000101dac00.ini +++ b/bin/gameProfiles/default/00050000101dac00.ini @@ -1,4 +1 @@ -# Swap Fire - -[GPU] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Swap Fire \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101daf00.ini b/bin/gameProfiles/default/00050000101daf00.ini index 2c8bef7b..3ebc182e 100644 --- a/bin/gameProfiles/default/00050000101daf00.ini +++ b/bin/gameProfiles/default/00050000101daf00.ini @@ -1,4 +1 @@ -# Chronicles of Teddy: Harmony of Exidus (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Chronicles of Teddy: 
Harmony of Exidus (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101db000.ini b/bin/gameProfiles/default/00050000101db000.ini index 94736dbb..fafb6333 100644 --- a/bin/gameProfiles/default/00050000101db000.ini +++ b/bin/gameProfiles/default/00050000101db000.ini @@ -1,4 +1 @@ -# Oddworld New 'n' Tasty (US) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Oddworld New 'n' Tasty (US) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101dbb00.ini b/bin/gameProfiles/default/00050000101dbb00.ini index 092b54ec..7e08923d 100644 --- a/bin/gameProfiles/default/00050000101dbb00.ini +++ b/bin/gameProfiles/default/00050000101dbb00.ini @@ -1,4 +1 @@ -# Oddworld New 'n' Tasty (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Oddworld New 'n' Tasty (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101dbc00.ini b/bin/gameProfiles/default/00050000101dbc00.ini index 98c41e22..94808fc1 100644 --- a/bin/gameProfiles/default/00050000101dbc00.ini +++ b/bin/gameProfiles/default/00050000101dbc00.ini @@ -1,4 +1 @@ -# Star Ghost (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Star Ghost (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101dbe00.ini b/bin/gameProfiles/default/00050000101dbe00.ini index 66ec5de9..0145c649 100644 --- a/bin/gameProfiles/default/00050000101dbe00.ini +++ b/bin/gameProfiles/default/00050000101dbe00.ini @@ -1,7 +1,4 @@ # Minecraft: Wii U Edition (JPN) [General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +loadSharedLibraries = false \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101dbf00.ini b/bin/gameProfiles/default/00050000101dbf00.ini index 6f32b59d..6cab37ad 100644 --- a/bin/gameProfiles/default/00050000101dbf00.ini +++ b/bin/gameProfiles/default/00050000101dbf00.ini @@ -1,4 +1 @@ -# Angry Video Game Nerd Adventures (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Angry Video Game Nerd Adventures (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101dc000.ini b/bin/gameProfiles/default/00050000101dc000.ini index b4b8717c..8734af28 100644 --- a/bin/gameProfiles/default/00050000101dc000.ini +++ b/bin/gameProfiles/default/00050000101dc000.ini @@ -1,4 +1 @@ -# Vektor Wars (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Vektor Wars (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101dc200.ini b/bin/gameProfiles/default/00050000101dc200.ini index 0cec8b6d..eebe3ac5 100644 --- a/bin/gameProfiles/default/00050000101dc200.ini +++ b/bin/gameProfiles/default/00050000101dc200.ini @@ -1,4 +1 @@ -# Vektor Wars (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Vektor Wars (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101dd000.ini b/bin/gameProfiles/default/00050000101dd000.ini index 8f275d8c..33caf468 100644 --- a/bin/gameProfiles/default/00050000101dd000.ini +++ b/bin/gameProfiles/default/00050000101dd000.ini @@ -1,4 +1 @@ -# Star Ghost (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Star Ghost (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101dd600.ini b/bin/gameProfiles/default/00050000101dd600.ini index 7b180555..1a17ec4b 100644 --- 
a/bin/gameProfiles/default/00050000101dd600.ini +++ b/bin/gameProfiles/default/00050000101dd600.ini @@ -1,4 +1 @@ -# BIT.TRIP Presents... Runner2: Future Legend of Rhythm Alien (JPN) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# BIT.TRIP Presents... Runner2: Future Legend of Rhythm Alien (JPN) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101dd700.ini b/bin/gameProfiles/default/00050000101dd700.ini index 57b3a897..fbefda0e 100644 --- a/bin/gameProfiles/default/00050000101dd700.ini +++ b/bin/gameProfiles/default/00050000101dd700.ini @@ -1,7 +1 @@ -# Runbow (JPN) - - - -[Graphics] - -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Runbow (JPN) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101ddf00.ini b/bin/gameProfiles/default/00050000101ddf00.ini index a3adf2fc..5c802484 100644 --- a/bin/gameProfiles/default/00050000101ddf00.ini +++ b/bin/gameProfiles/default/00050000101ddf00.ini @@ -1,4 +1 @@ -# Hive Jump (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Hive Jump (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e0100.ini b/bin/gameProfiles/default/00050000101e0100.ini index 13f74180..738553d8 100644 --- a/bin/gameProfiles/default/00050000101e0100.ini +++ b/bin/gameProfiles/default/00050000101e0100.ini @@ -1,6 +1 @@ -# Minecraft: Story Mode (USA) - - - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Minecraft: Story Mode (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e1800.ini b/bin/gameProfiles/default/00050000101e1800.ini index bf35ca87..a372c127 100644 --- a/bin/gameProfiles/default/00050000101e1800.ini +++ b/bin/gameProfiles/default/00050000101e1800.ini @@ -1,4 +1 @@ -# Human Resource Machine (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Human Resource Machine (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e1a00.ini b/bin/gameProfiles/default/00050000101e1a00.ini index 23dca6a5..a90125c4 100644 --- a/bin/gameProfiles/default/00050000101e1a00.ini +++ b/bin/gameProfiles/default/00050000101e1a00.ini @@ -1,4 +1 @@ -# Human Resource Machine (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Human Resource Machine (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e1b00.ini b/bin/gameProfiles/default/00050000101e1b00.ini index 5675969a..88e9b456 100644 --- a/bin/gameProfiles/default/00050000101e1b00.ini +++ b/bin/gameProfiles/default/00050000101e1b00.ini @@ -2,6 +2,3 @@ [CPU] cpuTimer = hostBased - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e3800.ini b/bin/gameProfiles/default/00050000101e3800.ini index 4e6f4859..fdfa2cdf 100644 --- a/bin/gameProfiles/default/00050000101e3800.ini +++ b/bin/gameProfiles/default/00050000101e3800.ini @@ -1,7 +1 @@ -# Dual Core (USA) - -[General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Dual Core (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e4200.ini b/bin/gameProfiles/default/00050000101e4200.ini index 9e6a983b..ef94b291 100644 --- a/bin/gameProfiles/default/00050000101e4200.ini +++ b/bin/gameProfiles/default/00050000101e4200.ini @@ -6,6 +6,3 @@ useRDTSC = false [CPU] cpuTimer = cycleCounter cpuMode = Singlecore-Interpreter - -[Graphics] 
-GPUBufferCacheAccuracy = 0 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e5300.ini b/bin/gameProfiles/default/00050000101e5300.ini index c6f6674a..c04cd6b6 100644 --- a/bin/gameProfiles/default/00050000101e5300.ini +++ b/bin/gameProfiles/default/00050000101e5300.ini @@ -6,6 +6,3 @@ useRDTSC = false [CPU] cpuMode = Singlecore-Recompiler cpuTimer = cycleCounter - -[Graphics] -GPUBufferCacheAccuracy = 1 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e5400.ini b/bin/gameProfiles/default/00050000101e5400.ini index 299c9e40..221a23ed 100644 --- a/bin/gameProfiles/default/00050000101e5400.ini +++ b/bin/gameProfiles/default/00050000101e5400.ini @@ -6,6 +6,3 @@ useRDTSC = false [CPU] cpuMode = Singlecore-Recompiler cpuTimer = cycleCounter - -[Graphics] -GPUBufferCacheAccuracy = 1 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e5e00.ini b/bin/gameProfiles/default/00050000101e5e00.ini index 2736143b..ab9fc348 100644 --- a/bin/gameProfiles/default/00050000101e5e00.ini +++ b/bin/gameProfiles/default/00050000101e5e00.ini @@ -1,4 +1 @@ -# Chasing Dead (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 +# Chasing Dead (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e7300.ini b/bin/gameProfiles/default/00050000101e7300.ini index de2d9ace..092720b8 100644 --- a/bin/gameProfiles/default/00050000101e7300.ini +++ b/bin/gameProfiles/default/00050000101e7300.ini @@ -1,4 +1 @@ -# The Deer God (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# The Deer God (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e7400.ini b/bin/gameProfiles/default/00050000101e7400.ini index 38a64064..e8644aa7 100644 --- a/bin/gameProfiles/default/00050000101e7400.ini +++ b/bin/gameProfiles/default/00050000101e7400.ini @@ -1,4 +1 @@ -# Grumpy Reaper (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Grumpy Reaper (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e9300.ini b/bin/gameProfiles/default/00050000101e9300.ini index a88c29bc..24c0dad8 100644 --- a/bin/gameProfiles/default/00050000101e9300.ini +++ b/bin/gameProfiles/default/00050000101e9300.ini @@ -1,4 +1 @@ -# Gear Gauntlet (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Gear Gauntlet (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101e9400.ini b/bin/gameProfiles/default/00050000101e9400.ini index 6f914578..6b78201e 100644 --- a/bin/gameProfiles/default/00050000101e9400.ini +++ b/bin/gameProfiles/default/00050000101e9400.ini @@ -1,4 +1 @@ -# Gear Gauntlet (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Gear Gauntlet (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101eb300.ini b/bin/gameProfiles/default/00050000101eb300.ini index e5b62760..66c8dcb4 100644 --- a/bin/gameProfiles/default/00050000101eb300.ini +++ b/bin/gameProfiles/default/00050000101eb300.ini @@ -1,4 +1 @@ -# The Beggar's Ride (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# The Beggar's Ride (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101ec700.ini b/bin/gameProfiles/default/00050000101ec700.ini index 27e0f560..7dd85123 100644 --- a/bin/gameProfiles/default/00050000101ec700.ini +++ b/bin/gameProfiles/default/00050000101ec700.ini @@ -1,4 +1 @@ -# The Beggar's Ride (EUR) - -[Graphics] 
-GPUBufferCacheAccuracy = 0 \ No newline at end of file +# The Beggar's Ride (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101ecf00.ini b/bin/gameProfiles/default/00050000101ecf00.ini index 3191607c..e3b80181 100644 --- a/bin/gameProfiles/default/00050000101ecf00.ini +++ b/bin/gameProfiles/default/00050000101ecf00.ini @@ -1,4 +1 @@ -# Buddy & Me Dream Edition (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Buddy & Me Dream Edition (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101f1300.ini b/bin/gameProfiles/default/00050000101f1300.ini index 3b17b3a5..85683631 100644 --- a/bin/gameProfiles/default/00050000101f1300.ini +++ b/bin/gameProfiles/default/00050000101f1300.ini @@ -1,4 +1 @@ -# Armikrog (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Armikrog (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101f2800.ini b/bin/gameProfiles/default/00050000101f2800.ini index 65c08d62..a8d455dd 100644 --- a/bin/gameProfiles/default/00050000101f2800.ini +++ b/bin/gameProfiles/default/00050000101f2800.ini @@ -1,7 +1,4 @@ # 8Bit Hero (USA) -[Graphics] -GPUBufferCacheAccuracy = 0 - [CPU] cpuMode = Singlecore-Interpreter \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101f4a00.ini b/bin/gameProfiles/default/00050000101f4a00.ini index 2d9432da..e0ca60b7 100644 --- a/bin/gameProfiles/default/00050000101f4a00.ini +++ b/bin/gameProfiles/default/00050000101f4a00.ini @@ -1,4 +1 @@ -# Buddy & Me Dream Edition (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Buddy & Me Dream Edition (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101f5700.ini b/bin/gameProfiles/default/00050000101f5700.ini index a2d5119f..c0c98988 100644 --- a/bin/gameProfiles/default/00050000101f5700.ini +++ b/bin/gameProfiles/default/00050000101f5700.ini @@ -1,4 +1 @@ -# Jotun Valhalla Edition (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Jotun Valhalla Edition (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101f6f00.ini b/bin/gameProfiles/default/00050000101f6f00.ini index 92bf8b9e..4a590788 100644 --- a/bin/gameProfiles/default/00050000101f6f00.ini +++ b/bin/gameProfiles/default/00050000101f6f00.ini @@ -1,4 +1 @@ -# Jotun Valhalla Edition (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Jotun Valhalla Edition (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101f7600.ini b/bin/gameProfiles/default/00050000101f7600.ini index 88e55d2a..bbe32dff 100644 --- a/bin/gameProfiles/default/00050000101f7600.ini +++ b/bin/gameProfiles/default/00050000101f7600.ini @@ -1,7 +1 @@ -# Dual Core (EUR) - -[General] -loadSharedLibraries = false - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Dual Core (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101f9700.ini b/bin/gameProfiles/default/00050000101f9700.ini index 34c46581..a36f67a1 100644 --- a/bin/gameProfiles/default/00050000101f9700.ini +++ b/bin/gameProfiles/default/00050000101f9700.ini @@ -1,4 +1 @@ -# Darksiders Warmastered Edition (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Darksiders Warmastered Edition (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101fa600.ini b/bin/gameProfiles/default/00050000101fa600.ini index 
89ab8ade..653531f9 100644 --- a/bin/gameProfiles/default/00050000101fa600.ini +++ b/bin/gameProfiles/default/00050000101fa600.ini @@ -1,4 +1 @@ -# Darksiders Warmastered Edition (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Darksiders Warmastered Edition (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101fd100.ini b/bin/gameProfiles/default/00050000101fd100.ini index 6ce5a608..a0f02db2 100644 --- a/bin/gameProfiles/default/00050000101fd100.ini +++ b/bin/gameProfiles/default/00050000101fd100.ini @@ -1,4 +1 @@ -# Grumpy Reaper (JPN) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Grumpy Reaper (JPN) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101ff200.ini b/bin/gameProfiles/default/00050000101ff200.ini index 54e13b65..13b5470b 100644 --- a/bin/gameProfiles/default/00050000101ff200.ini +++ b/bin/gameProfiles/default/00050000101ff200.ini @@ -1,4 +1 @@ -# Exile's End (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Exile's End (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101ffc00.ini b/bin/gameProfiles/default/00050000101ffc00.ini index e01c407c..4d6850ae 100644 --- a/bin/gameProfiles/default/00050000101ffc00.ini +++ b/bin/gameProfiles/default/00050000101ffc00.ini @@ -1,5 +1,4 @@ # Ghost Blade HD (USA) [Graphics] -GPUBufferCacheAccuracy = 0 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/00050000101ffe00.ini b/bin/gameProfiles/default/00050000101ffe00.ini index 9225f393..123cc4b9 100644 --- a/bin/gameProfiles/default/00050000101ffe00.ini +++ b/bin/gameProfiles/default/00050000101ffe00.ini @@ -1,4 +1 @@ -# Tetrimos (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Tetrimos (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010200300.ini b/bin/gameProfiles/default/0005000010200300.ini index 47600923..b62d3c32 100644 --- a/bin/gameProfiles/default/0005000010200300.ini +++ b/bin/gameProfiles/default/0005000010200300.ini @@ -1,4 +1 @@ -# Armikrog (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Armikrog (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010200b00.ini b/bin/gameProfiles/default/0005000010200b00.ini index 8fc9da4b..7a6469b5 100644 --- a/bin/gameProfiles/default/0005000010200b00.ini +++ b/bin/gameProfiles/default/0005000010200b00.ini @@ -1,4 +1 @@ -# Tetrimos (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Tetrimos (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010204a00.ini b/bin/gameProfiles/default/0005000010204a00.ini index 8a0285c3..e0d9db5a 100644 --- a/bin/gameProfiles/default/0005000010204a00.ini +++ b/bin/gameProfiles/default/0005000010204a00.ini @@ -1,4 +1 @@ -# Exile's End (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Exile's End (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010207300.ini b/bin/gameProfiles/default/0005000010207300.ini index c678e7d6..56d9a42f 100644 --- a/bin/gameProfiles/default/0005000010207300.ini +++ b/bin/gameProfiles/default/0005000010207300.ini @@ -1,4 +1 @@ -# Koi DX (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Koi DX (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010207500.ini 
b/bin/gameProfiles/default/0005000010207500.ini index 090be3f2..1a4ab56d 100644 --- a/bin/gameProfiles/default/0005000010207500.ini +++ b/bin/gameProfiles/default/0005000010207500.ini @@ -1,4 +1 @@ -# Koi DX (USA) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Koi DX (USA) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001020a200.ini b/bin/gameProfiles/default/000500001020a200.ini index bef0c82e..6960a967 100644 --- a/bin/gameProfiles/default/000500001020a200.ini +++ b/bin/gameProfiles/default/000500001020a200.ini @@ -1,5 +1 @@ -# Minecraft: Story Mode (EUR) - - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Minecraft: Story Mode (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/000500001020b600.ini b/bin/gameProfiles/default/000500001020b600.ini index 74528c38..46e4c8da 100644 --- a/bin/gameProfiles/default/000500001020b600.ini +++ b/bin/gameProfiles/default/000500001020b600.ini @@ -1,5 +1,4 @@ # Ghost Blade HD (EUR) [Graphics] -GPUBufferCacheAccuracy = 0 streamoutBufferCacheSize = 48 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000010211b00.ini b/bin/gameProfiles/default/0005000010211b00.ini index 69534194..7e82f408 100644 --- a/bin/gameProfiles/default/0005000010211b00.ini +++ b/bin/gameProfiles/default/0005000010211b00.ini @@ -1,4 +1 @@ -# Sphere Slice (EUR) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# Sphere Slice (EUR) \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000C1012BC00.ini b/bin/gameProfiles/default/0005000C1012BC00.ini index 5c2c09d4..5f699eb6 100644 --- a/bin/gameProfiles/default/0005000C1012BC00.ini +++ b/bin/gameProfiles/default/0005000C1012BC00.ini @@ -1,8 +1,4 @@ # Pikmin 3 (JAP) - - [Graphics] - extendedTextureReadback = true -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000C1012BD00.ini b/bin/gameProfiles/default/0005000C1012BD00.ini index 8ab10175..cd27ff77 100644 --- a/bin/gameProfiles/default/0005000C1012BD00.ini +++ b/bin/gameProfiles/default/0005000C1012BD00.ini @@ -1,8 +1,4 @@ # Pikmin 3 (USA) - - [Graphics] - extendedTextureReadback = true -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000C1012BE00.ini b/bin/gameProfiles/default/0005000C1012BE00.ini index e7226ef4..6be6a929 100644 --- a/bin/gameProfiles/default/0005000C1012BE00.ini +++ b/bin/gameProfiles/default/0005000C1012BE00.ini @@ -1,8 +1,4 @@ # Pikmin 3 (EU) - - [Graphics] - extendedTextureReadback = true -GPUBufferCacheAccuracy = 2 \ No newline at end of file diff --git a/bin/gameProfiles/default/0005000e1019c800.ini b/bin/gameProfiles/default/0005000e1019c800.ini index 8c47898c..81adc4c9 100644 --- a/bin/gameProfiles/default/0005000e1019c800.ini +++ b/bin/gameProfiles/default/0005000e1019c800.ini @@ -1,4 +1 @@ -# TLoZ: Twilight Princess (JPN) - -[Graphics] -GPUBufferCacheAccuracy = 0 \ No newline at end of file +# TLoZ: Twilight Princess (JPN) \ No newline at end of file diff --git a/bin/resources/ar/‏‏cemu.mo b/bin/resources/ar/‏‏cemu.mo new file mode 100644 index 00000000..4062628b Binary files /dev/null and b/bin/resources/ar/‏‏cemu.mo differ diff --git a/bin/resources/ca/cemu.mo b/bin/resources/ca/cemu.mo index 7930c426..55640215 100644 Binary files a/bin/resources/ca/cemu.mo and b/bin/resources/ca/cemu.mo differ diff --git a/bin/resources/de/cemu.mo b/bin/resources/de/cemu.mo index 47d6a964..cd9edd3c 100644 Binary files 
a/bin/resources/de/cemu.mo and b/bin/resources/de/cemu.mo differ diff --git a/bin/resources/es/cemu.mo b/bin/resources/es/cemu.mo index 93d8f7e5..a43d4a1d 100644 Binary files a/bin/resources/es/cemu.mo and b/bin/resources/es/cemu.mo differ diff --git a/bin/resources/fr/cemu.mo b/bin/resources/fr/cemu.mo index 09bcd9f2..f3f3b498 100644 Binary files a/bin/resources/fr/cemu.mo and b/bin/resources/fr/cemu.mo differ diff --git a/bin/resources/he/cemu.mo b/bin/resources/he/cemu.mo new file mode 100644 index 00000000..072e48c6 Binary files /dev/null and b/bin/resources/he/cemu.mo differ diff --git a/bin/resources/hu/cemu.mo b/bin/resources/hu/cemu.mo index c66dd9d7..51b00d08 100644 Binary files a/bin/resources/hu/cemu.mo and b/bin/resources/hu/cemu.mo differ diff --git a/bin/resources/it/cemu.mo b/bin/resources/it/cemu.mo index a417f089..20d5bb93 100644 Binary files a/bin/resources/it/cemu.mo and b/bin/resources/it/cemu.mo differ diff --git a/bin/resources/ja/cemu.mo b/bin/resources/ja/cemu.mo index 5044fc61..ff2fadd8 100644 Binary files a/bin/resources/ja/cemu.mo and b/bin/resources/ja/cemu.mo differ diff --git a/bin/resources/ko/cemu.mo b/bin/resources/ko/cemu.mo index 27e63d6e..5ea5e1da 100644 Binary files a/bin/resources/ko/cemu.mo and b/bin/resources/ko/cemu.mo differ diff --git a/bin/resources/nb/cemu.mo b/bin/resources/nb/cemu.mo index 6b87f8c0..21149d82 100644 Binary files a/bin/resources/nb/cemu.mo and b/bin/resources/nb/cemu.mo differ diff --git a/bin/resources/nl/cemu.mo b/bin/resources/nl/cemu.mo index 5cbe532f..f9fcc226 100644 Binary files a/bin/resources/nl/cemu.mo and b/bin/resources/nl/cemu.mo differ diff --git a/bin/resources/pl/cemu.mo b/bin/resources/pl/cemu.mo index 4ff064f9..2d86b139 100644 Binary files a/bin/resources/pl/cemu.mo and b/bin/resources/pl/cemu.mo differ diff --git a/bin/resources/pt/cemu.mo b/bin/resources/pt/cemu.mo index 3b79163c..8ff9b167 100644 Binary files a/bin/resources/pt/cemu.mo and b/bin/resources/pt/cemu.mo differ diff --git a/bin/resources/ru/cemu.mo b/bin/resources/ru/cemu.mo index 46583b20..eb8f372f 100644 Binary files a/bin/resources/ru/cemu.mo and b/bin/resources/ru/cemu.mo differ diff --git a/bin/resources/sv/cemu.mo b/bin/resources/sv/cemu.mo index 13a4e827..c8fd68ee 100644 Binary files a/bin/resources/sv/cemu.mo and b/bin/resources/sv/cemu.mo differ diff --git a/bin/resources/tr/cemu.mo b/bin/resources/tr/cemu.mo index 0a4c3c63..5b9b47ba 100644 Binary files a/bin/resources/tr/cemu.mo and b/bin/resources/tr/cemu.mo differ diff --git a/bin/resources/uk/cemu.mo b/bin/resources/uk/cemu.mo new file mode 100644 index 00000000..c8f6d461 Binary files /dev/null and b/bin/resources/uk/cemu.mo differ diff --git a/bin/resources/zh/cemu.mo b/bin/resources/zh/cemu.mo index a07eb370..3e636971 100644 Binary files a/bin/resources/zh/cemu.mo and b/bin/resources/zh/cemu.mo differ diff --git a/bin/shaderCache/info.txt b/bin/shaderCache/info.txt deleted file mode 100644 index 962cf88b..00000000 --- a/bin/shaderCache/info.txt +++ /dev/null @@ -1 +0,0 @@ -If you plan to transfer the shader cache to a different PC or Cemu installation you only need to copy the 'transferable' directory. 
\ No newline at end of file diff --git a/boost.natvis b/boost.natvis new file mode 100644 index 00000000..2781a585 --- /dev/null +++ b/boost.natvis @@ -0,0 +1,26 @@ [boost.natvis: Visual Studio natvis debugger visualizers for Boost container types; the XML element markup is not recoverable here, only the visualized expressions survive: m_holder.m_size, m_holder.m_start, {{ size={m_holder.m_size} }}, static_capacity, ($T1*)m_holder.storage.data] diff --git a/cmake/ECMFindModuleHelpers.cmake b/cmake/ECMFindModuleHelpers.cmake new file mode 100644 index 00000000..a4837403 --- /dev/null +++ b/cmake/ECMFindModuleHelpers.cmake @@ -0,0 +1,279 @@ +# SPDX-FileCopyrightText: 2014 Alex Merry +# +# SPDX-License-Identifier: BSD-3-Clause + +#[=======================================================================[.rst: +ECMFindModuleHelpers +-------------------- + +Helper macros for find modules: ``ecm_find_package_version_check()``, +``ecm_find_package_parse_components()`` and +``ecm_find_package_handle_library_components()``. + +:: + + ecm_find_package_version_check(<name>) + +Prints warnings if the CMake version or the project's required CMake version +is older than that required by extra-cmake-modules. + +:: + + ecm_find_package_parse_components(<name> + RESULT_VAR <variable> + KNOWN_COMPONENTS <component1> [<component2> [...]] + [SKIP_DEPENDENCY_HANDLING]) + +This macro will populate <variable> with a list of components found in +<name>_FIND_COMPONENTS, after checking that all those components are in the +list of ``KNOWN_COMPONENTS``; if there are any unknown components, it will print +an error or warning (depending on the value of <name>_FIND_REQUIRED) and call +``return()``. + +The order of components in <variable> is guaranteed to match the order they +are listed in the ``KNOWN_COMPONENTS`` argument. + +If ``SKIP_DEPENDENCY_HANDLING`` is not set, for each component the variable +<name>_<component>_component_deps will be checked for dependent components. +If <component> is listed in <name>_FIND_COMPONENTS, then all its (transitive) +dependencies will also be added to <variable>. + +:: + + ecm_find_package_handle_library_components(<name> + COMPONENTS <component> [<component> [...]] + [SKIP_DEPENDENCY_HANDLING]) + [SKIP_PKG_CONFIG]) + +Creates an imported library target for each component. The operation of this +macro depends on the presence of a number of CMake variables. + +The <name>_<component>_lib variable should contain the name of this library, +and <name>_<component>_header variable should contain the name of a header +file associated with it (whatever relative path is normally passed to +'#include'). <name>_<component>_header_subdir variable can be used to specify +which subdirectory of the include path the headers will be found in. +``ecm_find_package_components()`` will then search for the library +and include directory (creating appropriate cache variables) and create an +imported library target named <name>::<component>. + +Additional variables can be used to provide additional information: + +If ``SKIP_PKG_CONFIG``, the <name>_<component>_pkg_config variable is set, and +pkg-config is found, the pkg-config module given by +<name>_<component>_pkg_config will be searched for and used to help locate the +library and header file. It will also be used to set +<name>_<component>_VERSION. + +Note that if version information is found via pkg-config, +<name>_<component>_FIND_VERSION can be set to require a particular version +for each component. + +If ``SKIP_DEPENDENCY_HANDLING`` is not set, the ``INTERFACE_LINK_LIBRARIES`` property +of the imported target for <component> will be set to contain the imported +targets for the components listed in <name>_<component>_component_deps. +<name>_<component>_FOUND will also be set to ``FALSE`` if any of the components in +<name>_<component>_component_deps are not found. This requires the components +in <name>_<component>_component_deps to be listed before <component> in the +``COMPONENTS`` argument.
+ +The following variables will be set: + +``<name>_TARGETS`` + the imported targets +``<name>_LIBRARIES`` + the found libraries +``<name>_INCLUDE_DIRS`` + the combined required include directories for the components +``<name>_DEFINITIONS`` + the "other" CFLAGS provided by pkg-config, if any +``<name>_VERSION`` + the value of ``<name>_<component>_VERSION`` for the first component that + has this variable set (note that components are searched for in the order + they are passed to the macro), although if it is already set, it will not + be altered +.. note:: + These variables are never cleared, so if + ``ecm_find_package_handle_library_components()`` is called multiple times with + different components (typically because of multiple ``find_package()`` calls) then + ``<name>_TARGETS``, for example, will contain all the targets found in any + call (although no duplicates). + +Since pre-1.0.0. +#]=======================================================================] + +include(CMakeParseArguments) + +macro(ecm_find_package_version_check module_name) + if(CMAKE_VERSION VERSION_LESS 3.16.0) + message(FATAL_ERROR "CMake 3.16.0 is required by Find${module_name}.cmake") + endif() + if(CMAKE_MINIMUM_REQUIRED_VERSION VERSION_LESS 3.16.0) + message(AUTHOR_WARNING "Your project should require at least CMake 3.16.0 to use Find${module_name}.cmake") + endif() +endmacro() + +macro(ecm_find_package_parse_components module_name) + set(ecm_fppc_options SKIP_DEPENDENCY_HANDLING) + set(ecm_fppc_oneValueArgs RESULT_VAR) + set(ecm_fppc_multiValueArgs KNOWN_COMPONENTS DEFAULT_COMPONENTS) + cmake_parse_arguments(ECM_FPPC "${ecm_fppc_options}" "${ecm_fppc_oneValueArgs}" "${ecm_fppc_multiValueArgs}" ${ARGN}) + + if(ECM_FPPC_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Unexpected arguments to ecm_find_package_parse_components: ${ECM_FPPC_UNPARSED_ARGUMENTS}") + endif() + if(NOT ECM_FPPC_RESULT_VAR) + message(FATAL_ERROR "Missing RESULT_VAR argument to ecm_find_package_parse_components") + endif() + if(NOT ECM_FPPC_KNOWN_COMPONENTS) + message(FATAL_ERROR "Missing KNOWN_COMPONENTS argument to ecm_find_package_parse_components") + endif() + if(NOT ECM_FPPC_DEFAULT_COMPONENTS) + set(ECM_FPPC_DEFAULT_COMPONENTS ${ECM_FPPC_KNOWN_COMPONENTS}) + endif() + + if(${module_name}_FIND_COMPONENTS) + set(ecm_fppc_requestedComps ${${module_name}_FIND_COMPONENTS}) + + if(NOT ECM_FPPC_SKIP_DEPENDENCY_HANDLING) + # Make sure deps are included + foreach(ecm_fppc_comp ${ecm_fppc_requestedComps}) + foreach(ecm_fppc_dep_comp ${${module_name}_${ecm_fppc_comp}_component_deps}) + list(FIND ecm_fppc_requestedComps "${ecm_fppc_dep_comp}" ecm_fppc_index) + if("${ecm_fppc_index}" STREQUAL "-1") + if(NOT ${module_name}_FIND_QUIETLY) + message(STATUS "${module_name}: ${ecm_fppc_comp} requires ${${module_name}_${ecm_fppc_comp}_component_deps}") + endif() + list(APPEND ecm_fppc_requestedComps "${ecm_fppc_dep_comp}") + endif() + endforeach() + endforeach() + else() + message(STATUS "Skipping dependency handling for ${module_name}") + endif() + list(REMOVE_DUPLICATES ecm_fppc_requestedComps) + + # This makes sure components are listed in the same order as + # KNOWN_COMPONENTS (potentially important for inter-dependencies) + set(${ECM_FPPC_RESULT_VAR}) + foreach(ecm_fppc_comp ${ECM_FPPC_KNOWN_COMPONENTS}) + list(FIND ecm_fppc_requestedComps "${ecm_fppc_comp}" ecm_fppc_index) + if(NOT "${ecm_fppc_index}" STREQUAL "-1") + list(APPEND ${ECM_FPPC_RESULT_VAR} "${ecm_fppc_comp}") + list(REMOVE_AT ecm_fppc_requestedComps ${ecm_fppc_index}) + endif() + endforeach() + # if there are any left, they are unknown
components + if(ecm_fppc_requestedComps) + set(ecm_fppc_msgType STATUS) + if(${module_name}_FIND_REQUIRED) + set(ecm_fppc_msgType FATAL_ERROR) + endif() + if(NOT ${module_name}_FIND_QUIETLY) + message(${ecm_fppc_msgType} "${module_name}: requested unknown components ${ecm_fppc_requestedComps}") + endif() + return() + endif() + else() + set(${ECM_FPPC_RESULT_VAR} ${ECM_FPPC_DEFAULT_COMPONENTS}) + endif() +endmacro() + +macro(ecm_find_package_handle_library_components module_name) + set(ecm_fpwc_options SKIP_PKG_CONFIG SKIP_DEPENDENCY_HANDLING) + set(ecm_fpwc_oneValueArgs) + set(ecm_fpwc_multiValueArgs COMPONENTS) + cmake_parse_arguments(ECM_FPWC "${ecm_fpwc_options}" "${ecm_fpwc_oneValueArgs}" "${ecm_fpwc_multiValueArgs}" ${ARGN}) + + if(ECM_FPWC_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Unexpected arguments to ecm_find_package_handle_components: ${ECM_FPWC_UNPARSED_ARGUMENTS}") + endif() + if(NOT ECM_FPWC_COMPONENTS) + message(FATAL_ERROR "Missing COMPONENTS argument to ecm_find_package_handle_components") + endif() + + include(FindPackageHandleStandardArgs) + find_package(PkgConfig QUIET) + foreach(ecm_fpwc_comp ${ECM_FPWC_COMPONENTS}) + set(ecm_fpwc_dep_vars) + set(ecm_fpwc_dep_targets) + if(NOT SKIP_DEPENDENCY_HANDLING) + foreach(ecm_fpwc_dep ${${module_name}_${ecm_fpwc_comp}_component_deps}) + list(APPEND ecm_fpwc_dep_vars "${module_name}_${ecm_fpwc_dep}_FOUND") + list(APPEND ecm_fpwc_dep_targets "${module_name}::${ecm_fpwc_dep}") + endforeach() + endif() + + if(NOT ECM_FPWC_SKIP_PKG_CONFIG AND ${module_name}_${ecm_fpwc_comp}_pkg_config) + pkg_check_modules(PKG_${module_name}_${ecm_fpwc_comp} QUIET + ${${module_name}_${ecm_fpwc_comp}_pkg_config}) + endif() + + find_path(${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR + NAMES ${${module_name}_${ecm_fpwc_comp}_header} + HINTS ${PKG_${module_name}_${ecm_fpwc_comp}_INCLUDE_DIRS} + PATH_SUFFIXES ${${module_name}_${ecm_fpwc_comp}_header_subdir} + ) + find_library(${module_name}_${ecm_fpwc_comp}_LIBRARY + NAMES ${${module_name}_${ecm_fpwc_comp}_lib} + HINTS ${PKG_${module_name}_${ecm_fpwc_comp}_LIBRARY_DIRS} + ) + + set(${module_name}_${ecm_fpwc_comp}_VERSION "${PKG_${module_name}_${ecm_fpwc_comp}_VERSION}") + if(NOT ${module_name}_VERSION) + set(${module_name}_VERSION ${${module_name}_${ecm_fpwc_comp}_VERSION}) + endif() + + set(FPHSA_NAME_MISMATCHED 1) + find_package_handle_standard_args(${module_name}_${ecm_fpwc_comp} + FOUND_VAR + ${module_name}_${ecm_fpwc_comp}_FOUND + REQUIRED_VARS + ${module_name}_${ecm_fpwc_comp}_LIBRARY + ${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR + ${ecm_fpwc_dep_vars} + VERSION_VAR + ${module_name}_${ecm_fpwc_comp}_VERSION + ) + unset(FPHSA_NAME_MISMATCHED) + + mark_as_advanced( + ${module_name}_${ecm_fpwc_comp}_LIBRARY + ${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR + ) + + if(${module_name}_${ecm_fpwc_comp}_FOUND) + list(APPEND ${module_name}_LIBRARIES + "${${module_name}_${ecm_fpwc_comp}_LIBRARY}") + list(APPEND ${module_name}_INCLUDE_DIRS + "${${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR}") + set(${module_name}_DEFINITIONS + ${${module_name}_DEFINITIONS} + ${PKG_${module_name}_${ecm_fpwc_comp}_DEFINITIONS}) + if(NOT TARGET ${module_name}::${ecm_fpwc_comp}) + add_library(${module_name}::${ecm_fpwc_comp} UNKNOWN IMPORTED) + set_target_properties(${module_name}::${ecm_fpwc_comp} PROPERTIES + IMPORTED_LOCATION "${${module_name}_${ecm_fpwc_comp}_LIBRARY}" + INTERFACE_COMPILE_OPTIONS "${PKG_${module_name}_${ecm_fpwc_comp}_DEFINITIONS}" + INTERFACE_INCLUDE_DIRECTORIES "${${module_name}_${ecm_fpwc_comp}_INCLUDE_DIR}" + 
INTERFACE_LINK_LIBRARIES "${ecm_fpwc_dep_targets}" + ) + endif() + list(APPEND ${module_name}_TARGETS + "${module_name}::${ecm_fpwc_comp}") + endif() + endforeach() + if(${module_name}_LIBRARIES) + list(REMOVE_DUPLICATES ${module_name}_LIBRARIES) + endif() + if(${module_name}_INCLUDE_DIRS) + list(REMOVE_DUPLICATES ${module_name}_INCLUDE_DIRS) + endif() + if(${module_name}_DEFINITIONS) + list(REMOVE_DUPLICATES ${module_name}_DEFINITIONS) + endif() + if(${module_name}_TARGETS) + list(REMOVE_DUPLICATES ${module_name}_TARGETS) + endif() +endmacro() diff --git a/cmake/ECMFindModuleHelpersStub.cmake b/cmake/ECMFindModuleHelpersStub.cmake new file mode 100644 index 00000000..acc8c804 --- /dev/null +++ b/cmake/ECMFindModuleHelpersStub.cmake @@ -0,0 +1 @@ +include(${CMAKE_CURRENT_LIST_DIR}/ECMFindModuleHelpers.cmake) diff --git a/cmake/FindGTK3.cmake b/cmake/FindGTK3.cmake new file mode 100644 index 00000000..06f6c18e --- /dev/null +++ b/cmake/FindGTK3.cmake @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: 2022 Andrea Pappacoda +# SPDX-License-Identifier: ISC + +include(FindPackageHandleStandardArgs) + +find_package(PkgConfig) +if (PKG_CONFIG_FOUND) + pkg_search_module(GTK3 IMPORTED_TARGET gtk+-3.0) + if (GTK3_FOUND) + add_library(GTK3::gtk ALIAS PkgConfig::GTK3) + endif() + find_package_handle_standard_args(GTK3 + REQUIRED_VARS GTK3_LINK_LIBRARIES + VERSION_VAR GTK3_VERSION + ) +endif() diff --git a/cmake/FindWayland.cmake b/cmake/FindWayland.cmake new file mode 100644 index 00000000..926fd485 --- /dev/null +++ b/cmake/FindWayland.cmake @@ -0,0 +1,137 @@ +# SPDX-FileCopyrightText: 2014 Alex Merry +# SPDX-FileCopyrightText: 2014 Martin Gräßlin +# +# SPDX-License-Identifier: BSD-3-Clause + +#[=======================================================================[.rst: +FindWayland +----------- + +Try to find Wayland. + +This is a component-based find module, which makes use of the COMPONENTS +and OPTIONAL_COMPONENTS arguments to find_module. The following components +are available:: + + Client Server Cursor Egl + +If no components are specified, this module will act as though all components +were passed to OPTIONAL_COMPONENTS. + +This module will define the following variables, independently of the +components searched for or found: + +``Wayland_FOUND`` + TRUE if (the requested version of) Wayland is available +``Wayland_VERSION`` + Found Wayland version +``Wayland_TARGETS`` + A list of all targets imported by this module (note that there may be more + than the components that were requested) +``Wayland_LIBRARIES`` + This can be passed to target_link_libraries() instead of the imported + targets +``Wayland_INCLUDE_DIRS`` + This should be passed to target_include_directories() if the targets are + not used for linking +``Wayland_DEFINITIONS`` + This should be passed to target_compile_options() if the targets are not + used for linking +``Wayland_DATADIR`` + The core wayland protocols data directory + Since 5.73.0 + +For each searched-for components, ``Wayland__FOUND`` will be set to +TRUE if the corresponding Wayland library was found, and FALSE otherwise. If +``Wayland__FOUND`` is TRUE, the imported target +``Wayland::`` will be defined. This module will also attempt to +determine ``Wayland_*_VERSION`` variables for each imported target, although +``Wayland_VERSION`` should normally be sufficient. + +In general we recommend using the imported targets, as they are easier to use +and provide more control. 
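+As a minimal usage sketch of the component targets (``my_app`` is a hypothetical
+consumer target defined elsewhere in the project)::
+
+  find_package(Wayland REQUIRED COMPONENTS Client Egl)
+  target_link_libraries(my_app PRIVATE Wayland::Client Wayland::Egl)
+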
Bear in mind, however, that if any target is in the +link interface of an exported library, it must be made available by the +package config file. + +Since pre-1.0.0. +#]=======================================================================] + +include(${CMAKE_CURRENT_LIST_DIR}/ECMFindModuleHelpersStub.cmake) + +ecm_find_package_version_check(Wayland) + +set(Wayland_known_components + Client + Server + Cursor + Egl +) +foreach(_comp ${Wayland_known_components}) + string(TOLOWER "${_comp}" _lc_comp) + set(Wayland_${_comp}_component_deps) + set(Wayland_${_comp}_pkg_config "wayland-${_lc_comp}") + set(Wayland_${_comp}_lib "wayland-${_lc_comp}") + set(Wayland_${_comp}_header "wayland-${_lc_comp}.h") +endforeach() +set(Wayland_Egl_component_deps Client) + +ecm_find_package_parse_components(Wayland + RESULT_VAR Wayland_components + KNOWN_COMPONENTS ${Wayland_known_components} +) +ecm_find_package_handle_library_components(Wayland + COMPONENTS ${Wayland_components} +) + +# If pkg-config didn't provide us with version information, +# try to extract it from wayland-version.h +# (Note that the version from wayland-egl.pc will probably be +# the Mesa version, rather than the Wayland version, but that +# version will be ignored as we always find wayland-client.pc +# first). +if(NOT Wayland_VERSION) + find_file(Wayland_VERSION_HEADER + NAMES wayland-version.h + HINTS ${Wayland_INCLUDE_DIRS} + ) + mark_as_advanced(Wayland_VERSION_HEADER) + if(Wayland_VERSION_HEADER) + file(READ ${Wayland_VERSION_HEADER} _wayland_version_header_contents) + string(REGEX REPLACE + "^.*[ \t]+WAYLAND_VERSION[ \t]+\"([0-9.]*)\".*$" + "\\1" + Wayland_VERSION + "${_wayland_version_header_contents}" + ) + unset(_wayland_version_header_contents) + endif() +endif() + +find_package_handle_standard_args(Wayland + FOUND_VAR + Wayland_FOUND + REQUIRED_VARS + Wayland_LIBRARIES + VERSION_VAR + Wayland_VERSION + HANDLE_COMPONENTS +) + +pkg_get_variable(Wayland_DATADIR wayland-scanner pkgdatadir) +if (CMAKE_CROSSCOMPILING AND (NOT EXISTS "${Wayland_DATADIR}/wayland.xml")) + # PKG_CONFIG_SYSROOT_DIR only applies to -I and -L flags, so pkg-config + # does not prepend CMAKE_SYSROOT when cross-compiling unless you pass + # --define-prefix explicitly. Therefore we have to manually do prepend + # it here when cross-compiling. + # See https://gitlab.kitware.com/cmake/cmake/-/issues/16647#note_844761 + set(Wayland_DATADIR ${CMAKE_SYSROOT}${Wayland_DATADIR}) +endif() +if (NOT EXISTS "${Wayland_DATADIR}/wayland.xml") + message(WARNING "Could not find wayland.xml in ${Wayland_DATADIR}") +endif() + +include(FeatureSummary) +set_package_properties(Wayland PROPERTIES + URL "https://wayland.freedesktop.org/" + DESCRIPTION "C library implementation of the Wayland protocol: a protocol for a compositor to talk to its clients" +) diff --git a/cmake/FindWaylandProtocols.cmake b/cmake/FindWaylandProtocols.cmake new file mode 100644 index 00000000..a4449885 --- /dev/null +++ b/cmake/FindWaylandProtocols.cmake @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: 2019 Vlad Zahorodnii +# +# SPDX-License-Identifier: BSD-3-Clause + +#[=======================================================================[.rst: +FindWaylandProtocols +-------------------- + +Try to find wayland-protocols on a Unix system. 
+ +This will define the following variables: + +``WaylandProtocols_FOUND`` + True if (the requested version of) wayland-protocols is available +``WaylandProtocols_VERSION`` + The version of wayland-protocols +``WaylandProtocols_DATADIR`` + The wayland protocols data directory +#]=======================================================================] + +find_package(PkgConfig QUIET) +pkg_check_modules(PKG_wayland_protocols QUIET wayland-protocols) + +set(WaylandProtocols_VERSION ${PKG_wayland_protocols_VERSION}) +pkg_get_variable(WaylandProtocols_DATADIR wayland-protocols pkgdatadir) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(WaylandProtocols + FOUND_VAR WaylandProtocols_FOUND + REQUIRED_VARS WaylandProtocols_DATADIR + VERSION_VAR WaylandProtocols_VERSION +) + +include(FeatureSummary) +set_package_properties(WaylandProtocols PROPERTIES + DESCRIPTION "Specifications of extended Wayland protocols" + URL "https://wayland.freedesktop.org/" +) diff --git a/cmake/FindWaylandScanner.cmake b/cmake/FindWaylandScanner.cmake new file mode 100644 index 00000000..f8495800 --- /dev/null +++ b/cmake/FindWaylandScanner.cmake @@ -0,0 +1,162 @@ +# SPDX-FileCopyrightText: 2012-2014 Pier Luigi Fiorini +# +# SPDX-License-Identifier: BSD-3-Clause + +#[=======================================================================[.rst: +FindWaylandScanner +------------------ + +Try to find wayland-scanner. + +If the wayland-scanner executable is not in your PATH, you can provide +an alternative name or full path location with the ``WaylandScanner_EXECUTABLE`` +variable. + +This will define the following variables: + +``WaylandScanner_FOUND`` + True if wayland-scanner is available. + +``WaylandScanner_EXECUTABLE`` + The wayland-scanner executable. + +If ``WaylandScanner_FOUND`` is TRUE, it will also define the following imported +target: + +``Wayland::Scanner`` + The wayland-scanner executable. + +This module provides the following functions to generate C protocol +implementations: + + - ``ecm_add_wayland_client_protocol`` + - ``ecm_add_wayland_server_protocol`` + +:: + + ecm_add_wayland_client_protocol( + PROTOCOL + BASENAME ) + + ecm_add_wayland_client_protocol( + PROTOCOL + BASENAME ) + +Generate Wayland client protocol files from ```` XML +definition for the ```` interface and append those files +to ```` or ````. + +:: + + ecm_add_wayland_server_protocol( + PROTOCOL + BASENAME ) + + ecm_add_wayland_server_protocol( + PROTOCOL + BASENAME ) + +Generate Wayland server protocol files from ```` XML +definition for the ```` interface and append those files +to ```` or ````. + +Since 1.4.0. 
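+For example, the following sketch (with a hypothetical ``my_app`` target and a
+hypothetical ``my-protocol.xml`` protocol description) generates the client glue
+code and attaches it to the target::
+
+  find_package(WaylandScanner REQUIRED)
+  ecm_add_wayland_client_protocol(my_app
+      PROTOCOL ${CMAKE_CURRENT_SOURCE_DIR}/my-protocol.xml
+      BASENAME my-protocol)
+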
+#]=======================================================================] + +include(${CMAKE_CURRENT_LIST_DIR}/ECMFindModuleHelpersStub.cmake) + +ecm_find_package_version_check(WaylandScanner) + +# Find wayland-scanner +find_program(WaylandScanner_EXECUTABLE NAMES wayland-scanner) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(WaylandScanner + FOUND_VAR + WaylandScanner_FOUND + REQUIRED_VARS + WaylandScanner_EXECUTABLE +) + +mark_as_advanced(WaylandScanner_EXECUTABLE) + +if(NOT TARGET Wayland::Scanner AND WaylandScanner_FOUND) + add_executable(Wayland::Scanner IMPORTED) + set_target_properties(Wayland::Scanner PROPERTIES + IMPORTED_LOCATION "${WaylandScanner_EXECUTABLE}" + ) +endif() + +include(FeatureSummary) +set_package_properties(WaylandScanner PROPERTIES + URL "https://wayland.freedesktop.org/" + DESCRIPTION "Executable that converts XML protocol files to C code" +) + + +include(CMakeParseArguments) + +function(ecm_add_wayland_client_protocol target_or_sources_var) + # Parse arguments + set(oneValueArgs PROTOCOL BASENAME) + cmake_parse_arguments(ARGS "" "${oneValueArgs}" "" ${ARGN}) + + if(ARGS_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Unknown keywords given to ecm_add_wayland_client_protocol(): \"${ARGS_UNPARSED_ARGUMENTS}\"") + endif() + + get_filename_component(_infile ${ARGS_PROTOCOL} ABSOLUTE) + set(_client_header "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-client-protocol.h") + set(_code "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-protocol.c") + + set_source_files_properties(${_client_header} GENERATED) + set_source_files_properties(${_code} GENERATED) + set_property(SOURCE ${_client_header} ${_code} PROPERTY SKIP_AUTOMOC ON) + + add_custom_command(OUTPUT "${_client_header}" + COMMAND ${WaylandScanner_EXECUTABLE} client-header ${_infile} ${_client_header} + DEPENDS ${_infile} VERBATIM) + + add_custom_command(OUTPUT "${_code}" + COMMAND ${WaylandScanner_EXECUTABLE} public-code ${_infile} ${_code} + DEPENDS ${_infile} ${_client_header} VERBATIM) + + if (TARGET ${target_or_sources_var}) + target_sources(${target_or_sources_var} PRIVATE "${_client_header}" "${_code}") + else() + list(APPEND ${target_or_sources_var} "${_client_header}" "${_code}") + set(${target_or_sources_var} ${${target_or_sources_var}} PARENT_SCOPE) + endif() +endfunction() + + +function(ecm_add_wayland_server_protocol target_or_sources_var) + # Parse arguments + set(oneValueArgs PROTOCOL BASENAME) + cmake_parse_arguments(ARGS "" "${oneValueArgs}" "" ${ARGN}) + + if(ARGS_UNPARSED_ARGUMENTS) + message(FATAL_ERROR "Unknown keywords given to ecm_add_wayland_server_protocol(): \"${ARGS_UNPARSED_ARGUMENTS}\"") + endif() + + ecm_add_wayland_client_protocol(${target_or_sources_var} + PROTOCOL ${ARGS_PROTOCOL} + BASENAME ${ARGS_BASENAME}) + + get_filename_component(_infile ${ARGS_PROTOCOL} ABSOLUTE) + set(_server_header "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-server-protocol.h") + set(_server_code "${CMAKE_CURRENT_BINARY_DIR}/wayland-${ARGS_BASENAME}-protocol.c") + set_property(SOURCE ${_server_header} ${_server_code} PROPERTY SKIP_AUTOMOC ON) + set_source_files_properties(${_server_header} GENERATED) + + add_custom_command(OUTPUT "${_server_header}" + COMMAND ${WaylandScanner_EXECUTABLE} server-header ${_infile} ${_server_header} + DEPENDS ${_infile} VERBATIM) + + if (TARGET ${target_or_sources_var}) + target_sources(${target_or_sources_var} PRIVATE "${_server_header}") + else() + list(APPEND ${target_or_sources_var} "${_server_header}") + 
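+        # ecm_add_wayland_server_protocol() runs in its own function scope, so when the
+        # caller passed a plain source-list variable the updated list has to be written
+        # back with PARENT_SCOPE.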
set(${target_or_sources_var} ${${target_or_sources_var}} PARENT_SCOPE) + endif() +endfunction() diff --git a/cmake/Findbluez.cmake b/cmake/Findbluez.cmake new file mode 100644 index 00000000..007cdac9 --- /dev/null +++ b/cmake/Findbluez.cmake @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: 2022 Andrea Pappacoda +# SPDX-License-Identifier: ISC + +find_package(bluez CONFIG) +if (NOT bluez_FOUND) + find_package(PkgConfig) + if (PKG_CONFIG_FOUND) + pkg_search_module(bluez IMPORTED_TARGET GLOBAL bluez-1.0 bluez) + if (bluez_FOUND) + add_library(bluez::bluez ALIAS PkgConfig::bluez) + endif () + endif () +endif () + +find_package_handle_standard_args(bluez + REQUIRED_VARS + bluez_LINK_LIBRARIES + bluez_FOUND + VERSION_VAR bluez_VERSION +) diff --git a/cmake/Findlibusb.cmake b/cmake/Findlibusb.cmake new file mode 100644 index 00000000..85da6736 --- /dev/null +++ b/cmake/Findlibusb.cmake @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: 2022 Andrea Pappacoda +# SPDX-License-Identifier: ISC + +find_package(libusb CONFIG) +if (NOT libusb_FOUND) + find_package(PkgConfig) + if (PKG_CONFIG_FOUND) + pkg_search_module(libusb IMPORTED_TARGET GLOBAL libusb-1.0 libusb) + if (libusb_FOUND) + add_library(libusb::libusb ALIAS PkgConfig::libusb) + endif () + endif () +endif () + +find_package_handle_standard_args(libusb + REQUIRED_VARS + libusb_LINK_LIBRARIES + libusb_FOUND + VERSION_VAR libusb_VERSION +) diff --git a/dependencies/Vulkan-Headers b/dependencies/Vulkan-Headers index 71567370..9b9fd871 160000 --- a/dependencies/Vulkan-Headers +++ b/dependencies/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 715673702f5b18ffb8e5832e67cf731468d32ac6 +Subproject commit 9b9fd871b08110cd8f0b74e721b03213d9cc3081 diff --git a/dependencies/ZArchive b/dependencies/ZArchive index 48914a07..d2c71773 160000 --- a/dependencies/ZArchive +++ b/dependencies/ZArchive @@ -1 +1 @@ -Subproject commit 48914a07df3c213333c580bb5e5bb3393442ca5b +Subproject commit d2c717730092c7bf8cbb033b12fd4001b7c4d932 diff --git a/dependencies/cubeb b/dependencies/cubeb index dc511c6b..2071354a 160000 --- a/dependencies/cubeb +++ b/dependencies/cubeb @@ -1 +1 @@ -Subproject commit dc511c6b3597b6384d28949285b9289e009830ea +Subproject commit 2071354a69aca7ed6df3b4222e305746c2113f60 diff --git a/dependencies/gamemode/CMakeLists.txt b/dependencies/gamemode/CMakeLists.txt new file mode 100644 index 00000000..78275174 --- /dev/null +++ b/dependencies/gamemode/CMakeLists.txt @@ -0,0 +1,4 @@ +project( gamemode LANGUAGES C ) +add_library (gamemode + "lib/gamemode_client.h" + "lib/client_loader.c") \ No newline at end of file diff --git a/dependencies/gamemode/lib/client_loader.c b/dependencies/gamemode/lib/client_loader.c new file mode 100644 index 00000000..08c86d06 --- /dev/null +++ b/dependencies/gamemode/lib/client_loader.c @@ -0,0 +1,35 @@ +/* + +Copyright (c) 2017-2019, Feral Interactive +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ * Neither the name of Feral Interactive nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + + */ + +// Simply include the header with GAMEMODE_AUTO set +// This will ensure it calls the functions when it's loaded +#define GAMEMODE_AUTO +#include "gamemode_client.h" diff --git a/dependencies/gamemode/lib/gamemode_client.h b/dependencies/gamemode/lib/gamemode_client.h new file mode 100644 index 00000000..b9f64fe4 --- /dev/null +++ b/dependencies/gamemode/lib/gamemode_client.h @@ -0,0 +1,376 @@ +/* + +Copyright (c) 2017-2019, Feral Interactive +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Feral Interactive nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + + */ +#ifndef CLIENT_GAMEMODE_H +#define CLIENT_GAMEMODE_H +/* + * GameMode supports the following client functions + * Requests are refcounted in the daemon + * + * int gamemode_request_start() - Request gamemode starts + * 0 if the request was sent successfully + * -1 if the request failed + * + * int gamemode_request_end() - Request gamemode ends + * 0 if the request was sent successfully + * -1 if the request failed + * + * GAMEMODE_AUTO can be defined to make the above two functions apply during static init and + * destruction, as appropriate. 
In this configuration, errors will be printed to stderr + * + * int gamemode_query_status() - Query the current status of gamemode + * 0 if gamemode is inactive + * 1 if gamemode is active + * 2 if gamemode is active and this client is registered + * -1 if the query failed + * + * int gamemode_request_start_for(pid_t pid) - Request gamemode starts for another process + * 0 if the request was sent successfully + * -1 if the request failed + * -2 if the request was rejected + * + * int gamemode_request_end_for(pid_t pid) - Request gamemode ends for another process + * 0 if the request was sent successfully + * -1 if the request failed + * -2 if the request was rejected + * + * int gamemode_query_status_for(pid_t pid) - Query status of gamemode for another process + * 0 if gamemode is inactive + * 1 if gamemode is active + * 2 if gamemode is active and this client is registered + * -1 if the query failed + * + * const char* gamemode_error_string() - Get an error string + * returns a string describing any of the above errors + * + * Note: All the above requests can be blocking - dbus requests can and will block while the daemon + * handles the request. It is not recommended to make these calls in performance critical code + */ + +#include +#include + +#include +#include + +#include + +#include + +static char internal_gamemode_client_error_string[512] = { 0 }; + +/** + * Load libgamemode dynamically to dislodge us from most dependencies. + * This allows clients to link and/or use this regardless of runtime. + * See SDL2 for an example of the reasoning behind this in terms of + * dynamic versioning as well. + */ +static volatile int internal_libgamemode_loaded = 1; + +/* Typedefs for the functions to load */ +typedef int (*api_call_return_int)(void); +typedef const char *(*api_call_return_cstring)(void); +typedef int (*api_call_pid_return_int)(pid_t); + +/* Storage for functors */ +static api_call_return_int REAL_internal_gamemode_request_start = NULL; +static api_call_return_int REAL_internal_gamemode_request_end = NULL; +static api_call_return_int REAL_internal_gamemode_query_status = NULL; +static api_call_return_cstring REAL_internal_gamemode_error_string = NULL; +static api_call_pid_return_int REAL_internal_gamemode_request_start_for = NULL; +static api_call_pid_return_int REAL_internal_gamemode_request_end_for = NULL; +static api_call_pid_return_int REAL_internal_gamemode_query_status_for = NULL; + +/** + * Internal helper to perform the symbol binding safely. 
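+ * (dlsym() may legitimately return NULL, so dlerror() is consulted as well; a
+ * failed lookup is only treated as fatal when the binding is marked required.)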
+ * + * Returns 0 on success and -1 on failure + */ +__attribute__((always_inline)) static inline int internal_bind_libgamemode_symbol( + void *handle, const char *name, void **out_func, size_t func_size, bool required) +{ + void *symbol_lookup = NULL; + char *dl_error = NULL; + + /* Safely look up the symbol */ + symbol_lookup = dlsym(handle, name); + dl_error = dlerror(); + if (required && (dl_error || !symbol_lookup)) { + snprintf(internal_gamemode_client_error_string, + sizeof(internal_gamemode_client_error_string), + "dlsym failed - %s", + dl_error); + return -1; + } + + /* Have the symbol correctly, copy it to make it usable */ + memcpy(out_func, &symbol_lookup, func_size); + return 0; +} + +/** + * Loads libgamemode and needed functions + * + * Returns 0 on success and -1 on failure + */ +__attribute__((always_inline)) static inline int internal_load_libgamemode(void) +{ + /* We start at 1, 0 is a success and -1 is a fail */ + if (internal_libgamemode_loaded != 1) { + return internal_libgamemode_loaded; + } + + /* Anonymous struct type to define our bindings */ + struct binding { + const char *name; + void **functor; + size_t func_size; + bool required; + } bindings[] = { + { "real_gamemode_request_start", + (void **)&REAL_internal_gamemode_request_start, + sizeof(REAL_internal_gamemode_request_start), + true }, + { "real_gamemode_request_end", + (void **)&REAL_internal_gamemode_request_end, + sizeof(REAL_internal_gamemode_request_end), + true }, + { "real_gamemode_query_status", + (void **)&REAL_internal_gamemode_query_status, + sizeof(REAL_internal_gamemode_query_status), + false }, + { "real_gamemode_error_string", + (void **)&REAL_internal_gamemode_error_string, + sizeof(REAL_internal_gamemode_error_string), + true }, + { "real_gamemode_request_start_for", + (void **)&REAL_internal_gamemode_request_start_for, + sizeof(REAL_internal_gamemode_request_start_for), + false }, + { "real_gamemode_request_end_for", + (void **)&REAL_internal_gamemode_request_end_for, + sizeof(REAL_internal_gamemode_request_end_for), + false }, + { "real_gamemode_query_status_for", + (void **)&REAL_internal_gamemode_query_status_for, + sizeof(REAL_internal_gamemode_query_status_for), + false }, + }; + + void *libgamemode = NULL; + + /* Try and load libgamemode */ + libgamemode = dlopen("libgamemode.so.0", RTLD_NOW); + if (!libgamemode) { + /* Attempt to load unversioned library for compatibility with older + * versions (as of writing, there are no ABI changes between the two - + * this may need to change if ever ABI-breaking changes are made) */ + libgamemode = dlopen("libgamemode.so", RTLD_NOW); + if (!libgamemode) { + snprintf(internal_gamemode_client_error_string, + sizeof(internal_gamemode_client_error_string), + "dlopen failed - %s", + dlerror()); + internal_libgamemode_loaded = -1; + return -1; + } + } + + /* Attempt to bind all symbols */ + for (size_t i = 0; i < sizeof(bindings) / sizeof(bindings[0]); i++) { + struct binding *binder = &bindings[i]; + + if (internal_bind_libgamemode_symbol(libgamemode, + binder->name, + binder->functor, + binder->func_size, + binder->required)) { + internal_libgamemode_loaded = -1; + return -1; + }; + } + + /* Success */ + internal_libgamemode_loaded = 0; + return 0; +} + +/** + * Redirect to the real libgamemode + */ +__attribute__((always_inline)) static inline const char *gamemode_error_string(void) +{ + /* If we fail to load the system gamemode, or we have an error string already, return our error + * string instead of diverting to the system version */ + if 
(internal_load_libgamemode() < 0 || internal_gamemode_client_error_string[0] != '\0') { + return internal_gamemode_client_error_string; + } + + /* Assert for static analyser that the function is not NULL */ + assert(REAL_internal_gamemode_error_string != NULL); + + return REAL_internal_gamemode_error_string(); +} + +/** + * Redirect to the real libgamemode + * Allow automatically requesting game mode + * Also prints errors as they happen. + */ +#ifdef GAMEMODE_AUTO +__attribute__((constructor)) +#else +__attribute__((always_inline)) static inline +#endif +int gamemode_request_start(void) +{ + /* Need to load gamemode */ + if (internal_load_libgamemode() < 0) { +#ifdef GAMEMODE_AUTO + fprintf(stderr, "gamemodeauto: %s\n", gamemode_error_string()); +#endif + return -1; + } + + /* Assert for static analyser that the function is not NULL */ + assert(REAL_internal_gamemode_request_start != NULL); + + if (REAL_internal_gamemode_request_start() < 0) { +#ifdef GAMEMODE_AUTO + fprintf(stderr, "gamemodeauto: %s\n", gamemode_error_string()); +#endif + return -1; + } + + return 0; +} + +/* Redirect to the real libgamemode */ +#ifdef GAMEMODE_AUTO +__attribute__((destructor)) +#else +__attribute__((always_inline)) static inline +#endif +int gamemode_request_end(void) +{ + /* Need to load gamemode */ + if (internal_load_libgamemode() < 0) { +#ifdef GAMEMODE_AUTO + fprintf(stderr, "gamemodeauto: %s\n", gamemode_error_string()); +#endif + return -1; + } + + /* Assert for static analyser that the function is not NULL */ + assert(REAL_internal_gamemode_request_end != NULL); + + if (REAL_internal_gamemode_request_end() < 0) { +#ifdef GAMEMODE_AUTO + fprintf(stderr, "gamemodeauto: %s\n", gamemode_error_string()); +#endif + return -1; + } + + return 0; +} + +/* Redirect to the real libgamemode */ +__attribute__((always_inline)) static inline int gamemode_query_status(void) +{ + /* Need to load gamemode */ + if (internal_load_libgamemode() < 0) { + return -1; + } + + if (REAL_internal_gamemode_query_status == NULL) { + snprintf(internal_gamemode_client_error_string, + sizeof(internal_gamemode_client_error_string), + "gamemode_query_status missing (older host?)"); + return -1; + } + + return REAL_internal_gamemode_query_status(); +} + +/* Redirect to the real libgamemode */ +__attribute__((always_inline)) static inline int gamemode_request_start_for(pid_t pid) +{ + /* Need to load gamemode */ + if (internal_load_libgamemode() < 0) { + return -1; + } + + if (REAL_internal_gamemode_request_start_for == NULL) { + snprintf(internal_gamemode_client_error_string, + sizeof(internal_gamemode_client_error_string), + "gamemode_request_start_for missing (older host?)"); + return -1; + } + + return REAL_internal_gamemode_request_start_for(pid); +} + +/* Redirect to the real libgamemode */ +__attribute__((always_inline)) static inline int gamemode_request_end_for(pid_t pid) +{ + /* Need to load gamemode */ + if (internal_load_libgamemode() < 0) { + return -1; + } + + if (REAL_internal_gamemode_request_end_for == NULL) { + snprintf(internal_gamemode_client_error_string, + sizeof(internal_gamemode_client_error_string), + "gamemode_request_end_for missing (older host?)"); + return -1; + } + + return REAL_internal_gamemode_request_end_for(pid); +} + +/* Redirect to the real libgamemode */ +__attribute__((always_inline)) static inline int gamemode_query_status_for(pid_t pid) +{ + /* Need to load gamemode */ + if (internal_load_libgamemode() < 0) { + return -1; + } + + if (REAL_internal_gamemode_query_status_for == NULL) { + 
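+    /* The *_for entry points are marked non-required in internal_load_libgamemode's
+     * bindings table, so an older libgamemode may simply not export them; report a
+     * descriptive error instead of calling through a NULL pointer. */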
snprintf(internal_gamemode_client_error_string, + sizeof(internal_gamemode_client_error_string), + "gamemode_query_status_for missing (older host?)"); + return -1; + } + + return REAL_internal_gamemode_query_status_for(pid); +} + +#endif // CLIENT_GAMEMODE_H diff --git a/dependencies/ih264d/CMakeLists.txt b/dependencies/ih264d/CMakeLists.txt index 4f538f69..64ac0931 100644 --- a/dependencies/ih264d/CMakeLists.txt +++ b/dependencies/ih264d/CMakeLists.txt @@ -2,14 +2,6 @@ project ("ih264d") -set(LIBAVCDEC_X86_INCLUDES "common/x86" "decoder/x86") - -include_directories("common/" "decoder/" ${LIBAVCDEC_X86_INCLUDES}) - -if((CMAKE_C_COMPILER_ID MATCHES "GNU") OR (CMAKE_C_COMPILER_ID MATCHES "Clang")) - add_compile_options(-mssse3 -mavx2) -endif() - add_library (ih264d "common/ih264_buf_mgr.c" "common/ih264_buf_mgr.h" @@ -57,21 +49,6 @@ add_library (ih264d "common/ih264_weighted_pred.h" "common/ithread.c" "common/ithread.h" -"common/x86/ih264_chroma_intra_pred_filters_ssse3.c" -"common/x86/ih264_deblk_chroma_ssse3.c" -"common/x86/ih264_deblk_luma_ssse3.c" -"common/x86/ih264_ihadamard_scaling_sse42.c" -"common/x86/ih264_ihadamard_scaling_ssse3.c" -"common/x86/ih264_inter_pred_filters_ssse3.c" -"common/x86/ih264_iquant_itrans_recon_dc_ssse3.c" -"common/x86/ih264_iquant_itrans_recon_sse42.c" -"common/x86/ih264_iquant_itrans_recon_ssse3.c" -"common/x86/ih264_luma_intra_pred_filters_ssse3.c" -"common/x86/ih264_mem_fns_ssse3.c" -"common/x86/ih264_padding_ssse3.c" -"common/x86/ih264_platform_macros.h" -"common/x86/ih264_resi_trans_quant_sse42.c" -"common/x86/ih264_weighted_pred_sse42.c" "decoder/ih264d.h" "decoder/ih264d_api.c" "decoder/ih264d_bitstrm.c" @@ -138,11 +115,87 @@ add_library (ih264d "decoder/ih264d_vui.h" "decoder/iv.h" "decoder/ivd.h" +) + +if (CMAKE_OSX_ARCHITECTURES) +set(IH264D_ARCHITECTURE ${CMAKE_OSX_ARCHITECTURES}) +else() +set(IH264D_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR}) +endif() + +if (IH264D_ARCHITECTURE STREQUAL "x86_64" OR IH264D_ARCHITECTURE STREQUAL "amd64" OR IH264D_ARCHITECTURE STREQUAL "AMD64") +set(LIBAVCDEC_X86_INCLUDES "common/x86" "decoder/x86") +include_directories("common/" "decoder/" ${LIBAVCDEC_X86_INCLUDES}) +target_sources(ih264d PRIVATE +"common/x86/ih264_chroma_intra_pred_filters_ssse3.c" +"common/x86/ih264_deblk_chroma_ssse3.c" +"common/x86/ih264_deblk_luma_ssse3.c" +"common/x86/ih264_ihadamard_scaling_sse42.c" +"common/x86/ih264_ihadamard_scaling_ssse3.c" +"common/x86/ih264_inter_pred_filters_ssse3.c" +"common/x86/ih264_iquant_itrans_recon_dc_ssse3.c" +"common/x86/ih264_iquant_itrans_recon_sse42.c" +"common/x86/ih264_iquant_itrans_recon_ssse3.c" +"common/x86/ih264_luma_intra_pred_filters_ssse3.c" +"common/x86/ih264_mem_fns_ssse3.c" +"common/x86/ih264_padding_ssse3.c" +"common/x86/ih264_platform_macros.h" +"common/x86/ih264_resi_trans_quant_sse42.c" +"common/x86/ih264_weighted_pred_sse42.c" "decoder/x86/ih264d_function_selector.c" "decoder/x86/ih264d_function_selector_sse42.c" "decoder/x86/ih264d_function_selector_ssse3.c" ) +elseif(IH264D_ARCHITECTURE STREQUAL "aarch64" OR IH264D_ARCHITECTURE STREQUAL "arm64") +enable_language( C CXX ASM ) +set(LIBAVCDEC_ARM_INCLUDES "common/armv8" "decoder/arm") +include_directories("common/" "decoder/" ${LIBAVCDEC_ARM_INCLUDES}) +target_sources(ih264d PRIVATE +"common/armv8/ih264_deblk_chroma_av8.s" +"common/armv8/ih264_deblk_luma_av8.s" +"common/armv8/ih264_default_weighted_pred_av8.s" +"common/armv8/ih264_ihadamard_scaling_av8.s" +"common/armv8/ih264_inter_pred_chroma_av8.s" 
+"common/armv8/ih264_inter_pred_filters_luma_horz_av8.s" +"common/armv8/ih264_inter_pred_filters_luma_vert_av8.s" +"common/armv8/ih264_inter_pred_luma_copy_av8.s" +"common/armv8/ih264_inter_pred_luma_horz_hpel_vert_hpel_av8.s" +"common/armv8/ih264_inter_pred_luma_horz_hpel_vert_qpel_av8.s" +"common/armv8/ih264_inter_pred_luma_horz_qpel_av8.s" +"common/armv8/ih264_inter_pred_luma_horz_qpel_vert_hpel_av8.s" +"common/armv8/ih264_inter_pred_luma_horz_qpel_vert_qpel_av8.s" +"common/armv8/ih264_inter_pred_luma_vert_qpel_av8.s" +"common/armv8/ih264_intra_pred_chroma_av8.s" +"common/armv8/ih264_intra_pred_luma_16x16_av8.s" +"common/armv8/ih264_intra_pred_luma_4x4_av8.s" +"common/armv8/ih264_intra_pred_luma_8x8_av8.s" +"common/armv8/ih264_iquant_itrans_recon_av8.s" +"common/armv8/ih264_iquant_itrans_recon_dc_av8.s" +"common/armv8/ih264_mem_fns_neon_av8.s" +"common/armv8/ih264_neon_macros.s" +"common/armv8/ih264_padding_neon_av8.s" +"common/armv8/ih264_platform_macros.h" +"common/armv8/ih264_resi_trans_quant_av8.s" +"common/armv8/ih264_weighted_bi_pred_av8.s" +"common/armv8/ih264_weighted_pred_av8.s" +"decoder/arm/ih264d_function_selector_a9q.c" +"decoder/arm/ih264d_function_selector_av8.c" +"decoder/arm/ih264d_function_selector.c" +) +target_compile_options(ih264d PRIVATE -DARMV8) +if(APPLE) + target_sources(ih264d PRIVATE "common/armv8/macos_arm_symbol_aliases.s") +endif() +else() +message(FATAL_ERROR "ih264d unknown architecture: ${IH264D_ARCHITECTURE}") +endif() if(MSVC) set_property(TARGET ih264d PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") + +# tune settings for slightly better performance +target_compile_options(ih264d PRIVATE $<$:/Oi>) # enable intrinsic functions +target_compile_options(ih264d PRIVATE $<$:/Ot>) # favor speed +target_compile_options(ih264d PRIVATE "/GS-") # disable runtime checks + endif() diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s index 39c02560..c0d9cf99 100644 --- a/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s @@ -429,8 +429,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8: rev64 v7.4h, v2.4h ld1 {v3.2s}, [x10] sub x5, x3, #8 +#ifdef __APPLE__ + adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGE + ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGEOFF] +#else adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs1 ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs1] +#endif usubl v10.8h, v5.8b, v1.8b ld1 {v8.8b, v9.8b}, [x12] // Load multiplication factors 1 to 8 into D3 mov v8.d[1], v9.d[0] @@ -484,10 +489,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8: zip1 v1.8h, v0.8h, v2.8h zip2 v2.8h, v0.8h, v2.8h mov v0.16b, v1.16b - +#ifdef __APPLE__ + adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGE + ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGEOFF] +#else adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs2 ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs2] - +#endif ld1 {v8.2s, v9.2s}, [x12] mov v8.d[1], v9.d[0] mov v10.16b, v8.16b diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s index fa19c121..2422d8cd 100644 --- a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s @@ -431,10 +431,13 @@ 
ih264_intra_pred_luma_16x16_mode_plane_av8: mov x10, x1 //top_left mov x4, #-1 ld1 {v2.2s}, [x1], x8 - +#ifdef __APPLE__ + adrp x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGE + ldr x7, [x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGEOFF] +#else adrp x7, :got:ih264_gai1_intrapred_luma_plane_coeffs ldr x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs] - +#endif ld1 {v0.2s}, [x1] rev64 v2.8b, v2.8b ld1 {v6.2s, v7.2s}, [x7] diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s index 273aa81b..6fa31ded 100644 --- a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s @@ -1029,9 +1029,13 @@ ih264_intra_pred_luma_8x8_mode_horz_u_av8: mov v3.d[0], v2.d[1] ext v4.16b, v2.16b , v2.16b , #1 mov v5.d[0], v4.d[1] - +#ifdef __APPLE__ + adrp x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGE + ldr x12, [x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGEOFF] +#else adrp x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u] +#endif uaddl v20.8h, v0.8b, v2.8b uaddl v22.8h, v1.8b, v3.8b uaddl v24.8h, v2.8b, v4.8b diff --git a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s index 475f690e..8d6aa995 100644 --- a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s @@ -142,14 +142,22 @@ ih264_weighted_bi_pred_luma_av8: sxtw x4, w4 sxtw x5, w5 stp x19, x20, [sp, #-16]! +#ifndef __APPLE__ ldr w8, [sp, #80] //Load wt2 in w8 ldr w9, [sp, #88] //Load ofst1 in w9 - add w6, w6, #1 //w6 = log_WD + 1 - neg w10, w6 //w10 = -(log_WD + 1) - dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit) ldr w10, [sp, #96] //Load ofst2 in w10 ldr w11, [sp, #104] //Load ht in w11 ldr w12, [sp, #112] //Load wd in w12 +#else + ldr w8, [sp, #80] //Load wt2 in w8 + ldr w9, [sp, #84] //Load ofst1 in w9 + ldr w10, [sp, #88] //Load ofst2 in w10 + ldr w11, [sp, #92] //Load ht in w11 + ldr w12, [sp, #96] //Load wd in w12 +#endif + add w6, w6, #1 //w6 = log_WD + 1 + neg w10, w6 //w10 = -(log_WD + 1) + dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit) add w9, w9, #1 //w9 = ofst1 + 1 add w9, w9, w10 //w9 = ofst1 + ofst2 + 1 mov v2.s[0], w7 @@ -424,17 +432,24 @@ ih264_weighted_bi_pred_chroma_av8: sxtw x5, w5 stp x19, x20, [sp, #-16]! 
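+    // Apple's AArch64 ABI packs stack arguments to their natural size rather than
+    // padding each one to an 8-byte slot, so the 32-bit parameters passed on the
+    // stack sit at different offsets than in the standard AAPCS64 layout below.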
- +#ifndef __APPLE__ ldr w8, [sp, #80] //Load wt2 in w8 + ldr w9, [sp, #88] //Load ofst1 in w9 + ldr w10, [sp, #96] //Load ofst2 in w10 + ldr w11, [sp, #104] //Load ht in w11 + ldr w12, [sp, #112] //Load wd in w12 +#else + ldr w8, [sp, #80] //Load wt2 in w8 + ldr w9, [sp, #84] //Load ofst1 in w9 + ldr w10, [sp, #88] //Load ofst2 in w10 + ldr w11, [sp, #92] //Load ht in w11 + ldr w12, [sp, #96] //Load wd in w12 +#endif dup v4.4s, w8 //Q2 = (wt2_u, wt2_v) (32-bit) dup v2.4s, w7 //Q1 = (wt1_u, wt1_v) (32-bit) add w6, w6, #1 //w6 = log_WD + 1 - ldr w9, [sp, #88] //Load ofst1 in w9 - ldr w10, [sp, #96] //Load ofst2 in w10 neg w20, w6 //w20 = -(log_WD + 1) dup v0.8h, w20 //Q0 = -(log_WD + 1) (16-bit) - ldr w11, [sp, #104] //Load ht in x11 - ldr w12, [sp, #112] //Load wd in x12 dup v20.8h, w9 //0ffset1 dup v21.8h, w10 //0ffset2 srhadd v6.8b, v20.8b, v21.8b diff --git a/dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s b/dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s new file mode 100644 index 00000000..3639f1b3 --- /dev/null +++ b/dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s @@ -0,0 +1,185 @@ +// macOS clang compilers append preceding underscores to function names, this is to prevent +// mismatches with the assembly function names and the C functions as defined in the header. + +.global _ih264_deblk_chroma_horz_bs4_av8 +_ih264_deblk_chroma_horz_bs4_av8 = ih264_deblk_chroma_horz_bs4_av8 + +.global _ih264_deblk_chroma_horz_bslt4_av8 +_ih264_deblk_chroma_horz_bslt4_av8 = ih264_deblk_chroma_horz_bslt4_av8 + +.global _ih264_deblk_chroma_vert_bs4_av8 +_ih264_deblk_chroma_vert_bs4_av8 = ih264_deblk_chroma_vert_bs4_av8 + +.global _ih264_deblk_chroma_vert_bslt4_av8 +_ih264_deblk_chroma_vert_bslt4_av8 = ih264_deblk_chroma_vert_bslt4_av8 + +.global _ih264_deblk_luma_horz_bs4_av8 +_ih264_deblk_luma_horz_bs4_av8 = ih264_deblk_luma_horz_bs4_av8 + +.global _ih264_deblk_luma_horz_bslt4_av8 +_ih264_deblk_luma_horz_bslt4_av8 = ih264_deblk_luma_horz_bslt4_av8 + +.global _ih264_deblk_luma_vert_bs4_av8 +_ih264_deblk_luma_vert_bs4_av8 = ih264_deblk_luma_vert_bs4_av8 + +.global _ih264_deblk_luma_vert_bslt4_av8 +_ih264_deblk_luma_vert_bslt4_av8 = ih264_deblk_luma_vert_bslt4_av8 + +.global _ih264_default_weighted_pred_chroma_av8 +_ih264_default_weighted_pred_chroma_av8 = ih264_default_weighted_pred_chroma_av8 + +.global _ih264_default_weighted_pred_luma_av8 +_ih264_default_weighted_pred_luma_av8 = ih264_default_weighted_pred_luma_av8 + +.global _ih264_ihadamard_scaling_4x4_av8 +_ih264_ihadamard_scaling_4x4_av8 = ih264_ihadamard_scaling_4x4_av8 + +.global _ih264_inter_pred_chroma_av8 +_ih264_inter_pred_chroma_av8 = ih264_inter_pred_chroma_av8 + +.global _ih264_inter_pred_luma_copy_av8 +_ih264_inter_pred_luma_copy_av8 = ih264_inter_pred_luma_copy_av8 + +.global _ih264_inter_pred_luma_horz_av8 +_ih264_inter_pred_luma_horz_av8 = ih264_inter_pred_luma_horz_av8 + +.global _ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 +_ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 = ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 + +.global _ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 +_ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 = ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 + +.global _ih264_inter_pred_luma_horz_qpel_av8 +_ih264_inter_pred_luma_horz_qpel_av8 = ih264_inter_pred_luma_horz_qpel_av8 + +.global _ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 +_ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 = ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 + +.global 
_ih264_inter_pred_luma_horz_qpel_vert_qpel_av8 +_ih264_inter_pred_luma_horz_qpel_vert_qpel_av8 = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8 + +.global _ih264_inter_pred_luma_vert_av8 +_ih264_inter_pred_luma_vert_av8 = ih264_inter_pred_luma_vert_av8 + +.global _ih264_inter_pred_luma_vert_qpel_av8 +_ih264_inter_pred_luma_vert_qpel_av8 = ih264_inter_pred_luma_vert_qpel_av8 + +.global _ih264_intra_pred_chroma_8x8_mode_horz_av8 +_ih264_intra_pred_chroma_8x8_mode_horz_av8 = ih264_intra_pred_chroma_8x8_mode_horz_av8 + +.global _ih264_intra_pred_chroma_8x8_mode_plane_av8 +_ih264_intra_pred_chroma_8x8_mode_plane_av8 = ih264_intra_pred_chroma_8x8_mode_plane_av8 + +.global _ih264_intra_pred_chroma_8x8_mode_vert_av8 +_ih264_intra_pred_chroma_8x8_mode_vert_av8 = ih264_intra_pred_chroma_8x8_mode_vert_av8 + +.global _ih264_intra_pred_luma_16x16_mode_dc_av8 +_ih264_intra_pred_luma_16x16_mode_dc_av8 = ih264_intra_pred_luma_16x16_mode_dc_av8 + +.global _ih264_intra_pred_luma_16x16_mode_horz_av8 +_ih264_intra_pred_luma_16x16_mode_horz_av8 = ih264_intra_pred_luma_16x16_mode_horz_av8 + +.global _ih264_intra_pred_luma_16x16_mode_plane_av8 +_ih264_intra_pred_luma_16x16_mode_plane_av8 = ih264_intra_pred_luma_16x16_mode_plane_av8 + +.global _ih264_intra_pred_luma_16x16_mode_vert_av8 +_ih264_intra_pred_luma_16x16_mode_vert_av8 = ih264_intra_pred_luma_16x16_mode_vert_av8 + +.global _ih264_intra_pred_luma_4x4_mode_dc_av8 +_ih264_intra_pred_luma_4x4_mode_dc_av8 = ih264_intra_pred_luma_4x4_mode_dc_av8 + +.global _ih264_intra_pred_luma_4x4_mode_diag_dl_av8 +_ih264_intra_pred_luma_4x4_mode_diag_dl_av8 = ih264_intra_pred_luma_4x4_mode_diag_dl_av8 + +.global _ih264_intra_pred_luma_4x4_mode_diag_dr_av8 +_ih264_intra_pred_luma_4x4_mode_diag_dr_av8 = ih264_intra_pred_luma_4x4_mode_diag_dr_av8 + +.global _ih264_intra_pred_luma_4x4_mode_horz_av8 +_ih264_intra_pred_luma_4x4_mode_horz_av8 = ih264_intra_pred_luma_4x4_mode_horz_av8 + +.global _ih264_intra_pred_luma_4x4_mode_horz_d_av8 +_ih264_intra_pred_luma_4x4_mode_horz_d_av8 = ih264_intra_pred_luma_4x4_mode_horz_d_av8 + +.global _ih264_intra_pred_luma_4x4_mode_horz_u_av8 +_ih264_intra_pred_luma_4x4_mode_horz_u_av8 = ih264_intra_pred_luma_4x4_mode_horz_u_av8 + +.global _ih264_intra_pred_luma_4x4_mode_vert_av8 +_ih264_intra_pred_luma_4x4_mode_vert_av8 = ih264_intra_pred_luma_4x4_mode_vert_av8 + +.global _ih264_intra_pred_luma_4x4_mode_vert_l_av8 +_ih264_intra_pred_luma_4x4_mode_vert_l_av8 = ih264_intra_pred_luma_4x4_mode_vert_l_av8 + +.global _ih264_intra_pred_luma_4x4_mode_vert_r_av8 +_ih264_intra_pred_luma_4x4_mode_vert_r_av8 = ih264_intra_pred_luma_4x4_mode_vert_r_av8 + +.global _ih264_intra_pred_luma_8x8_mode_dc_av8 +_ih264_intra_pred_luma_8x8_mode_dc_av8 = ih264_intra_pred_luma_8x8_mode_dc_av8 + +.global _ih264_intra_pred_luma_8x8_mode_diag_dl_av8 +_ih264_intra_pred_luma_8x8_mode_diag_dl_av8 = ih264_intra_pred_luma_8x8_mode_diag_dl_av8 + +.global _ih264_intra_pred_luma_8x8_mode_diag_dr_av8 +_ih264_intra_pred_luma_8x8_mode_diag_dr_av8 = ih264_intra_pred_luma_8x8_mode_diag_dr_av8 + +.global _ih264_intra_pred_luma_8x8_mode_horz_av8 +_ih264_intra_pred_luma_8x8_mode_horz_av8 = ih264_intra_pred_luma_8x8_mode_horz_av8 + +.global _ih264_intra_pred_luma_8x8_mode_horz_d_av8 +_ih264_intra_pred_luma_8x8_mode_horz_d_av8 = ih264_intra_pred_luma_8x8_mode_horz_d_av8 + +.global _ih264_intra_pred_luma_8x8_mode_horz_u_av8 +_ih264_intra_pred_luma_8x8_mode_horz_u_av8 = ih264_intra_pred_luma_8x8_mode_horz_u_av8 + +.global _ih264_intra_pred_luma_8x8_mode_vert_av8 
+_ih264_intra_pred_luma_8x8_mode_vert_av8 = ih264_intra_pred_luma_8x8_mode_vert_av8 + +.global _ih264_intra_pred_luma_8x8_mode_vert_l_av8 +_ih264_intra_pred_luma_8x8_mode_vert_l_av8 = ih264_intra_pred_luma_8x8_mode_vert_l_av8 + +.global _ih264_intra_pred_luma_8x8_mode_vert_r_av8 +_ih264_intra_pred_luma_8x8_mode_vert_r_av8 = ih264_intra_pred_luma_8x8_mode_vert_r_av8 + +.global _ih264_iquant_itrans_recon_4x4_av8 +_ih264_iquant_itrans_recon_4x4_av8 = ih264_iquant_itrans_recon_4x4_av8 + +.global _ih264_iquant_itrans_recon_4x4_dc_av8 +_ih264_iquant_itrans_recon_4x4_dc_av8 = ih264_iquant_itrans_recon_4x4_dc_av8 + +.global _ih264_iquant_itrans_recon_8x8_av8 +_ih264_iquant_itrans_recon_8x8_av8 = ih264_iquant_itrans_recon_8x8_av8 + +.global _ih264_iquant_itrans_recon_8x8_dc_av8 +_ih264_iquant_itrans_recon_8x8_dc_av8 = ih264_iquant_itrans_recon_8x8_dc_av8 + +.global _ih264_iquant_itrans_recon_chroma_4x4_av8 +_ih264_iquant_itrans_recon_chroma_4x4_av8 = ih264_iquant_itrans_recon_chroma_4x4_av8 + +.global _ih264_iquant_itrans_recon_chroma_4x4_dc_av8 +_ih264_iquant_itrans_recon_chroma_4x4_dc_av8 = ih264_iquant_itrans_recon_chroma_4x4_dc_av8 + +.global _ih264_pad_left_chroma_av8 +_ih264_pad_left_chroma_av8 = ih264_pad_left_chroma_av8 + +.global _ih264_pad_left_luma_av8 +_ih264_pad_left_luma_av8 = ih264_pad_left_luma_av8 + +.global _ih264_pad_right_chroma_av8 +_ih264_pad_right_chroma_av8 = ih264_pad_right_chroma_av8 + +.global _ih264_pad_right_luma_av8 +_ih264_pad_right_luma_av8 = ih264_pad_right_luma_av8 + +.global _ih264_pad_top_av8 +_ih264_pad_top_av8 = ih264_pad_top_av8 + +.global _ih264_weighted_bi_pred_chroma_av8 +_ih264_weighted_bi_pred_chroma_av8 = ih264_weighted_bi_pred_chroma_av8 + +.global _ih264_weighted_bi_pred_luma_av8 +_ih264_weighted_bi_pred_luma_av8 = ih264_weighted_bi_pred_luma_av8 + +.global _ih264_weighted_pred_chroma_av8 +_ih264_weighted_pred_chroma_av8 = ih264_weighted_pred_chroma_av8 + +.global _ih264_weighted_pred_luma_av8 +_ih264_weighted_pred_luma_av8 = ih264_weighted_pred_luma_av8 \ No newline at end of file diff --git a/dependencies/ih264d/common/ithread.c b/dependencies/ih264d/common/ithread.c index d710e323..2c25bdb0 100644 --- a/dependencies/ih264d/common/ithread.c +++ b/dependencies/ih264d/common/ithread.c @@ -85,28 +85,59 @@ UWORD32 ithread_get_mutex_lock_size(void) return sizeof(CRITICAL_SECTION); } +struct _ithread_launch_param +{ + void (*startFunc)(void* argument); + void* argument; +}; + +DWORD WINAPI _ithread_WinThreadStartRoutine(LPVOID lpThreadParameter) +{ + struct _ithread_launch_param* param = (struct _ithread_launch_param*)lpThreadParameter; + typedef void *(*ThreadStartRoutineType)(void *); + ThreadStartRoutineType pfnThreadRoutine = (ThreadStartRoutineType)param->startFunc; + void* arg = param->argument; + free(param); + pfnThreadRoutine(arg); + return 0; +} + WORD32 ithread_create(void* thread_handle, void* attribute, void* strt, void* argument) { - //UNUSED(attribute); - //return pthread_create((pthread_t*)thread_handle, NULL, (void* (*)(void*)) strt, argument); - __debugbreak(); + UNUSED(attribute); + struct _ithread_launch_param* param = malloc(sizeof(struct _ithread_launch_param)); + param->startFunc = (void (*)(void*))strt; + param->argument = argument; + HANDLE *handle = (HANDLE*)thread_handle; + *handle = CreateThread(NULL, 0, _ithread_WinThreadStartRoutine, param, 0, NULL); + if(*handle == NULL) + { + return -1; + } return 0; } WORD32 ithread_join(void* thread_handle, void** val_ptr) { //UNUSED(val_ptr); - //pthread_t* pthread_handle = 
(pthread_t*)thread_handle; - //return pthread_join(*pthread_handle, NULL); - - __debugbreak(); - return 0; + HANDLE *handle = (HANDLE*)thread_handle; + DWORD result = WaitForSingleObject(*handle, INFINITE); + if(result == WAIT_OBJECT_0) + { + CloseHandle(*handle); + return 0; + } + else + { + return -1; + } } WORD32 ithread_get_mutex_struct_size(void) { return sizeof(CRITICAL_SECTION); } + WORD32 ithread_mutex_init(void* mutex) { InitializeCriticalSection((LPCRITICAL_SECTION)mutex); @@ -153,7 +184,6 @@ UWORD32 ithread_get_sem_struct_size(void) //return(sizeof(sem_t)); } - WORD32 ithread_sem_init(void* sem, WORD32 pshared, UWORD32 value) { __debugbreak(); @@ -168,7 +198,6 @@ WORD32 ithread_sem_post(void* sem) //return sem_post((sem_t*)sem); } - WORD32 ithread_sem_wait(void* sem) { __debugbreak(); @@ -176,7 +205,6 @@ WORD32 ithread_sem_wait(void* sem) //return sem_wait((sem_t*)sem); } - WORD32 ithread_sem_destroy(void* sem) { __debugbreak(); diff --git a/dependencies/ih264d/common/x86/ih264_chroma_intra_pred_filters_ssse3.c b/dependencies/ih264d/common/x86/ih264_chroma_intra_pred_filters_ssse3.c index d43ce207..502420b1 100644 --- a/dependencies/ih264d/common/x86/ih264_chroma_intra_pred_filters_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_chroma_intra_pred_filters_ssse3.c @@ -56,6 +56,11 @@ #include "ih264_platform_macros.h" #include "ih264_intra_pred_filters.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif /*****************************************************************************/ /* Chroma Intra prediction 8x8 filters */ @@ -93,6 +98,8 @@ * ****************************************************************************** */ + +ATTRIBUTE_SSSE3 void ih264_intra_pred_chroma_8x8_mode_horz_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -169,6 +176,8 @@ void ih264_intra_pred_chroma_8x8_mode_horz_ssse3(UWORD8 *pu1_src, * ******************************************************************************* */ + +ATTRIBUTE_SSSE3 void ih264_intra_pred_chroma_8x8_mode_vert_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -237,6 +246,8 @@ void ih264_intra_pred_chroma_8x8_mode_vert_ssse3(UWORD8 *pu1_src, * ****************************************************************************** */ + +ATTRIBUTE_SSSE3 void ih264_intra_pred_chroma_8x8_mode_plane_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, diff --git a/dependencies/ih264d/common/x86/ih264_deblk_chroma_ssse3.c b/dependencies/ih264d/common/x86/ih264_deblk_chroma_ssse3.c index a36447a2..d73d9d35 100644 --- a/dependencies/ih264d/common/x86/ih264_deblk_chroma_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_deblk_chroma_ssse3.c @@ -53,6 +53,12 @@ #include "ih264_deblk_edge_filters.h" #include "ih264_macros.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /*****************************************************************************/ /* Function Definitions */ /*****************************************************************************/ @@ -91,6 +97,8 @@ /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ + +ATTRIBUTE_SSSE3 void ih264_deblk_chroma_vert_bs4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, @@ -274,6 +282,8 @@ void ih264_deblk_chroma_vert_bs4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ 
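+/* ATTRIBUTE_SSSE3 expands to __attribute__((target("ssse3"))) on GCC/Clang (see the
+ * definition at the top of this file), so SSSE3 code generation is enabled per
+ * function instead of building the whole library with -mssse3. */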
/*****************************************************************************/ + +ATTRIBUTE_SSSE3 void ih264_deblk_chroma_horz_bs4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, @@ -424,6 +434,8 @@ void ih264_deblk_chroma_horz_bs4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ + +ATTRIBUTE_SSSE3 void ih264_deblk_chroma_vert_bslt4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, @@ -645,6 +657,8 @@ void ih264_deblk_chroma_vert_bslt4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ + +ATTRIBUTE_SSSE3 void ih264_deblk_chroma_horz_bslt4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, @@ -829,6 +843,8 @@ void ih264_deblk_chroma_horz_bslt4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ + +ATTRIBUTE_SSSE3 void ih264_deblk_chroma_vert_bs4_mbaff_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, @@ -963,6 +979,8 @@ void ih264_deblk_chroma_vert_bs4_mbaff_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ + +ATTRIBUTE_SSSE3 void ih264_deblk_chroma_vert_bslt4_mbaff_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha_cb, diff --git a/dependencies/ih264d/common/x86/ih264_deblk_luma_ssse3.c b/dependencies/ih264d/common/x86/ih264_deblk_luma_ssse3.c index e29bebbe..c135f6b6 100644 --- a/dependencies/ih264d/common/x86/ih264_deblk_luma_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_deblk_luma_ssse3.c @@ -53,6 +53,12 @@ #include "ih264_deblk_edge_filters.h" #include "ih264_macros.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /*****************************************************************************/ /* Function Definitions */ /*****************************************************************************/ @@ -87,6 +93,7 @@ /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_deblk_luma_vert_bs4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, @@ -508,6 +515,7 @@ void ih264_deblk_luma_vert_bs4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_deblk_luma_horz_bs4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, @@ -847,6 +855,7 @@ void ih264_deblk_luma_horz_bs4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_deblk_luma_vert_bslt4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, @@ -1142,6 +1151,7 @@ void ih264_deblk_luma_vert_bslt4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_deblk_luma_horz_bslt4_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, @@ -1439,6 +1449,7 @@ void ih264_deblk_luma_horz_bslt4_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ 
/*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_deblk_luma_vert_bs4_mbaff_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, @@ -1758,6 +1769,7 @@ void ih264_deblk_luma_vert_bs4_mbaff_ssse3(UWORD8 *pu1_src, /* 12 02 2015 Naveen Kumar P Initial version */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_deblk_luma_vert_bslt4_mbaff_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 alpha, diff --git a/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_sse42.c b/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_sse42.c index 3c4bb1c6..bf8e88ed 100644 --- a/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_sse42.c +++ b/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_sse42.c @@ -52,6 +52,12 @@ #include #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSE42 __attribute__((target("sse4.2"))) +#else +#define ATTRIBUTE_SSE42 +#endif + /* ******************************************************************************** * @@ -87,6 +93,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSE42 void ih264_ihadamard_scaling_4x4_sse42(WORD16* pi2_src, WORD16* pi2_out, const UWORD16 *pu2_iscal_mat, @@ -202,6 +209,7 @@ void ih264_ihadamard_scaling_4x4_sse42(WORD16* pi2_src, _mm_storeu_si128((__m128i *) (&pi2_out[8]), src_r2_r3); } +ATTRIBUTE_SSE42 void ih264_ihadamard_scaling_2x2_uv_sse42(WORD16* pi2_src, WORD16* pi2_out, const UWORD16 *pu2_iscal_mat, diff --git a/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_ssse3.c b/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_ssse3.c index b4d483f1..4dc6d827 100644 --- a/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_ihadamard_scaling_ssse3.c @@ -50,6 +50,12 @@ #include "ih264_trans_quant_itrans_iquant.h" #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /* ******************************************************************************** * @@ -85,6 +91,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_ihadamard_scaling_4x4_ssse3(WORD16* pi2_src, WORD16* pi2_out, const UWORD16 *pu2_iscal_mat, diff --git a/dependencies/ih264d/common/x86/ih264_inter_pred_filters_ssse3.c b/dependencies/ih264d/common/x86/ih264_inter_pred_filters_ssse3.c index 480a8c7c..7927eeb6 100644 --- a/dependencies/ih264d/common/x86/ih264_inter_pred_filters_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_inter_pred_filters_ssse3.c @@ -54,6 +54,12 @@ #include "ih264_platform_macros.h" #include "ih264_inter_pred_filters.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /*****************************************************************************/ /* Constant Data variables */ /*****************************************************************************/ @@ -87,6 +93,7 @@ /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_copy_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -213,6 +220,7 @@ void ih264_inter_pred_luma_copy_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_horz_ssse3(UWORD8 *pu1_src, UWORD8 
*pu1_dst, WORD32 src_strd, @@ -478,6 +486,7 @@ void ih264_inter_pred_luma_horz_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_vert_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -764,6 +773,7 @@ void ih264_inter_pred_luma_vert_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1488,6 +1498,7 @@ void ih264_inter_pred_luma_horz_hpel_vert_hpel_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_horz_qpel_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1782,6 +1793,7 @@ void ih264_inter_pred_luma_horz_qpel_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_vert_qpel_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -2107,6 +2119,7 @@ void ih264_inter_pred_luma_vert_qpel_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -2675,6 +2688,7 @@ void ih264_inter_pred_luma_horz_qpel_vert_qpel_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -3285,6 +3299,7 @@ void ih264_inter_pred_luma_horz_qpel_vert_hpel_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -3991,6 +4006,7 @@ void ih264_inter_pred_luma_horz_hpel_vert_qpel_ssse3(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_inter_pred_chroma_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, diff --git a/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c b/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c index bcfe503f..10dd2647 100644 --- a/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_dc_ssse3.c @@ -50,6 +50,12 @@ #include "ih264_trans_quant_itrans_iquant.h" #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /* ******************************************************************************** * @@ -98,6 +104,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_iquant_itrans_recon_4x4_dc_ssse3(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, @@ -224,6 +231,7 @@ void ih264_iquant_itrans_recon_4x4_dc_ssse3(WORD16 *pi2_src, ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_iquant_itrans_recon_8x8_dc_ssse3 (WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, @@ -385,6 +393,7 @@ 
void ih264_iquant_itrans_recon_8x8_dc_ssse3 (WORD16 *pi2_src, * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_iquant_itrans_recon_chroma_4x4_dc_ssse3(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, diff --git a/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_sse42.c b/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_sse42.c index a7b9e824..e97ca4d0 100644 --- a/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_sse42.c +++ b/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_sse42.c @@ -50,6 +50,12 @@ #include "ih264_trans_quant_itrans_iquant.h" #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSE42 __attribute__((target("sse4.2"))) +#else +#define ATTRIBUTE_SSE42 +#endif + /* ******************************************************************************** * @@ -97,6 +103,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSE42 void ih264_iquant_itrans_recon_4x4_sse42(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, @@ -348,6 +355,7 @@ void ih264_iquant_itrans_recon_4x4_sse42(WORD16 *pi2_src, * ******************************************************************************* */ +ATTRIBUTE_SSE42 void ih264_iquant_itrans_recon_chroma_4x4_sse42(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, diff --git a/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_ssse3.c b/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_ssse3.c index 506be495..96773253 100644 --- a/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_iquant_itrans_recon_ssse3.c @@ -50,6 +50,12 @@ #include "ih264_trans_quant_itrans_iquant.h" #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /* ******************************************************************************** * @@ -97,6 +103,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_iquant_itrans_recon_4x4_ssse3(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, @@ -366,6 +373,7 @@ void ih264_iquant_itrans_recon_4x4_ssse3(WORD16 *pi2_src, ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_iquant_itrans_recon_8x8_ssse3(WORD16 *pi2_src, UWORD8 *pu1_pred, UWORD8 *pu1_out, diff --git a/dependencies/ih264d/common/x86/ih264_luma_intra_pred_filters_ssse3.c b/dependencies/ih264d/common/x86/ih264_luma_intra_pred_filters_ssse3.c index a1721d52..417e986b 100644 --- a/dependencies/ih264d/common/x86/ih264_luma_intra_pred_filters_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_luma_intra_pred_filters_ssse3.c @@ -75,6 +75,12 @@ #include "ih264_platform_macros.h" #include "ih264_intra_pred_filters.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /******************* LUMA INTRAPREDICTION *******************/ @@ -114,6 +120,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_vert_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -173,6 +180,7 @@ void ih264_intra_pred_luma_4x4_mode_vert_ssse3(UWORD8 *pu1_src, * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_horz_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ 
-238,6 +246,7 @@ void ih264_intra_pred_luma_4x4_mode_horz_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_dc_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -316,6 +325,7 @@ void ih264_intra_pred_luma_4x4_mode_dc_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -400,6 +410,7 @@ void ih264_intra_pred_luma_4x4_mode_diag_dl_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -487,6 +498,7 @@ void ih264_intra_pred_luma_4x4_mode_diag_dr_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_vert_r_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -579,6 +591,7 @@ void ih264_intra_pred_luma_4x4_mode_vert_r_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_horz_d_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -675,6 +688,7 @@ void ih264_intra_pred_luma_4x4_mode_horz_d_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_vert_l_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -764,6 +778,7 @@ void ih264_intra_pred_luma_4x4_mode_vert_l_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_4x4_mode_horz_u_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -864,6 +879,7 @@ void ih264_intra_pred_luma_4x4_mode_horz_u_ssse3(UWORD8 *pu1_src, * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_vert_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -921,6 +937,7 @@ void ih264_intra_pred_luma_8x8_mode_vert_ssse3(UWORD8 *pu1_src, * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_horz_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -985,6 +1002,7 @@ void ih264_intra_pred_luma_8x8_mode_horz_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_dc_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1078,6 +1096,7 @@ void ih264_intra_pred_luma_8x8_mode_dc_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1176,6 +1195,7 @@ void ih264_intra_pred_luma_8x8_mode_diag_dl_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1278,6 +1298,7 @@ void 
ih264_intra_pred_luma_8x8_mode_diag_dr_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_vert_r_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1398,6 +1419,7 @@ void ih264_intra_pred_luma_8x8_mode_vert_r_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_horz_d_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1502,6 +1524,7 @@ void ih264_intra_pred_luma_8x8_mode_horz_d_ssse3(UWORD8 *pu1_src, * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_vert_l_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1598,6 +1621,7 @@ void ih264_intra_pred_luma_8x8_mode_vert_l_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_8x8_mode_horz_u_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1699,6 +1723,7 @@ void ih264_intra_pred_luma_8x8_mode_horz_u_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_16x16_mode_vert_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1778,6 +1803,7 @@ void ih264_intra_pred_luma_16x16_mode_vert_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_16x16_mode_horz_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1875,6 +1901,7 @@ void ih264_intra_pred_luma_16x16_mode_horz_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_16x16_mode_dc_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -1998,6 +2025,7 @@ void ih264_intra_pred_luma_16x16_mode_dc_ssse3(UWORD8 *pu1_src, * None * *******************************************************************************/ +ATTRIBUTE_SSSE3 void ih264_intra_pred_luma_16x16_mode_plane_ssse3(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, diff --git a/dependencies/ih264d/common/x86/ih264_mem_fns_ssse3.c b/dependencies/ih264d/common/x86/ih264_mem_fns_ssse3.c index 8ca1f3e5..be3d622c 100644 --- a/dependencies/ih264d/common/x86/ih264_mem_fns_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_mem_fns_ssse3.c @@ -50,6 +50,12 @@ #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /** ******************************************************************************* * @@ -78,6 +84,7 @@ +ATTRIBUTE_SSSE3 void ih264_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_bytes) { int col; @@ -117,6 +124,7 @@ void ih264_memcpy_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 *pu1_src, UWORD32 num_byte */ +ATTRIBUTE_SSSE3 void ih264_memset_mul_8_ssse3(UWORD8 *pu1_dst, UWORD8 value, UWORD32 num_bytes) { int col; diff --git a/dependencies/ih264d/common/x86/ih264_padding_ssse3.c b/dependencies/ih264d/common/x86/ih264_padding_ssse3.c index 43ded8e7..d2aa368a 100644 --- a/dependencies/ih264d/common/x86/ih264_padding_ssse3.c +++ b/dependencies/ih264d/common/x86/ih264_padding_ssse3.c @@ -49,6 +49,12 @@ #include +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else 
+#define ATTRIBUTE_SSSE3 +#endif + /** ******************************************************************************* @@ -89,6 +95,7 @@ ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_pad_left_luma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, @@ -156,6 +163,7 @@ void ih264_pad_left_luma_ssse3(UWORD8 *pu1_src, ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_pad_left_chroma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, @@ -222,6 +230,7 @@ void ih264_pad_left_chroma_ssse3(UWORD8 *pu1_src, ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_pad_right_luma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, @@ -289,6 +298,7 @@ void ih264_pad_right_luma_ssse3(UWORD8 *pu1_src, ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264_pad_right_chroma_ssse3(UWORD8 *pu1_src, WORD32 src_strd, WORD32 ht, diff --git a/dependencies/ih264d/common/x86/ih264_platform_macros.h b/dependencies/ih264d/common/x86/ih264_platform_macros.h index ebc1b106..22de33d6 100644 --- a/dependencies/ih264d/common/x86/ih264_platform_macros.h +++ b/dependencies/ih264d/common/x86/ih264_platform_macros.h @@ -79,10 +79,8 @@ static inline int __builtin_clz(unsigned x) { unsigned long n; - if (x == 0) - return 32; _BitScanReverse(&n, x); - return 31 - n; + return n ^ 31; } static inline int __builtin_ctz(unsigned x) { diff --git a/dependencies/ih264d/common/x86/ih264_resi_trans_quant_sse42.c b/dependencies/ih264d/common/x86/ih264_resi_trans_quant_sse42.c index f4f5cbfa..fa3442f0 100644 --- a/dependencies/ih264d/common/x86/ih264_resi_trans_quant_sse42.c +++ b/dependencies/ih264d/common/x86/ih264_resi_trans_quant_sse42.c @@ -51,6 +51,12 @@ #include "ih264_structs.h" #include "ih264_trans_quant_itrans_iquant.h" #include + +#ifdef __GNUC__ +#define ATTRIBUTE_SSE42 __attribute__((target("sse4.2"))) +#else +#define ATTRIBUTE_SSE42 +#endif /** ******************************************************************************* * @@ -103,6 +109,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSE42 void ih264_resi_trans_quant_4x4_sse42(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD16 *pi2_out, WORD32 src_strd, WORD32 pred_strd, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, @@ -376,6 +383,7 @@ void ih264_resi_trans_quant_4x4_sse42(UWORD8 *pu1_src, UWORD8 *pu1_pred, * ******************************************************************************* */ +ATTRIBUTE_SSE42 void ih264_resi_trans_quant_chroma_4x4_sse42(UWORD8 *pu1_src,UWORD8 *pu1_pred,WORD16 *pi2_out, WORD32 src_strd,WORD32 pred_strd, const UWORD16 *pu2_scale_matrix, @@ -663,6 +671,7 @@ void ih264_resi_trans_quant_chroma_4x4_sse42(UWORD8 *pu1_src,UWORD8 *pu1_pred,WO * */ +ATTRIBUTE_SSE42 void ih264_hadamard_quant_4x4_sse42(WORD16 *pi2_src, WORD16 *pi2_dst, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits, @@ -892,6 +901,7 @@ void ih264_hadamard_quant_4x4_sse42(WORD16 *pi2_src, WORD16 *pi2_dst, * */ +ATTRIBUTE_SSE42 void ih264_hadamard_quant_2x2_uv_sse42(WORD16 *pi2_src, WORD16 *pi2_dst, const UWORD16 *pu2_scale_matrix, const UWORD16 *pu2_threshold_matrix, UWORD32 u4_qbits, diff --git a/dependencies/ih264d/common/x86/ih264_weighted_pred_sse42.c b/dependencies/ih264d/common/x86/ih264_weighted_pred_sse42.c index 48f1f542..8e10db28 100644 
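A note on the __builtin_clz fallback change in ih264_platform_macros.h above: _BitScanReverse stores the bit index n of the most significant set bit, so the old shim returned 31 - n and special-cased x == 0. GCC's real __builtin_clz is undefined for an argument of 0, so dropping the guard matches the builtin being emulated, and for any index in the range 0..31 the value 31 - n equals n ^ 31 (31 is binary 11111, so the subtraction never borrows). A minimal self-check of that identity in plain C, independent of the decoder sources:

#include <assert.h>

int main(void)
{
    /* For bit indices 0..31, 31 - n and n ^ 31 agree, which is why the
       MSVC fallback can simply return n ^ 31 after _BitScanReverse. */
    for (unsigned n = 0; n < 32; ++n)
        assert(31u - n == (n ^ 31u));
    return 0;
}
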
--- a/dependencies/ih264d/common/x86/ih264_weighted_pred_sse42.c +++ b/dependencies/ih264d/common/x86/ih264_weighted_pred_sse42.c @@ -50,6 +50,12 @@ #include "ih264_platform_macros.h" #include "ih264_weighted_pred.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSE42 __attribute__((target("sse4.2"))) +#else +#define ATTRIBUTE_SSE42 +#endif + /*****************************************************************************/ /* Function definitions . */ /*****************************************************************************/ @@ -82,6 +88,7 @@ /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSE42 void ih264_default_weighted_pred_luma_sse42(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst, @@ -245,6 +252,7 @@ void ih264_default_weighted_pred_luma_sse42(UWORD8 *pu1_src1, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSE42 void ih264_default_weighted_pred_chroma_sse42(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst, @@ -375,6 +383,7 @@ void ih264_default_weighted_pred_chroma_sse42(UWORD8 *pu1_src1, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSE42 void ih264_weighted_pred_luma_sse42(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -605,6 +614,7 @@ void ih264_weighted_pred_luma_sse42(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSE42 void ih264_weighted_pred_chroma_sse42(UWORD8 *pu1_src, UWORD8 *pu1_dst, WORD32 src_strd, @@ -814,6 +824,7 @@ void ih264_weighted_pred_chroma_sse42(UWORD8 *pu1_src, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSE42 void ih264_weighted_bi_pred_luma_sse42(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst, @@ -1101,6 +1112,7 @@ void ih264_weighted_bi_pred_luma_sse42(UWORD8 *pu1_src1, /* Senthoor */ /* */ /*****************************************************************************/ +ATTRIBUTE_SSE42 void ih264_weighted_bi_pred_chroma_sse42(UWORD8 *pu1_src1, UWORD8 *pu1_src2, UWORD8 *pu1_dst, diff --git a/dependencies/ih264d/decoder/x86/ih264d_function_selector_sse42.c b/dependencies/ih264d/decoder/x86/ih264d_function_selector_sse42.c index 0c493d22..c7636f38 100644 --- a/dependencies/ih264d/decoder/x86/ih264d_function_selector_sse42.c +++ b/dependencies/ih264d/decoder/x86/ih264d_function_selector_sse42.c @@ -60,6 +60,12 @@ #include "ih264d_structs.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSE42 __attribute__((target("sse4.2"))) +#else +#define ATTRIBUTE_SSE42 +#endif + /** ******************************************************************************* @@ -79,6 +85,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSE42 void ih264d_init_function_ptr_sse42(dec_struct_t *ps_codec) { ps_codec->pf_default_weighted_pred_luma = ih264_default_weighted_pred_luma_sse42; diff --git a/dependencies/ih264d/decoder/x86/ih264d_function_selector_ssse3.c b/dependencies/ih264d/decoder/x86/ih264d_function_selector_ssse3.c index 17862139..cd8043c6 100644 --- a/dependencies/ih264d/decoder/x86/ih264d_function_selector_ssse3.c +++ b/dependencies/ih264d/decoder/x86/ih264d_function_selector_ssse3.c @@ -60,6 +60,12 @@ #include "ih264d_structs.h" +#ifdef __GNUC__ +#define ATTRIBUTE_SSSE3 __attribute__((target("ssse3"))) +#else +#define ATTRIBUTE_SSSE3 +#endif + /** 
******************************************************************************* @@ -79,6 +85,7 @@ * ******************************************************************************* */ +ATTRIBUTE_SSSE3 void ih264d_init_function_ptr_ssse3(dec_struct_t *ps_codec) { diff --git a/dependencies/imgui b/dependencies/imgui new file mode 160000 index 00000000..f65bcf48 --- /dev/null +++ b/dependencies/imgui @@ -0,0 +1 @@ +Subproject commit f65bcf481ab34cd07d3909aab1479f409fa79f2f diff --git a/dependencies/vcpkg b/dependencies/vcpkg index 1b0252ca..533a5fda 160000 --- a/dependencies/vcpkg +++ b/dependencies/vcpkg @@ -1 +1 @@ -Subproject commit 1b0252ca70ca2244a711535462c7f981eb439e83 +Subproject commit 533a5fda5c0646d1771345fb572e759283444d5f diff --git a/src/util/ThreadPool/ThreadPool.cpp b/dependencies/vcpkg_overlay_ports/.gitkeep similarity index 100% rename from src/util/ThreadPool/ThreadPool.cpp rename to dependencies/vcpkg_overlay_ports/.gitkeep diff --git a/dependencies/vcpkg_overlay_ports/fmt/fix-warning4189.patch b/dependencies/vcpkg_overlay_ports/fmt/fix-warning4189.patch deleted file mode 100644 index 0efab0f1..00000000 --- a/dependencies/vcpkg_overlay_ports/fmt/fix-warning4189.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/include/fmt/format.h b/include/fmt/format.h -index 4e96539..0f1d179 100644 ---- a/include/fmt/format.h -+++ b/include/fmt/format.h -@@ -33,6 +33,7 @@ - #ifndef FMT_FORMAT_H_ - #define FMT_FORMAT_H_ - -+#pragma warning(disable:4189) - #include - #include - #include diff --git a/dependencies/vcpkg_overlay_ports/fmt/portfile.cmake b/dependencies/vcpkg_overlay_ports/fmt/portfile.cmake deleted file mode 100644 index e3edc0f6..00000000 --- a/dependencies/vcpkg_overlay_ports/fmt/portfile.cmake +++ /dev/null @@ -1,65 +0,0 @@ -vcpkg_from_github( - OUT_SOURCE_PATH SOURCE_PATH - REPO fmtlib/fmt - REF a33701196adfad74917046096bf5a2aa0ab0bb50 # v9.1.0 - SHA512 0faf00e99b332fcb3d9fc50cc9649ddc004ca9035f3652c1a001facee725dab09f67b65a9dfcce0aedb47e76c74c45a9262a1fd6e250a9e9a27c7d021c8ee6b8 - HEAD_REF master - # PATCHES fix-warning4189.patch -) - -vcpkg_cmake_configure( - SOURCE_PATH ${SOURCE_PATH} - DISABLE_PARALLEL_CONFIGURE # with MSBuild (on UWP), fmt writes into the source directory - OPTIONS - -DFMT_CMAKE_DIR=share/fmt - -DFMT_TEST=OFF - -DFMT_DOC=OFF -) - -vcpkg_cmake_install() -file(INSTALL ${SOURCE_PATH}/LICENSE.rst DESTINATION ${CURRENT_PACKAGES_DIR}/share/${PORT} RENAME copyright) -if(VCPKG_LIBRARY_LINKAGE STREQUAL dynamic) - if(VCPKG_TARGET_IS_WINDOWS) - if(NOT DEFINED VCPKG_BUILD_TYPE OR VCPKG_BUILD_TYPE STREQUAL "debug") - if(EXISTS "${CURRENT_PACKAGES_DIR}/debug/lib/fmtd.dll") - file(MAKE_DIRECTORY ${CURRENT_PACKAGES_DIR}/debug/bin) - file(RENAME ${CURRENT_PACKAGES_DIR}/debug/lib/fmtd.dll ${CURRENT_PACKAGES_DIR}/debug/bin/fmtd.dll) - endif() - endif() - if(NOT DEFINED VCPKG_BUILD_TYPE OR VCPKG_BUILD_TYPE STREQUAL "release") - if(EXISTS "${CURRENT_PACKAGES_DIR}/lib/fmt.dll") - file(MAKE_DIRECTORY ${CURRENT_PACKAGES_DIR}/bin) - file(RENAME ${CURRENT_PACKAGES_DIR}/lib/fmt.dll ${CURRENT_PACKAGES_DIR}/bin/fmt.dll) - endif() - endif() - endif() - - vcpkg_replace_string(${CURRENT_PACKAGES_DIR}/include/fmt/core.h - "defined(FMT_SHARED)" - "1" - ) -endif() -file(REMOVE_RECURSE ${CURRENT_PACKAGES_DIR}/debug/include) - -vcpkg_cmake_config_fixup() -vcpkg_fixup_pkgconfig() - -if(VCPKG_TARGET_IS_WINDOWS) - if(NOT DEFINED VCPKG_BUILD_TYPE OR VCPKG_BUILD_TYPE STREQUAL "debug") - vcpkg_replace_string(${CURRENT_PACKAGES_DIR}/share/fmt/fmt-targets-debug.cmake - "lib/fmtd.dll" - 
"bin/fmtd.dll" - ) - endif() - if(NOT DEFINED VCPKG_BUILD_TYPE OR VCPKG_BUILD_TYPE STREQUAL "release") - vcpkg_replace_string(${CURRENT_PACKAGES_DIR}/share/fmt/fmt-targets-release.cmake - "lib/fmt.dll" - "bin/fmt.dll" - ) - endif() -endif() -file(REMOVE_RECURSE ${CURRENT_PACKAGES_DIR}/debug/share) - -# Handle post-build CMake instructions -vcpkg_copy_pdbs() -file(INSTALL ${CMAKE_CURRENT_LIST_DIR}/usage DESTINATION ${CURRENT_PACKAGES_DIR}/share/${PORT}) diff --git a/dependencies/vcpkg_overlay_ports/fmt/usage b/dependencies/vcpkg_overlay_ports/fmt/usage deleted file mode 100644 index c9988aa7..00000000 --- a/dependencies/vcpkg_overlay_ports/fmt/usage +++ /dev/null @@ -1,7 +0,0 @@ -The package fmt provides CMake targets: - - find_package(fmt CONFIG REQUIRED) - target_link_libraries(main PRIVATE fmt::fmt) - - # Or use the header-only version - target_link_libraries(main PRIVATE fmt::fmt-header-only) diff --git a/dependencies/vcpkg_overlay_ports/fmt/vcpkg.json b/dependencies/vcpkg_overlay_ports/fmt/vcpkg.json deleted file mode 100644 index 880adf38..00000000 --- a/dependencies/vcpkg_overlay_ports/fmt/vcpkg.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "name": "fmt", - "version": "7.1.3", - "port-version": 2, - "description": "Formatting library for C++. It can be used as a safe alternative to printf or as a fast alternative to IOStreams.", - "homepage": "https://github.com/fmtlib/fmt", - "dependencies": [ - { - "name": "vcpkg-cmake", - "host": true - }, - { - "name": "vcpkg-cmake-config", - "host": true - } - ] -} diff --git a/dependencies/vcpkg_overlay_ports/wxwidgets/example/CMakeLists.txt b/dependencies/vcpkg_overlay_ports/wxwidgets/example/CMakeLists.txt deleted file mode 100644 index 229b7107..00000000 --- a/dependencies/vcpkg_overlay_ports/wxwidgets/example/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -cmake_minimum_required(VERSION 3.7) - -project(wxwidgets-example) - -add_executable(main WIN32 popup.cpp) - -find_package(wxWidgets REQUIRED) -target_compile_definitions(main PRIVATE ${wxWidgets_DEFINITIONS} "$<$:${wxWidgets_DEFINITIONS_DEBUG}>") -target_include_directories(main PRIVATE ${wxWidgets_INCLUDE_DIRS}) -target_link_libraries(main PRIVATE ${wxWidgets_LIBRARIES}) - -add_executable(main2 WIN32 popup.cpp) - -find_package(wxWidgets CONFIG REQUIRED) -target_link_libraries(main2 PRIVATE wx::core wx::base) - -option(USE_WXRC "Use the wxrc resource compiler" ON) -if(USE_WXRC) - execute_process( - COMMAND "${wxWidgets_wxrc_EXECUTABLE}" --help - RESULTS_VARIABLE error_result - ) - if(error_result) - message(FATAL_ERROR "Failed to run wxWidgets_wxrc_EXECUTABLE (${wxWidgets_wxrc_EXECUTABLE})") - endif() -endif() - -set(PRINT_VARS "" CACHE STRING "Variables to print at the end of configuration") -foreach(var IN LISTS PRINT_VARS) - message(STATUS "${var}:=${${var}}") -endforeach() diff --git a/dependencies/vcpkg_overlay_ports/wxwidgets/fix-libs-export.patch b/dependencies/vcpkg_overlay_ports/wxwidgets/fix-libs-export.patch deleted file mode 100644 index 064c3a7e..00000000 --- a/dependencies/vcpkg_overlay_ports/wxwidgets/fix-libs-export.patch +++ /dev/null @@ -1,21 +0,0 @@ -diff --git a/build/cmake/config.cmake b/build/cmake/config.cmake -index 52ae69d3f6..f261d5d262 100644 ---- a/build/cmake/config.cmake -+++ b/build/cmake/config.cmake -@@ -39,8 +39,14 @@ macro(wx_get_dependencies var lib) - else() - # For the value like $<$:LIB_PATH> - # Or $<$>:LIB_PATH> -- string(REGEX REPLACE "^.+>:(.+)>$" "\\1" dep_name ${dep}) -- if (NOT dep_name) -+ if(dep MATCHES "^(.+>):(.+)>$") -+ if(CMAKE_BUILD_TYPE 
STREQUAL "Debug" AND CMAKE_MATCH_1 STREQUAL [[$<$>]]) -+ continue() -+ elseif(CMAKE_BUILD_TYPE STREQUAL "Release" AND CMAKE_MATCH_1 STREQUAL [[$<$]]) -+ continue() -+ endif() -+ set(dep_name "${CMAKE_MATCH_2}") -+ else() - set(dep_name ${dep}) - endif() - endif() diff --git a/dependencies/vcpkg_overlay_ports/wxwidgets/fix-pcre2.patch b/dependencies/vcpkg_overlay_ports/wxwidgets/fix-pcre2.patch deleted file mode 100644 index 20063f44..00000000 --- a/dependencies/vcpkg_overlay_ports/wxwidgets/fix-pcre2.patch +++ /dev/null @@ -1,23 +0,0 @@ -diff --git a/build/cmake/modules/FindPCRE2.cmake b/build/cmake/modules/FindPCRE2.cmake -index a27693a..455675a 100644 ---- a/build/cmake/modules/FindPCRE2.cmake -+++ b/build/cmake/modules/FindPCRE2.cmake -@@ -24,7 +24,10 @@ set(PCRE2_CODE_UNIT_WIDTH_USED "${PCRE2_CODE_UNIT_WIDTH}" CACHE INTERNAL "") - - find_package(PkgConfig QUIET) - pkg_check_modules(PC_PCRE2 QUIET libpcre2-${PCRE2_CODE_UNIT_WIDTH}) -+set(PCRE2_LIBRARIES ${PC_PCRE2_LINK_LIBRARIES}) -+set(PCRE2_INCLUDE_DIRS ${PC_PCRE2_INCLUDE_DIRS}) - -+if (0) - find_path(PCRE2_INCLUDE_DIRS - NAMES pcre2.h - HINTS ${PC_PCRE2_INCLUDEDIR} -@@ -36,6 +39,7 @@ find_library(PCRE2_LIBRARIES - HINTS ${PC_PCRE2_LIBDIR} - ${PC_PCRE2_LIBRARY_DIRS} - ) -+endif() - - include(FindPackageHandleStandardArgs) - FIND_PACKAGE_HANDLE_STANDARD_ARGS(PCRE2 REQUIRED_VARS PCRE2_LIBRARIES PCRE2_INCLUDE_DIRS VERSION_VAR PC_PCRE2_VERSION) diff --git a/dependencies/vcpkg_overlay_ports/wxwidgets/gtk3-link-libraries.patch b/dependencies/vcpkg_overlay_ports/wxwidgets/gtk3-link-libraries.patch deleted file mode 100644 index fe2736b2..00000000 --- a/dependencies/vcpkg_overlay_ports/wxwidgets/gtk3-link-libraries.patch +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/build/cmake/modules/FindGTK3.cmake b/build/cmake/modules/FindGTK3.cmake -index d2939a1..daf33fe 100644 ---- a/build/cmake/modules/FindGTK3.cmake -+++ b/build/cmake/modules/FindGTK3.cmake -@@ -47,6 +47,7 @@ include(CheckSymbolExists) - set(CMAKE_REQUIRED_INCLUDES ${GTK3_INCLUDE_DIRS}) - check_symbol_exists(GDK_WINDOWING_WAYLAND "gdk/gdk.h" wxHAVE_GDK_WAYLAND) - check_symbol_exists(GDK_WINDOWING_X11 "gdk/gdk.h" wxHAVE_GDK_X11) -+set(GTK3_LIBRARIES "${GTK3_LINK_LIBRARIES}" CACHE INTERNAL "") - include(FindPackageHandleStandardArgs) - FIND_PACKAGE_HANDLE_STANDARD_ARGS(GTK3 DEFAULT_MSG GTK3_INCLUDE_DIRS GTK3_LIBRARIES VERSION_OK) - diff --git a/dependencies/vcpkg_overlay_ports/wxwidgets/install-layout.patch b/dependencies/vcpkg_overlay_ports/wxwidgets/install-layout.patch deleted file mode 100644 index e55381d3..00000000 --- a/dependencies/vcpkg_overlay_ports/wxwidgets/install-layout.patch +++ /dev/null @@ -1,52 +0,0 @@ -diff --git a/build/cmake/functions.cmake b/build/cmake/functions.cmake -index 32bd959..74f31ed 100644 ---- a/build/cmake/functions.cmake -+++ b/build/cmake/functions.cmake -@@ -418,7 +418,7 @@ macro(wx_add_library name) - set_target_properties(${name} PROPERTIES PROJECT_LABEL ${name_short}) - - # Setup install -- set(runtime_dir "lib") -+ set(runtime_dir "bin") - if(WIN32 AND NOT WIN32_MSVC_NAMING) - # configure puts the .dll in the bin directory - set(runtime_dir "bin") -diff --git a/build/cmake/init.cmake b/build/cmake/init.cmake -index 3ff14ab..7bd00d3 100644 ---- a/build/cmake/init.cmake -+++ b/build/cmake/init.cmake -@@ -146,7 +146,7 @@ if(WIN32) - endif() - endif() - --if(WIN32_MSVC_NAMING) -+if(0) - if(wxBUILD_SHARED) - set(lib_suffix "_dll") - else() -diff --git a/build/cmake/install.cmake b/build/cmake/install.cmake -index 84cb9f5..e2f460a 100644 ---- 
a/build/cmake/install.cmake -+++ b/build/cmake/install.cmake -@@ -48,7 +48,7 @@ else() - - install(DIRECTORY DESTINATION "bin") - install(CODE "execute_process( \ -- COMMAND ${CMAKE_COMMAND} -E create_symlink \ -+ COMMAND ${CMAKE_COMMAND} -E copy \ - ${CMAKE_INSTALL_PREFIX}/lib/wx/config/${wxBUILD_FILE_ID} \ - ${CMAKE_INSTALL_PREFIX}/bin/wx-config \ - )" -diff --git a/build/cmake/utils/CMakeLists.txt b/build/cmake/utils/CMakeLists.txt -index d6b3465..870897b 100644 ---- a/build/cmake/utils/CMakeLists.txt -+++ b/build/cmake/utils/CMakeLists.txt -@@ -38,7 +38,7 @@ if(wxUSE_XRC) - endif() - - wx_install(CODE "execute_process( \ -- COMMAND ${CMAKE_COMMAND} -E create_symlink \ -+ COMMAND ${CMAKE_COMMAND} -E copy \ - ${CMAKE_INSTALL_PREFIX}/bin/${wxrc_output_name}${EXE_SUFFIX} \ - ${CMAKE_INSTALL_PREFIX}/bin/wxrc${EXE_SUFFIX} \ - )" diff --git a/dependencies/vcpkg_overlay_ports/wxwidgets/nanosvg-ext-depend.patch b/dependencies/vcpkg_overlay_ports/wxwidgets/nanosvg-ext-depend.patch deleted file mode 100644 index 029b8c70..00000000 --- a/dependencies/vcpkg_overlay_ports/wxwidgets/nanosvg-ext-depend.patch +++ /dev/null @@ -1,42 +0,0 @@ -diff --git a/build/cmake/lib/nanosvg.cmake b/build/cmake/lib/nanosvg.cmake -index 401bf48..b9e4b57 100644 ---- a/build/cmake/lib/nanosvg.cmake -+++ b/build/cmake/lib/nanosvg.cmake -@@ -16,9 +16,9 @@ elseif(wxUSE_NANOSVG) - set(NANOSVG_INCLUDE_DIRS ) - set(wxUSE_NANOSVG_EXTERNAL_ENABLE_IMPL TRUE) - -- find_package(NanoSVG REQUIRED) -+ find_package(unofficial-nanosvg CONFIG REQUIRED) - -- foreach(TARGETNAME NanoSVG::nanosvg NanoSVG::nanosvgrast unofficial::nanosvg) -+ foreach(TARGETNAME unofficial::nanosvg::nanosvg) - if(NOT TARGET ${TARGETNAME}) - continue() - endif() -diff --git a/build/cmake/options.cmake b/build/cmake/options.cmake -index 49c536a..5630526 100644 ---- a/build/cmake/options.cmake -+++ b/build/cmake/options.cmake -@@ -114,7 +114,7 @@ wx_add_thirdparty_library(wxUSE_EXPAT EXPAT "use expat for XML parsing" DEFAULT_ - wx_add_thirdparty_library(wxUSE_LIBJPEG JPEG "use libjpeg (JPEG file format)") - wx_add_thirdparty_library(wxUSE_LIBPNG PNG "use libpng (PNG image format)") - wx_add_thirdparty_library(wxUSE_LIBTIFF TIFF "use libtiff (TIFF file format)") --wx_add_thirdparty_library(wxUSE_NANOSVG NanoSVG "use NanoSVG for rasterizing SVG") -+wx_add_thirdparty_library(wxUSE_NANOSVG unofficial-nanosvg "use NanoSVG for rasterizing SVG") - - wx_option(wxUSE_LIBLZMA "use LZMA compression" OFF) - set(wxTHIRD_PARTY_LIBRARIES ${wxTHIRD_PARTY_LIBRARIES} wxUSE_LIBLZMA "use liblzma for LZMA compression") -diff --git a/build/cmake/wxWidgetsConfig.cmake.in b/build/cmake/wxWidgetsConfig.cmake.in -index 6ef5a6e..248a701 100644 ---- a/build/cmake/wxWidgetsConfig.cmake.in -+++ b/build/cmake/wxWidgetsConfig.cmake.in -@@ -1,5 +1,8 @@ - @PACKAGE_INIT@ - -+include(CMakeFindDependencyMacro) -+find_dependency(unofficial-nanosvg CONFIG) -+ - # determine target from compiler, platform and library type - if(WIN32 AND NOT CYGWIN AND NOT MSYS) - if(${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC) diff --git a/dependencies/vcpkg_overlay_ports/wxwidgets/portfile.cmake b/dependencies/vcpkg_overlay_ports/wxwidgets/portfile.cmake deleted file mode 100644 index 984fc083..00000000 --- a/dependencies/vcpkg_overlay_ports/wxwidgets/portfile.cmake +++ /dev/null @@ -1,236 +0,0 @@ -vcpkg_from_github( - OUT_SOURCE_PATH SOURCE_PATH - REPO wxWidgets/wxWidgets - REF v3.2.0 - SHA512 0bb40ccab51f5e83a38feeaf462c9d1852f821d19592328327f829890d89a3abb2a991c43cdbac55da8f5ee40aab8bd5fea6abcd052198302770292f92f9f9ad - 
HEAD_REF master - PATCHES - install-layout.patch - relocatable-wx-config.patch - fix-libs-export.patch - fix-pcre2.patch - gtk3-link-libraries.patch -) - -if(VCPKG_TARGET_IS_LINUX) - message(WARNING [[ -Port wxwidgets currently requires the following packages from the system package manager: - pkg-config - GTK 3 - libsecret - libgcrypt - libsystemd -These development packages can be installed on Ubuntu systems via - sudo apt-get install pkg-config libgtk-3-dev libsecret-1-dev libgcrypt20-dev libsystemd-dev -]]) - foreach(conflicting_port IN ITEMS freetype glib) - if(EXISTS "${CURRENT_INSTALLED_DIR}/share/${conflicting_port}/copyright") - message(FATAL_ERROR "Port ${conflicting_port} must not be installed when building ${PORT}:${TARGET_TRIPLET}.") - endif() - endforeach() -endif() - -vcpkg_check_features( - OUT_FEATURE_OPTIONS FEATURE_OPTIONS - FEATURES - sound wxUSE_SOUND - fonts wxUSE_PRIVATE_FONTS -) - -set(OPTIONS_RELEASE "") -if(NOT "debug-support" IN_LIST FEATURES) - list(APPEND OPTIONS_RELEASE "-DwxBUILD_DEBUG_LEVEL=0") -endif() - -set(OPTIONS "") -if(VCPKG_TARGET_IS_WINDOWS AND (VCPKG_TARGET_ARCHITECTURE STREQUAL "arm64" OR VCPKG_TARGET_ARCHITECTURE STREQUAL "arm")) - list(APPEND OPTIONS - -DwxUSE_OPENGL=OFF - -DwxUSE_STACKWALKER=OFF - ) -endif() - -if(VCPKG_TARGET_IS_WINDOWS OR VCPKG_TARGET_IS_OSX) - list(APPEND OPTIONS -DwxUSE_WEBREQUEST_CURL=OFF) -else() - list(APPEND OPTIONS -DwxUSE_WEBREQUEST_CURL=ON) -endif() - -if(DEFINED ENV{PKG_CONFIG}) - set(PKGCONFIG "$ENV{PKG_CONFIG}") -elseif(VCPKG_TARGET_IS_LINUX AND NOT VCPKG_CROSSCOMPILING) - # wxWidgets on Linux currently needs to find the system's `gtk+-3.0.pc`. - # vcpkg's port pkgconf would prevent this lookup. - find_program(system_pkg_config NAMES pkg-config) - if(system_pkg_config) - set(PKGCONFIG "${system_pkg_config}") - endif() - if(VCPKG_LIBRARY_LINKAGE STREQUAL "static") - list(APPEND OPTIONS -DPKG_CONFIG_ARGN=--static) - endif() -endif() -vcpkg_find_acquire_program(PKGCONFIG) - -# This may be set to ON by users in a custom triplet. -# The use of 'wxUSE_STL' and 'WXWIDGETS_USE_STD_CONTAINERS' (ON or OFF) are not API compatible -# which is why they must be set in a custom triplet rather than a port feature. -if(NOT DEFINED WXWIDGETS_USE_STL) - #set(WXWIDGETS_USE_STL OFF) -endif() - -if(NOT DEFINED WXWIDGETS_USE_STD_CONTAINERS) - set(WXWIDGETS_USE_STD_CONTAINERS OFF) -endif() - -vcpkg_cmake_configure( - SOURCE_PATH "${SOURCE_PATH}" - OPTIONS - ${FEATURE_OPTIONS} - -DwxUSE_REGEX=sys - -DwxUSE_ZLIB=sys - -DwxUSE_EXPAT=sys - -DwxUSE_LIBJPEG=sys - -DwxUSE_LIBPNG=sys - -DwxUSE_LIBTIFF=sys - -DwxUSE_SECRETSTORE=FALSE - -DwxUSE_STL=ON - -DwxUSE_STD_CONTAINERS=${WXWIDGETS_USE_STD_CONTAINERS} - -DwxBUILD_DISABLE_PLATFORM_LIB_DIR=ON - -DwxUSE_LIBLZMA=OFF - -DwxUSE_JOYSTICK=OFF - -DwxUSE_SOCKETS=OFF - -DwxUSE_IPV6=OFF - -DwxUSE_FS_ZIP=OFF - -DwxUSE_FS_ARCHIVE=OFF - -DwxUSE_FS_INET=OFF - -DwxUSE_ARCHIVE_STREAMS=OFF - -DwxUSE_ZIPSTREAM=OFF - -DwxUSE_TARSTREAM=OFF - -DwxUSE_PROTOCOL=OFF - -DwxUSE_PROTOCOL_FTP=OFF - -DwxUSE_PROTOCOL_HTTP=OFF - -DwxUSE_URL=OFF - -DwxUSE_SOUND=OFF - -DwxUSE_WEBVIEW=OFF - -DwxUSE_RICHTEXT=OFF - -DwxUSE_SVG=OFF - -DwxUSE_GIF=OFF - -DwxUSE_PNM=OFF - -DwxUSE_PCX=OFF - -DwxUSE_WEBREQUEST=OFF - -DwxUSE_ACTIVEX=OFF - -DwxUSE_REGEX=OFF - -DwxUSE_NANOSVG=OFF - -DwxUSE_NANOSVG_EXTERNAL=OFF - ${OPTIONS} - "-DPKG_CONFIG_EXECUTABLE=${PKGCONFIG}" - # The minimum cmake version requirement for Cotire is 2.8.12. 
- # however, we need to declare that the minimum cmake version requirement is at least 3.1 to use CMAKE_PREFIX_PATH as the path to find .pc. - -DPKG_CONFIG_USE_CMAKE_PREFIX_PATH=ON - OPTIONS_RELEASE - ${OPTIONS_RELEASE} -) - -vcpkg_cmake_install() -vcpkg_cmake_config_fixup(CONFIG_PATH lib/cmake/wxWidgets) - -# The CMake export is not ready for use: It lacks a config file. -file(REMOVE_RECURSE - ${CURRENT_PACKAGES_DIR}/lib/cmake - ${CURRENT_PACKAGES_DIR}/debug/lib/cmake -) - -set(tools wxrc) -if(NOT VCPKG_TARGET_IS_WINDOWS OR NOT VCPKG_HOST_IS_WINDOWS) - list(APPEND tools wxrc-3.2) - file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/tools/${PORT}") - file(RENAME "${CURRENT_PACKAGES_DIR}/bin/wx-config" "${CURRENT_PACKAGES_DIR}/tools/${PORT}/wx-config") - if(NOT VCPKG_BUILD_TYPE) - file(MAKE_DIRECTORY "${CURRENT_PACKAGES_DIR}/tools/${PORT}/debug") - file(RENAME "${CURRENT_PACKAGES_DIR}/debug/bin/wx-config" "${CURRENT_PACKAGES_DIR}/tools/${PORT}/debug/wx-config") - endif() -endif() -vcpkg_copy_tools(TOOL_NAMES ${tools} AUTO_CLEAN) - -# do the copy pdbs now after the dlls got moved to the expected /bin folder above -vcpkg_copy_pdbs() - -file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/include/msvc") -file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include") -file(GLOB_RECURSE INCLUDES "${CURRENT_PACKAGES_DIR}/include/*.h") -if(EXISTS "${CURRENT_PACKAGES_DIR}/lib/mswu/wx/setup.h") - list(APPEND INCLUDES "${CURRENT_PACKAGES_DIR}/lib/mswu/wx/setup.h") -endif() -if(EXISTS "${CURRENT_PACKAGES_DIR}/debug/lib/mswud/wx/setup.h") - list(APPEND INCLUDES "${CURRENT_PACKAGES_DIR}/debug/lib/mswud/wx/setup.h") -endif() -foreach(INC IN LISTS INCLUDES) - file(READ "${INC}" _contents) - if(VCPKG_LIBRARY_LINKAGE STREQUAL "static") - string(REPLACE "defined(WXUSINGDLL)" "0" _contents "${_contents}") - else() - string(REPLACE "defined(WXUSINGDLL)" "1" _contents "${_contents}") - endif() - # Remove install prefix from setup.h to ensure package is relocatable - string(REGEX REPLACE "\n#define wxINSTALL_PREFIX [^\n]*" "\n#define wxINSTALL_PREFIX \"\"" _contents "${_contents}") - file(WRITE "${INC}" "${_contents}") -endforeach() - -if(NOT EXISTS "${CURRENT_PACKAGES_DIR}/include/wx/setup.h") - file(GLOB_RECURSE WX_SETUP_H_FILES_DBG "${CURRENT_PACKAGES_DIR}/debug/lib/*.h") - file(GLOB_RECURSE WX_SETUP_H_FILES_REL "${CURRENT_PACKAGES_DIR}/lib/*.h") - - if(NOT DEFINED VCPKG_BUILD_TYPE OR VCPKG_BUILD_TYPE STREQUAL "release") - vcpkg_replace_string("${WX_SETUP_H_FILES_REL}" "${CURRENT_PACKAGES_DIR}" "") - - string(REPLACE "${CURRENT_PACKAGES_DIR}/lib/" "" WX_SETUP_H_FILES_REL "${WX_SETUP_H_FILES_REL}") - string(REPLACE "/setup.h" "" WX_SETUP_H_REL_RELATIVE "${WX_SETUP_H_FILES_REL}") - endif() - if(NOT DEFINED VCPKG_BUILD_TYPE OR VCPKG_BUILD_TYPE STREQUAL "debug") - vcpkg_replace_string("${WX_SETUP_H_FILES_DBG}" "${CURRENT_PACKAGES_DIR}" "") - - string(REPLACE "${CURRENT_PACKAGES_DIR}/debug/lib/" "" WX_SETUP_H_FILES_DBG "${WX_SETUP_H_FILES_DBG}") - string(REPLACE "/setup.h" "" WX_SETUP_H_DBG_RELATIVE "${WX_SETUP_H_FILES_DBG}") - endif() - - configure_file("${CMAKE_CURRENT_LIST_DIR}/setup.h.in" "${CURRENT_PACKAGES_DIR}/include/wx/setup.h" @ONLY) -endif() - -file(GLOB configs LIST_DIRECTORIES false "${CURRENT_PACKAGES_DIR}/lib/wx/config/*" "${CURRENT_PACKAGES_DIR}/tools/${PORT}/wx-config") -foreach(config IN LISTS configs) - vcpkg_replace_string("${config}" "${CURRENT_INSTALLED_DIR}" [[${prefix}]]) -endforeach() -file(GLOB configs LIST_DIRECTORIES false "${CURRENT_PACKAGES_DIR}/debug/lib/wx/config/*" 
"${CURRENT_PACKAGES_DIR}/tools/${PORT}/debug/wx-config") -foreach(config IN LISTS configs) - vcpkg_replace_string("${config}" "${CURRENT_INSTALLED_DIR}/debug" [[${prefix}]]) -endforeach() - -# For CMake multi-config in connection with wrapper -if(EXISTS "${CURRENT_PACKAGES_DIR}/debug/lib/mswud/wx/setup.h") - file(INSTALL "${CURRENT_PACKAGES_DIR}/debug/lib/mswud/wx/setup.h" - DESTINATION "${CURRENT_PACKAGES_DIR}/lib/mswud/wx" - ) -endif() - -if(NOT "debug-support" IN_LIST FEATURES) - if(VCPKG_TARGET_IS_WINDOWS AND VCPKG_HOST_IS_WINDOWS) - vcpkg_replace_string("${CURRENT_PACKAGES_DIR}/include/wx/debug.h" "#define wxDEBUG_LEVEL 1" "#define wxDEBUG_LEVEL 0") - else() - vcpkg_replace_string("${CURRENT_PACKAGES_DIR}/include/wx-3.2/wx/debug.h" "#define wxDEBUG_LEVEL 1" "#define wxDEBUG_LEVEL 0") - endif() -endif() - -if("example" IN_LIST FEATURES) - file(INSTALL - "${CMAKE_CURRENT_LIST_DIR}/example/CMakeLists.txt" - "${SOURCE_PATH}/samples/popup/popup.cpp" - "${SOURCE_PATH}/samples/sample.xpm" - DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}/example" - ) - vcpkg_replace_string("${CURRENT_PACKAGES_DIR}/share/${PORT}/example/popup.cpp" "../sample.xpm" "sample.xpm") -endif() - -configure_file("${CMAKE_CURRENT_LIST_DIR}/vcpkg-cmake-wrapper.cmake" "${CURRENT_PACKAGES_DIR}/share/${PORT}/vcpkg-cmake-wrapper.cmake" @ONLY) - -file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}") -file(INSTALL "${SOURCE_PATH}/docs/licence.txt" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}" RENAME copyright) diff --git a/dependencies/vcpkg_overlay_ports/wxwidgets/relocatable-wx-config.patch b/dependencies/vcpkg_overlay_ports/wxwidgets/relocatable-wx-config.patch deleted file mode 100644 index 8a5e7c4d..00000000 --- a/dependencies/vcpkg_overlay_ports/wxwidgets/relocatable-wx-config.patch +++ /dev/null @@ -1,49 +0,0 @@ -diff --git a/wx-config.in b/wx-config.in -index 441f88c..b326867 100755 ---- a/wx-config.in -+++ b/wx-config.in -@@ -91,7 +91,7 @@ EOF - - - # Contentious tools determined by configure. --EGREP="@EGREP@" -+EGREP="grep -E" # no absolute path from host - - - # For the people who know what they want, or think they do: -@@ -402,8 +402,23 @@ is_cross() { [ "x@cross_compiling@" = "xyes" ]; } - - - # Determine the base directories we require. 
--prefix=${input_option_prefix-${this_prefix:-@prefix@}} --exec_prefix=${input_option_exec_prefix-${input_option_prefix-${this_exec_prefix:-@exec_prefix@}}} -+vcpkg_prefix=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd -P) -+case "$vcpkg_prefix" in -+ */lib/wx/config) -+ vcpkg_prefix=${vcpkg_prefix%/*/*/*} -+ ;; -+ */tools/wxwidgets/debug) -+ vcpkg_prefix=${vcpkg_prefix%/*/*/*}/debug -+ ;; -+ */tools/wxwidgets) -+ vcpkg_prefix=${vcpkg_prefix%/*/*} -+ ;; -+esac -+if [ -n "@MINGW@" -a -n "@CMAKE_HOST_WIN32@" ]; then -+ vcpkg_prefix=$(cygpath -m "$vcpkg_prefix") -+fi -+prefix=${input_option_prefix-${this_prefix:-$vcpkg_prefix}} -+exec_prefix=${input_option_exec_prefix-${input_option_prefix-${this_exec_prefix:-$prefix}}} - wxconfdir="@libdir@/wx/config" - - installed_configs=`cd "$wxconfdir" 2> /dev/null && ls | grep -v "^inplace-"` -@@ -940,6 +949,9 @@ prefix=${this_prefix-$prefix} - exec_prefix=${this_exec_prefix-$exec_prefix} - - includedir="@includedir@" -+if [ "@CMAKE_BUILD_TYPE@" = "Debug" ] ; then -+ includedir="${includedir%/debug/include}/include" -+fi - libdir="@libdir@" - bindir="@bindir@" - diff --git a/dependencies/vcpkg_overlay_ports/wxwidgets/setup.h.in b/dependencies/vcpkg_overlay_ports/wxwidgets/setup.h.in deleted file mode 100644 index b927735b..00000000 --- a/dependencies/vcpkg_overlay_ports/wxwidgets/setup.h.in +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef _DEBUG -#include "../../debug/lib/@WX_SETUP_H_DBG_RELATIVE@/setup.h" -#else -#include "../../lib/@WX_SETUP_H_REL_RELATIVE@/setup.h" -#endif diff --git a/dependencies/vcpkg_overlay_ports/wxwidgets/usage b/dependencies/vcpkg_overlay_ports/wxwidgets/usage deleted file mode 100644 index bf1043e8..00000000 --- a/dependencies/vcpkg_overlay_ports/wxwidgets/usage +++ /dev/null @@ -1,4 +0,0 @@ -The package wxwidgets provides CMake targets: - - find_package(wxWidgets CONFIG REQUIRED) - target_link_libraries(main PRIVATE wx::core wx::base) diff --git a/dependencies/vcpkg_overlay_ports/wxwidgets/vcpkg-cmake-wrapper.cmake b/dependencies/vcpkg_overlay_ports/wxwidgets/vcpkg-cmake-wrapper.cmake deleted file mode 100644 index b605525a..00000000 --- a/dependencies/vcpkg_overlay_ports/wxwidgets/vcpkg-cmake-wrapper.cmake +++ /dev/null @@ -1,77 +0,0 @@ -cmake_policy(PUSH) -cmake_policy(SET CMP0012 NEW) -cmake_policy(SET CMP0054 NEW) - -get_filename_component(_vcpkg_wx_root "${CMAKE_CURRENT_LIST_DIR}/../.." 
ABSOLUTE) -set(wxWidgets_ROOT_DIR "${_vcpkg_wx_root}" CACHE INTERNAL "") -set(WX_ROOT_DIR "${_vcpkg_wx_root}" CACHE INTERNAL "") -unset(_vcpkg_wx_root) - -if(WIN32 AND CMAKE_HOST_WIN32) - # Find all libs with "32" infix which is unknown to FindwxWidgets.cmake - function(z_vcpkg_wxwidgets_find_base_library BASENAME) - find_library(WX_${BASENAME}d wx${BASENAME}32ud NAMES wx${BASENAME}d PATHS "${wxWidgets_ROOT_DIR}/debug/lib" NO_DEFAULT_PATH) - find_library(WX_${BASENAME} wx${BASENAME}32u NAMES wx${BASENAME} PATHS "${wxWidgets_ROOT_DIR}/lib" NO_DEFAULT_PATH REQUIRED) - endfunction() - function(z_vcpkg_wxwidgets_find_suffix_library BASENAME) - foreach(lib IN LISTS ARGN) - find_library(WX_${lib}d NAMES wx${BASENAME}32ud_${lib} PATHS "${wxWidgets_ROOT_DIR}/debug/lib" NO_DEFAULT_PATH) - find_library(WX_${lib} NAMES wx${BASENAME}32u_${lib} PATHS "${wxWidgets_ROOT_DIR}/lib" NO_DEFAULT_PATH) - endforeach() - endfunction() - z_vcpkg_wxwidgets_find_base_library(base) - z_vcpkg_wxwidgets_find_suffix_library(base net odbc xml) - z_vcpkg_wxwidgets_find_suffix_library(msw core adv aui html media xrc dbgrid gl qa richtext stc ribbon propgrid webview) - if(WX_stc AND "@VCPKG_LIBRARY_LINKAGE@" STREQUAL "static") - z_vcpkg_wxwidgets_find_base_library(scintilla) - endif() - # Force FindwxWidgets.cmake win32 mode for all windows targets built on windows - set(_vcpkg_wxwidgets_backup_crosscompiling "${CMAKE_CROSSCOMPILING}") - set(CMAKE_CROSSCOMPILING 0) - set(wxWidgets_LIB_DIR "${wxWidgets_ROOT_DIR}/lib" CACHE INTERNAL "") -else() - # FindwxWidgets.cmake unix mode, single-config - if(MINGW) - # Force FindwxWidgets.cmake unix mode for mingw cross builds - set(_vcpkg_wxwidgets_backup_crosscompiling "${CMAKE_CROSSCOMPILING}") - set(CMAKE_CROSSCOMPILING 1) - endif() - set(_vcpkg_wxconfig "") - if(CMAKE_BUILD_TYPE STREQUAL "Debug" OR "Debug" IN_LIST MAP_IMPORTED_CONFIG_${CMAKE_BUILD_TYPE}) - # Debug - set(wxWidgets_LIB_DIR "${wxWidgets_ROOT_DIR}/debug/lib" CACHE INTERNAL "") - file(GLOB _vcpkg_wxconfig LIST_DIRECTORIES false "${wxWidgets_LIB_DIR}/wx/config/*") - endif() - if(NOT _vcpkg_wxconfig) - # Release or fallback - set(wxWidgets_LIB_DIR "${wxWidgets_ROOT_DIR}/lib" CACHE INTERNAL "") - file(GLOB _vcpkg_wxconfig LIST_DIRECTORIES false "${wxWidgets_LIB_DIR}/wx/config/*") - endif() - set(wxWidgets_CONFIG_EXECUTABLE "${_vcpkg_wxconfig}" CACHE INTERNAL "") - unset(_vcpkg_wxconfig) -endif() -set(WX_LIB_DIR "${wxWidgets_LIB_DIR}" CACHE INTERNAL "") - -_find_package(${ARGS}) - -if(DEFINED _vcpkg_wxwidgets_backup_crosscompiling) - set(CMAKE_CROSSCOMPILING "${_vcpkg_wxwidgets_backup_crosscompiling}") - unset(_vcpkg_wxwidgets_backup_crosscompiling) -endif() - -if(WIN32 AND CMAKE_HOST_WIN32 AND "@VCPKG_LIBRARY_LINKAGE@" STREQUAL "static" AND NOT "wx::core" IN_LIST wxWidgets_LIBRARIES) - find_package(EXPAT QUIET) - find_package(JPEG QUIET) - find_package(PNG QUIET) - find_package(TIFF QUIET) - find_package(ZLIB QUIET) - list(APPEND wxWidgets_LIBRARIES - ${EXPAT_LIBRARIES} - ${JPEG_LIBRARIES} - ${PNG_LIBRARIES} - ${TIFF_LIBRARIES} - ${ZLIB_LIBRARIES} - ) -endif() - -cmake_policy(POP) diff --git a/dependencies/vcpkg_overlay_ports/wxwidgets/vcpkg.json b/dependencies/vcpkg_overlay_ports/wxwidgets/vcpkg.json deleted file mode 100644 index 3f04aece..00000000 --- a/dependencies/vcpkg_overlay_ports/wxwidgets/vcpkg.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "name": "wxwidgets", - "version": "3.2.0", - "port-version": 1, - "description": [ - "Widget toolkit and tools library for creating graphical user interfaces (GUIs) for 
cross-platform applications. ", - "Set WXWIDGETS_USE_STL in a custom triplet to build with the wxUSE_STL build option.", - "Set WXWIDGETS_USE_STD_CONTAINERS in a custom triplet to build with the wxUSE_STD_CONTAINERS build option." - ], - "homepage": "https://github.com/wxWidgets/wxWidgets", - "license": "LGPL-2.0-or-later WITH WxWindows-exception-3.1", - "supports": "!uwp", - "dependencies": [ - { - "name": "curl", - "default-features": false, - "platform": "!windows & !osx" - }, - "expat", - "libpng", - "tiff", - { - "name": "vcpkg-cmake", - "host": true - }, - { - "name": "vcpkg-cmake-config", - "host": true - }, - "zlib" - ], - "default-features": [ - "debug-support", - "sound" - ], - "features": { - "debug-support": { - "description": "Enable wxWidgets debugging support hooks even for release builds (wxDEBUG_LEVEL 1)" - }, - "example": { - "description": "Example source code and CMake project" - }, - "fonts": { - "description": "Enable to use the font functionality of wxWidgets", - "dependencies": [ - { - "name": "fontconfig", - "platform": "!windows & !osx" - }, - { - "name": "pango", - "platform": "!windows & !osx" - } - ] - }, - "sound": { - "description": "Build wxSound support", - "dependencies": [ - { - "name": "sdl2", - "default-features": false, - "platform": "!windows & !osx" - } - ] - } - } -} diff --git a/dependencies/vcpkg_overlay_ports_linux/.gitkeep b/dependencies/vcpkg_overlay_ports_linux/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/dependencies/vcpkg_overlay_ports_linux/cairo/portfile.cmake b/dependencies/vcpkg_overlay_ports_linux/cairo/portfile.cmake new file mode 100644 index 00000000..065116c2 --- /dev/null +++ b/dependencies/vcpkg_overlay_ports_linux/cairo/portfile.cmake @@ -0,0 +1 @@ +set(VCPKG_POLICY_EMPTY_PACKAGE enabled) diff --git a/dependencies/vcpkg_overlay_ports_linux/cairo/vcpkg.json b/dependencies/vcpkg_overlay_ports_linux/cairo/vcpkg.json new file mode 100644 index 00000000..6d520494 --- /dev/null +++ b/dependencies/vcpkg_overlay_ports_linux/cairo/vcpkg.json @@ -0,0 +1,8 @@ +{ + "name": "cairo", + "version": "1.17.8", + "description": "Cairo is a 2D graphics library with support for multiple output devices. Currently supported output targets include the X Window System (via both Xlib and XCB), Quartz, Win32, image buffers, PostScript, PDF, and SVG file output. 
Experimental backends include OpenGL, BeOS, OS/2, and DirectFB.", + "homepage": "https://cairographics.org", + "license": "MPL-1.1", + "port-version": 2 +} diff --git a/dependencies/vcpkg_overlay_ports_linux/glm/portfile.cmake b/dependencies/vcpkg_overlay_ports_linux/glm/portfile.cmake new file mode 100644 index 00000000..065116c2 --- /dev/null +++ b/dependencies/vcpkg_overlay_ports_linux/glm/portfile.cmake @@ -0,0 +1 @@ +set(VCPKG_POLICY_EMPTY_PACKAGE enabled) diff --git a/dependencies/vcpkg_overlay_ports_linux/glm/vcpkg.json b/dependencies/vcpkg_overlay_ports_linux/glm/vcpkg.json new file mode 100644 index 00000000..17f5fb1d --- /dev/null +++ b/dependencies/vcpkg_overlay_ports_linux/glm/vcpkg.json @@ -0,0 +1,8 @@ +{ + "name": "glm", + "version": "0.9.9.8", + "port-version": 3, + "description": "OpenGL Mathematics (GLM)", + "homepage": "https://glm.g-truc.net", + "license": "MIT" +} diff --git a/dependencies/vcpkg_overlay_ports_linux/gtk3/portfile.cmake b/dependencies/vcpkg_overlay_ports_linux/gtk3/portfile.cmake new file mode 100644 index 00000000..065116c2 --- /dev/null +++ b/dependencies/vcpkg_overlay_ports_linux/gtk3/portfile.cmake @@ -0,0 +1 @@ +set(VCPKG_POLICY_EMPTY_PACKAGE enabled) diff --git a/dependencies/vcpkg_overlay_ports_linux/gtk3/vcpkg.json b/dependencies/vcpkg_overlay_ports_linux/gtk3/vcpkg.json new file mode 100644 index 00000000..c160ac00 --- /dev/null +++ b/dependencies/vcpkg_overlay_ports_linux/gtk3/vcpkg.json @@ -0,0 +1,8 @@ +{ + "name": "gtk3", + "version": "3.24.34", + "port-version": 5, + "description": "Portable library for creating graphical user interfaces.", + "homepage": "https://www.gtk.org/", + "license": null +} diff --git a/dependencies/vcpkg_overlay_ports_linux/libpng/portfile.cmake b/dependencies/vcpkg_overlay_ports_linux/libpng/portfile.cmake new file mode 100644 index 00000000..065116c2 --- /dev/null +++ b/dependencies/vcpkg_overlay_ports_linux/libpng/portfile.cmake @@ -0,0 +1 @@ +set(VCPKG_POLICY_EMPTY_PACKAGE enabled) diff --git a/dependencies/vcpkg_overlay_ports_linux/libpng/vcpkg.json b/dependencies/vcpkg_overlay_ports_linux/libpng/vcpkg.json new file mode 100644 index 00000000..8c54781c --- /dev/null +++ b/dependencies/vcpkg_overlay_ports_linux/libpng/vcpkg.json @@ -0,0 +1,8 @@ +{ + "name": "libpng", + "version": "1.6.39", + "port-version": 2, + "description": "libpng is a library implementing an interface for reading and writing PNG (Portable Network Graphics) format files", + "homepage": "https://github.com/glennrp/libpng", + "license": "libpng-2.0" +} diff --git a/dependencies/vcpkg_overlay_ports_mac/.gitkeep b/dependencies/vcpkg_overlay_ports_mac/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/dependencies/vcpkg_overlay_ports_mac/libusb/portfile.cmake b/dependencies/vcpkg_overlay_ports_mac/libusb/portfile.cmake new file mode 100644 index 00000000..7b76bba0 --- /dev/null +++ b/dependencies/vcpkg_overlay_ports_mac/libusb/portfile.cmake @@ -0,0 +1,71 @@ +set(VCPKG_LIBRARY_LINKAGE dynamic) + +if(VCPKG_TARGET_IS_LINUX) + message("${PORT} currently requires the following tools and libraries from the system package manager:\n autoreconf\n libudev\n\nThese can be installed on Ubuntu systems via apt-get install autoconf libudev-dev") +endif() + +set(VERSION 1.0.26) +vcpkg_from_github( + OUT_SOURCE_PATH SOURCE_PATH + REPO libusb/libusb + REF fcf0c710ef5911ae37fbbf1b39d48a89f6f14e8a # v1.0.26.11791 2023-03-12 + SHA512 
0aa6439f7988487adf2a3bff473fec80b5c722a47f117a60696d2aa25c87cc3f20fb6aaca7c66e49be25db6a35eb0bb5f71ed7b211d1b8ee064c5d7f1b985c73 + HEAD_REF master +) + +if(VCPKG_TARGET_IS_WINDOWS AND NOT VCPKG_TARGET_IS_MINGW) + + if(VCPKG_LIBRARY_LINKAGE STREQUAL "dynamic") + set(LIBUSB_PROJECT_TYPE dll) + else() + set(LIBUSB_PROJECT_TYPE static) + endif() + + # The README.md file in the archive is a symlink to README + # which causes issues with the windows MSBUILD process + file(REMOVE "${SOURCE_PATH}/README.md") + + vcpkg_msbuild_install( + SOURCE_PATH "${SOURCE_PATH}" + PROJECT_SUBPATH msvc/libusb_${LIBUSB_PROJECT_TYPE}.vcxproj + ) + + file(INSTALL "${SOURCE_PATH}/libusb/libusb.h" DESTINATION "${CURRENT_PACKAGES_DIR}/include/libusb-1.0") + set(prefix "") + set(exec_prefix [[${prefix}]]) + set(libdir [[${prefix}/lib]]) + set(includedir [[${prefix}/include]]) + configure_file("${SOURCE_PATH}/libusb-1.0.pc.in" "${CURRENT_PACKAGES_DIR}/lib/pkgconfig/libusb-1.0.pc" @ONLY) + vcpkg_replace_string("${CURRENT_PACKAGES_DIR}/lib/pkgconfig/libusb-1.0.pc" " -lusb-1.0" " -llibusb-1.0") + if(NOT VCPKG_BUILD_TYPE) + set(includedir [[${prefix}/../include]]) + configure_file("${SOURCE_PATH}/libusb-1.0.pc.in" "${CURRENT_PACKAGES_DIR}/debug/lib/pkgconfig/libusb-1.0.pc" @ONLY) + vcpkg_replace_string("${CURRENT_PACKAGES_DIR}/debug/lib/pkgconfig/libusb-1.0.pc" " -lusb-1.0" " -llibusb-1.0") + endif() +else() + vcpkg_list(SET MAKE_OPTIONS) + vcpkg_list(SET LIBUSB_LINK_LIBRARIES) + if(VCPKG_TARGET_IS_EMSCRIPTEN) + vcpkg_list(APPEND MAKE_OPTIONS BUILD_TRIPLET --host=wasm32) + endif() + if("udev" IN_LIST FEATURES) + vcpkg_list(APPEND MAKE_OPTIONS "--enable-udev") + vcpkg_list(APPEND LIBUSB_LINK_LIBRARIES udev) + else() + vcpkg_list(APPEND MAKE_OPTIONS "--disable-udev") + endif() + vcpkg_configure_make( + SOURCE_PATH "${SOURCE_PATH}" + AUTOCONFIG + OPTIONS + ${MAKE_OPTIONS} + "--enable-examples-build=no" + "--enable-tests-build=no" + ) + vcpkg_install_make() +endif() + +vcpkg_fixup_pkgconfig() + +file(INSTALL "${CMAKE_CURRENT_LIST_DIR}/usage" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}") +vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/COPYING") diff --git a/dependencies/vcpkg_overlay_ports_mac/libusb/usage b/dependencies/vcpkg_overlay_ports_mac/libusb/usage new file mode 100644 index 00000000..87e6e860 --- /dev/null +++ b/dependencies/vcpkg_overlay_ports_mac/libusb/usage @@ -0,0 +1,5 @@ +libusb can be imported via CMake FindPkgConfig module: + find_package(PkgConfig REQUIRED) + pkg_check_modules(libusb REQUIRED IMPORTED_TARGET libusb-1.0) + + target_link_libraries(main PRIVATE PkgConfig::libusb) diff --git a/dependencies/vcpkg_overlay_ports_mac/libusb/vcpkg.json b/dependencies/vcpkg_overlay_ports_mac/libusb/vcpkg.json new file mode 100644 index 00000000..efc70f3d --- /dev/null +++ b/dependencies/vcpkg_overlay_ports_mac/libusb/vcpkg.json @@ -0,0 +1,8 @@ +{ + "name": "libusb", + "version": "1.0.26.11791", + "port-version": 7, + "description": "a cross-platform library to access USB devices", + "homepage": "https://github.com/libusb/libusb", + "license": "LGPL-2.1-or-later" +} diff --git a/dependencies/xbyak_aarch64 b/dependencies/xbyak_aarch64 new file mode 160000 index 00000000..904b8923 --- /dev/null +++ b/dependencies/xbyak_aarch64 @@ -0,0 +1 @@ +Subproject commit 904b8923457f3ec0d6f82ea2d6832a792851194d diff --git a/dist/linux/appimage.sh b/dist/linux/appimage.sh new file mode 100755 index 00000000..b66326d7 --- /dev/null +++ b/dist/linux/appimage.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +if [[ -z "${GITHUB_WORKSPACE}" ]]; 
then + export GITHUB_WORKSPACE="." +fi + +curl -sSfLO "https://github.com/linuxdeploy/linuxdeploy/releases/download/continuous/linuxdeploy-x86_64.AppImage" +chmod a+x linuxdeploy*.AppImage +curl -sSfL https://github.com"$(curl https://github.com/probonopd/go-appimage/releases/expanded_assets/continuous | grep "mkappimage-.*-x86_64.AppImage" | head -n 1 | cut -d '"' -f 2)" -o mkappimage.AppImage +chmod a+x mkappimage.AppImage +curl -sSfLO "https://raw.githubusercontent.com/linuxdeploy/linuxdeploy-plugin-gtk/master/linuxdeploy-plugin-gtk.sh" +chmod a+x linuxdeploy-plugin-gtk.sh +curl -sSfLO "https://github.com/darealshinji/linuxdeploy-plugin-checkrt/releases/download/continuous/linuxdeploy-plugin-checkrt.sh" +chmod a+x linuxdeploy-plugin-checkrt.sh + +if [[ ! -e /usr/lib/x86_64-linux-gnu ]]; then + sed -i 's#lib\/x86_64-linux-gnu#lib64#g' linuxdeploy-plugin-gtk.sh +fi + +mkdir -p AppDir/usr/bin +mkdir -p AppDir/usr/share/Cemu +mkdir -p AppDir/usr/share/applications +mkdir -p AppDir/usr/share/icons/hicolor/128x128/apps +mkdir -p AppDir/usr/share/metainfo +mkdir -p AppDir/usr/lib + +cp dist/linux/info.cemu.Cemu.{desktop,png} AppDir/ +cp dist/linux/info.cemu.Cemu.metainfo.xml AppDir/usr/share/metainfo/info.cemu.Cemu.appdata.xml + +cp -r bin/* AppDir/usr/share/Cemu + +mv AppDir/usr/share/Cemu/Cemu AppDir/usr/bin/ +chmod +x AppDir/usr/bin/Cemu + +cp /usr/lib/x86_64-linux-gnu/{libsepol.so.1,libffi.so.7,libpcre.so.3,libGLU.so.1,libthai.so.0} AppDir/usr/lib + +export UPD_INFO="gh-releases-zsync|cemu-project|Cemu|ci|Cemu.AppImage.zsync" +export NO_STRIP=1 +./linuxdeploy-x86_64.AppImage --appimage-extract-and-run \ + --appdir="${GITHUB_WORKSPACE}"/AppDir/ \ + -d "${GITHUB_WORKSPACE}"/AppDir/info.cemu.Cemu.desktop \ + -i "${GITHUB_WORKSPACE}"/AppDir/info.cemu.Cemu.png \ + -e "${GITHUB_WORKSPACE}"/AppDir/usr/bin/Cemu \ + --plugin gtk \ + --plugin checkrt + +if ! 
GITVERSION="$(git rev-parse --short HEAD 2>/dev/null)"; then + GITVERSION=experimental +fi +echo "Cemu Version Cemu-${GITVERSION}" + +rm AppDir/usr/lib/libwayland-client.so.0 +echo -e "export LC_ALL=C\nexport FONTCONFIG_PATH=/etc/fonts" >> AppDir/apprun-hooks/linuxdeploy-plugin-gtk.sh +VERSION="${GITVERSION}" ./mkappimage.AppImage --appimage-extract-and-run "${GITHUB_WORKSPACE}"/AppDir + +mkdir -p "${GITHUB_WORKSPACE}"/artifacts/ +mv Cemu-"${GITVERSION}"-x86_64.AppImage "${GITHUB_WORKSPACE}"/artifacts/ diff --git a/dist/linux/info.cemu.Cemu.desktop b/dist/linux/info.cemu.Cemu.desktop index 1a25b665..6eeb0120 100644 --- a/dist/linux/info.cemu.Cemu.desktop +++ b/dist/linux/info.cemu.Cemu.desktop @@ -3,7 +3,7 @@ Name=Cemu Type=Application Terminal=false Icon=info.cemu.Cemu -Exec=cemu +Exec=Cemu GenericName=Wii U Emulator GenericName[fi]=Wii U -emulaattori GenericName[el]=Πρόγραμμα προσομοίωσης Wii U @@ -24,3 +24,4 @@ Comment[it]=Software per emulare giochi e applicazioni per Wii U su PC Categories=Game;Emulator; Keywords=Nintendo; MimeType=application/x-wii-u-rom; +StartupWMClass=Cemu diff --git a/dist/linux/info.cemu.Cemu.metainfo.xml b/dist/linux/info.cemu.Cemu.metainfo.xml index d270a8ab..ef59427f 100644 --- a/dist/linux/info.cemu.Cemu.metainfo.xml +++ b/dist/linux/info.cemu.Cemu.metainfo.xml @@ -3,15 +3,15 @@ info.cemu.Cemu Cemu - Software to emulate Wii U games and applications on PC - Software zum emulieren von Wii U Spielen und Anwendungen auf dem PC - Application pour émuler des jeux et applications Wii U sur PC - Applicatie om Wii U spellen en applicaties te emuleren op PC - Πρόγραμμα προσομοίωσης παιχνιδιών και εφαρμογών Wii U στον υπολογιστή - Software para emular juegos y aplicaciones de Wii U en PC - Software para emular jogos e aplicativos de Wii U no PC - Software per emulare giochi e applicazioni per Wii U su PC - Ojelmisto Wii U -pelien ja -sovellusten emulointiin PC:llä + Nintendo Wii U Emulator + Nintendo Wii U Emulator + Émulateur Nintendo Wii U + Nintendo Wii U Emulator + Εξομοιωτής Nintendo Wii U + Emulador de Nintendo Wii U + Emulador Nintendo Wii U + Emulatore Nintendo Wii U + Nintendo Wii U Emulaattori Cemu Project info.cemu.Cemu.desktop CC0-1.0 @@ -52,7 +52,7 @@ - + https://github.com/cemu-project/Cemu/releases/tag/v2.0 @@ -60,14 +60,11 @@ https://github.com/cemu-project/Cemu/issues https://cemu.info/faq.html https://wiki.cemu.info - https://github.com/cemu-project/Cemu + Game Emulator - - 4096 - 8192 diff --git a/dist/linux/info.cemu.Cemu.png b/dist/linux/info.cemu.Cemu.png new file mode 100644 index 00000000..6f655072 Binary files /dev/null and b/dist/linux/info.cemu.Cemu.png differ diff --git a/dist/network_services.xml b/dist/network_services.xml new file mode 100644 index 00000000..0c0f2e3e --- /dev/null +++ b/dist/network_services.xml @@ -0,0 +1,17 @@ + + + CustomExample + 0 + + https://account.nintendo.net + https://ecs.wup.shop.nintendo.net/ecs/services/ECommerceSOAP + https://nus.wup.shop.nintendo.net/nus/services/NetUpdateSOAP + https://ias.wup.shop.nintendo.net/ias/services/IdentityAuthenticationSOAP + https://ccs.wup.shop.nintendo.net/ccs/download + http://ccs.cdn.wup.shop.nintendo.net/ccs/download + https://idbe-wup.cdn.nintendo.net/icondata + https://npts.app.nintendo.net/p01/tasksheet + https://tagaya.wup.shop.nintendo.net/tagaya/versionlist + https://discovery.olv.nintendo.net/v1/endpoint + + \ No newline at end of file diff --git a/dist/windows/Cemu.manifest b/dist/windows/Cemu.manifest new file mode 100644 index 00000000..5ff952b1 --- /dev/null +++ 
b/dist/windows/Cemu.manifest @@ -0,0 +1,16 @@ + + + + + + + + + + + + + True/PM + + + \ No newline at end of file diff --git a/generate_vs_solution.bat b/generate_vs_solution.bat deleted file mode 100644 index 21060027..00000000 --- a/generate_vs_solution.bat +++ /dev/null @@ -1,2 +0,0 @@ -"C:\PROGRAM FILES\MICROSOFT VISUAL STUDIO\2022\COMMUNITY\COMMON7\IDE\COMMONEXTENSIONS\MICROSOFT\CMAKE\CMake\bin\cmake.exe" -B build/ -pause \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d3bf8aec..04b6dfdd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,11 +11,6 @@ endif() if(MSVC) add_compile_definitions(WIN32_LEAN_AND_MEAN CURL_STATICLIB) - #add_compile_definitions(VK_USE_PLATFORM_WIN32_KHR) - # _CRT_SECURE_NO_WARNINGS - # _WINSOCK_DEPRECATED_NO_WARNINGS - # _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING - # _SILENCE_ALL_CXX17_DEPRECATION_WARNINGS elseif(UNIX) if(APPLE) add_compile_definitions( @@ -28,8 +23,10 @@ elseif(UNIX) VK_USE_PLATFORM_XLIB_KHR # legacy. Do we need to support XLIB surfaces? VK_USE_PLATFORM_XCB_KHR ) + if (ENABLE_WAYLAND) + add_compile_definitions(VK_USE_PLATFORM_WAYLAND_KHR) + endif() endif() - add_compile_options(-maes) # warnings if(CMAKE_C_COMPILER_ID STREQUAL "Clang") add_compile_options(-Wno-ambiguous-reversed-operator) @@ -52,33 +49,84 @@ add_subdirectory(audio) add_subdirectory(util) add_subdirectory(imgui) add_subdirectory(resource) -add_subdirectory(asm) -if(PUBLIC_RELEASE) - add_executable(CemuBin WIN32 - main.cpp - mainLLE.cpp - ) -else() - add_executable(CemuBin - main.cpp - mainLLE.cpp - ) +add_executable(CemuBin + main.cpp + mainLLE.cpp +) + +if(MSVC AND MSVC_VERSION EQUAL 1940) + # workaround for an msvc issue on VS 17.10 where generated ILK files are too large + # see https://developercommunity.visualstudio.com/t/After-updating-to-VS-1710-the-size-of-/10665511 + set_target_properties(CemuBin PROPERTIES LINK_FLAGS "/INCREMENTAL:NO") endif() if(WIN32) target_sources(CemuBin PRIVATE resource/cemu.rc -) + ../dist/windows/cemu.manifest + ) endif() set_property(TARGET CemuBin PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") +set_property(TARGET CemuBin PROPERTY WIN32_EXECUTABLE $>) +set(OUTPUT_NAME "Cemu_$>") + +if (MACOS_BUNDLE) + set_property(TARGET CemuBin PROPERTY MACOSX_BUNDLE_INFO_PLIST "${CMAKE_CURRENT_SOURCE_DIR}/resource/MacOSXBundleInfo.plist.in") + + set(RESOURCE_FILES "${CMAKE_SOURCE_DIR}/src/resource/cemu.icns") + target_sources(CemuBin PRIVATE "${RESOURCE_FILES}") + + set(MACOSX_BUNDLE_ICON_FILE "cemu.icns") + set(MACOSX_BUNDLE_GUI_IDENTIFIER "info.cemu.Cemu") + set(MACOSX_BUNDLE_BUNDLE_NAME "Cemu") + set(MACOSX_BUNDLE_SHORT_VERSION_STRING "${EMULATOR_VERSION_MAJOR}.${EMULATOR_VERSION_MINOR}.${EMULATOR_VERSION_PATCH}") + set(MACOSX_BUNDLE_BUNDLE_VERSION "${EMULATOR_VERSION_MAJOR}.${EMULATOR_VERSION_MINOR}.${EMULATOR_VERSION_PATCH}") + set(MACOSX_BUNDLE_COPYRIGHT "Copyright © 2024 Cemu Project") + + set(MACOSX_BUNDLE_CATEGORY "public.app-category.games") + set(MACOSX_MINIMUM_SYSTEM_VERSION "12.0") + set(MACOSX_BUNDLE_TYPE_EXTENSION "wua") + + set_target_properties(CemuBin PROPERTIES + MACOSX_BUNDLE true + RESOURCE "${RESOURCE_FILES}" + ) + + set(FOLDERS gameProfiles resources) + foreach(folder ${FOLDERS}) + add_custom_command (TARGET CemuBin POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy_directory "${CMAKE_SOURCE_DIR}/bin/${folder}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/SharedSupport/${folder}") + endforeach(folder) + + if(CMAKE_BUILD_TYPE STREQUAL "Debug") + 
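	# vcpkg installs debug libraries under <triplet>/debug/lib, so pick the libusb dylib that
	# matches the active configuration; the install_name_tool commands further below rewrite the
	# executable's load command to point at the copy bundled under Contents/Frameworks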
set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/debug/lib/libusb-1.0.0.dylib") + else() + set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib/libusb-1.0.0.dylib") + endif() + + if (EXISTS "/usr/local/lib/libMoltenVK.dylib") + set(MOLTENVK_PATH "/usr/local/lib/libMoltenVK.dylib") + elseif (EXISTS "/opt/homebrew/lib/libMoltenVK.dylib") + set(MOLTENVK_PATH "/opt/homebrew/lib/libMoltenVK.dylib") + else() + message(FATAL_ERROR "failed to find libMoltenVK.dylib") + endif () + + add_custom_command (TARGET CemuBin POST_BUILD + COMMAND ${CMAKE_COMMAND} ARGS -E copy "${MOLTENVK_PATH}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib" + COMMAND ${CMAKE_COMMAND} ARGS -E copy "${LIBUSB_PATH}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libusb-1.0.0.dylib" + COMMAND ${CMAKE_COMMAND} ARGS -E copy "${CMAKE_SOURCE_DIR}/src/resource/update.sh" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/update.sh" + COMMAND bash -c "install_name_tool -add_rpath @executable_path/../Frameworks ${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/${OUTPUT_NAME}" + COMMAND bash -c "install_name_tool -change ${LIBUSB_PATH} @executable_path/../Frameworks/libusb-1.0.0.dylib ${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/${OUTPUT_NAME}") +endif() set_target_properties(CemuBin PROPERTIES # multi-configuration generators will add a config subdirectory to RUNTIME_OUTPUT_DIRECTORY if no generator expression is used # to get the same behavior everywhere we append an empty generator expression RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/../bin/$<1:>" - OUTPUT_NAME "Cemu_$>" + OUTPUT_NAME "${OUTPUT_NAME}" ) target_link_libraries(CemuBin PRIVATE @@ -94,6 +142,15 @@ target_link_libraries(CemuBin PRIVATE SDL2::SDL2 ) +if(UNIX AND NOT APPLE) + # due to nasm output some linkers will make stack executable + # cemu does not require this so we explicity disable it + target_link_options(CemuBin PRIVATE -z noexecstack) + # some residual debug info from boost/discord-rpc is normally included + # most likely not helpful in debugging problems with cemu code + target_link_options(CemuBin PRIVATE "$<$:-Xlinker;--strip-debug>") +endif() + if (ENABLE_WXWIDGETS) target_link_libraries(CemuBin PRIVATE wx::base wx::core) endif() diff --git a/src/Cafe/Account/Account.cpp b/src/Cafe/Account/Account.cpp index 2cf424f1..d022b604 100644 --- a/src/Cafe/Account/Account.cpp +++ b/src/Cafe/Account/Account.cpp @@ -1,14 +1,13 @@ #include "Account.h" #include "util/helpers/helpers.h" -#include "gui/CemuApp.h" #include "util/helpers/SystemException.h" - +#include "util/helpers/StringHelpers.h" #include "config/ActiveSettings.h" #include "Cafe/IOSU/legacy/iosu_crypto.h" #include "Common/FileStream.h" +#include #include -#include std::vector Account::s_account_list; @@ -168,14 +167,14 @@ std::error_code Account::Load() } catch(const std::exception& ex) { - forceLog_printf("handled error in Account::Load: %s", ex.what()); + cemuLog_log(LogType::Force, "handled error in Account::Load: {}", ex.what()); return AccountErrc::ParseError; } } std::error_code Account::Save() { - fs::path path = CemuApp::GetMLCPath(fmt::format(L"usr/save/system/act/{:08x}", m_persistent_id)).ToStdWstring(); + fs::path path = ActiveSettings::GetMlcPath(fmt::format(L"usr/save/system/act/{:08x}", m_persistent_id)); if (!fs::exists(path)) { std::error_code ec; @@ -184,7 +183,7 @@ std::error_code Account::Save() return ec; } - path /= L"account.dat"; + 
path /= "account.dat"; try { @@ -302,7 +301,7 @@ void Account::SetMiiName(std::wstring_view name) const std::vector& Account::RefreshAccounts() { std::vector result; - const fs::path path = CemuApp::GetMLCPath(L"usr/save/system/act").ToStdWstring(); + const fs::path path = ActiveSettings::GetMlcPath("usr/save/system/act"); if (fs::exists(path)) { for (const auto& it : fs::directory_iterator(path)) @@ -349,7 +348,7 @@ void Account::UpdatePersisidDat() f.close(); } else - forceLog_printf("Unable to save persisid.dat"); + cemuLog_log(LogType::Force, "Unable to save persisid.dat"); } bool Account::HasFreeAccountSlots() @@ -417,14 +416,14 @@ fs::path Account::GetFileName(uint32 persistent_id) if (persistent_id < kMinPersistendId) throw std::invalid_argument(fmt::format("persistent id {:#x} is invalid", persistent_id)); - return CemuApp::GetMLCPath(fmt::format(L"usr/save/system/act/{:08x}/account.dat", persistent_id)).ToStdWstring(); + return ActiveSettings::GetMlcPath(fmt::format("usr/save/system/act/{:08x}/account.dat", persistent_id)); } OnlineValidator Account::ValidateOnlineFiles() const { OnlineValidator result{}; - const auto otp = ActiveSettings::GetPath("otp.bin"); + const auto otp = ActiveSettings::GetUserDataPath("otp.bin"); if (!fs::exists(otp)) result.otp = OnlineValidator::FileState::Missing; else if (fs::file_size(otp) != 1024) @@ -432,7 +431,7 @@ OnlineValidator Account::ValidateOnlineFiles() const else result.otp = OnlineValidator::FileState::Ok; - const auto seeprom = ActiveSettings::GetPath("seeprom.bin"); + const auto seeprom = ActiveSettings::GetUserDataPath("seeprom.bin"); if (!fs::exists(seeprom)) result.seeprom = OnlineValidator::FileState::Missing; else if (fs::file_size(seeprom) != 512) @@ -462,15 +461,14 @@ OnlineValidator Account::ValidateOnlineFiles() const void Account::ParseFile(class FileStream* file) { - std::vector buffer; - - std::string tmp; - while (file->readLine(tmp)) - buffer.emplace_back(tmp); - for (const auto& s : buffer) + std::vector buffer; + buffer.resize(file->GetSize()); + if( file->readData(buffer.data(), buffer.size()) != buffer.size()) + throw std::system_error(AccountErrc::ParseError); + for (const auto& s : StringHelpers::StringLineIterator(buffer)) { std::string_view view = s; - const auto find = view.find(L'='); + const auto find = view.find('='); if (find == std::string_view::npos) continue; diff --git a/src/Cafe/Account/Account.h b/src/Cafe/Account/Account.h index 63eb5082..da196e42 100644 --- a/src/Cafe/Account/Account.h +++ b/src/Cafe/Account/Account.h @@ -16,22 +16,6 @@ enum class OnlineAccountError kPasswordCacheEmpty, kNoPrincipalId, }; -template <> -struct fmt::formatter : formatter { - template - auto format(const OnlineAccountError v, FormatContext& ctx) { - switch (v) - { - case OnlineAccountError::kNoAccountId: return formatter::format("AccountId missing (The account is not connected to a NNID)", ctx); - case OnlineAccountError::kNoPasswordCached: return formatter::format("IsPasswordCacheEnabled is set to false (The remember password option on your Wii U must be enabled for this account before dumping it)", ctx); - case OnlineAccountError::kPasswordCacheEmpty: return formatter::format("AccountPasswordCache is empty (The remember password option on your Wii U must be enabled for this account before dumping it)", ctx); - case OnlineAccountError::kNoPrincipalId: return formatter::format("PrincipalId missing", ctx); - default: break; - } - return formatter::format("no error", ctx); - } -}; - struct OnlineValidator { diff --git 
a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index c012f18a..2900059b 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -1,21 +1,561 @@ -project(CemuCafe) - -file(GLOB_RECURSE CPP_FILES *.cpp) -file(GLOB_RECURSE H_FILES *.h) +add_library(CemuCafe + Account/Account.cpp + Account/AccountError.h + Account/Account.h + CafeSystem.cpp + CafeSystem.h + Filesystem/fsc.cpp + Filesystem/fscDeviceHostFS.cpp + Filesystem/fscDeviceHostFS.h + Filesystem/fscDeviceRedirect.cpp + Filesystem/fscDeviceWua.cpp + Filesystem/fscDeviceWud.cpp + Filesystem/fscDeviceWuhb.cpp + Filesystem/fsc.h + Filesystem/FST/FST.cpp + Filesystem/FST/FST.h + Filesystem/FST/fstUtil.h + Filesystem/FST/KeyCache.cpp + Filesystem/FST/KeyCache.h + Filesystem/WUD/wud.cpp + Filesystem/WUD/wud.h + Filesystem/WUHB/RomFSStructs.h + Filesystem/WUHB/WUHBReader.cpp + Filesystem/WUHB/WUHBReader.h + GamePatch.cpp + GamePatch.h + GameProfile/GameProfile.cpp + GameProfile/GameProfile.h + GraphicPack/GraphicPack2.cpp + GraphicPack/GraphicPack2.h + GraphicPack/GraphicPack2PatchesApply.cpp + GraphicPack/GraphicPack2Patches.cpp + GraphicPack/GraphicPack2Patches.h + GraphicPack/GraphicPack2PatchesParser.cpp + GraphicPack/GraphicPackError.h + HW/ACR/ACR.cpp + HW/AI/AI.cpp + HW/AI/AI.h + HW/Common/HwReg.h + HW/Espresso/Const.h + HW/Espresso/Debugger/Debugger.cpp + HW/Espresso/Debugger/Debugger.h + HW/Espresso/Debugger/DebugSymbolStorage.cpp + HW/Espresso/Debugger/DebugSymbolStorage.h + HW/Espresso/Debugger/GDBStub.h + HW/Espresso/Debugger/GDBStub.cpp + HW/Espresso/Debugger/GDBBreakpoints.cpp + HW/Espresso/Debugger/GDBBreakpoints.h + HW/Espresso/EspressoISA.h + HW/Espresso/Interpreter/PPCInterpreterALU.hpp + HW/Espresso/Interpreter/PPCInterpreterFPU.cpp + HW/Espresso/Interpreter/PPCInterpreterHelper.h + HW/Espresso/Interpreter/PPCInterpreterHLE.cpp + HW/Espresso/Interpreter/PPCInterpreterImpl.cpp + HW/Espresso/Interpreter/PPCInterpreterInternal.h + HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp + HW/Espresso/Interpreter/PPCInterpreterMain.cpp + HW/Espresso/Interpreter/PPCInterpreterOPC.cpp + HW/Espresso/Interpreter/PPCInterpreterOPC.hpp + HW/Espresso/Interpreter/PPCInterpreterPS.cpp + HW/Espresso/Interpreter/PPCInterpreterSPR.hpp + HW/Espresso/PPCCallback.h + HW/Espresso/PPCScheduler.cpp + HW/Espresso/PPCSchedulerLLE.cpp + HW/Espresso/PPCState.h + HW/Espresso/PPCTimer.cpp + HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h + HW/Espresso/Recompiler/PPCRecompiler.cpp + HW/Espresso/Recompiler/PPCRecompiler.h + HW/Espresso/Recompiler/IML/IML.h + HW/Espresso/Recompiler/IML/IMLSegment.cpp + HW/Espresso/Recompiler/IML/IMLSegment.h + HW/Espresso/Recompiler/IML/IMLInstruction.cpp + HW/Espresso/Recompiler/IML/IMLInstruction.h + HW/Espresso/Recompiler/IML/IMLDebug.cpp + HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp + HW/Espresso/Recompiler/IML/IMLOptimizer.cpp + HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp + HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h + HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp + HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h + HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp + HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp + HW/Espresso/Recompiler/PPCRecompilerIml.h + HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp + HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp + HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp + HW/Espresso/Recompiler/BackendX64/BackendX64.cpp + HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp + 
HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp + HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp + HW/Espresso/Recompiler/BackendX64/BackendX64.h + HW/Espresso/Recompiler/BackendX64/X64Emit.hpp + HW/Espresso/Recompiler/BackendX64/x86Emitter.h + HW/Latte/Common/RegisterSerializer.cpp + HW/Latte/Common/RegisterSerializer.h + HW/Latte/Common/ShaderSerializer.cpp + HW/Latte/Common/ShaderSerializer.h + HW/Latte/Core/FetchShader.cpp + HW/Latte/Core/FetchShader.h + HW/Latte/Core/LatteAsyncCommands.cpp + HW/Latte/Core/LatteAsyncCommands.h + HW/Latte/Core/LatteBufferCache.cpp + HW/Latte/Core/LatteBufferCache.h + HW/Latte/Core/LatteBufferData.cpp + HW/Latte/Core/LatteCachedFBO.h + HW/Latte/Core/LatteCommandProcessor.cpp + HW/Latte/Core/LatteConst.h + HW/Latte/Core/LatteDefaultShaders.cpp + HW/Latte/Core/LatteDefaultShaders.h + HW/Latte/Core/LatteDraw.h + HW/Latte/Core/LatteGSCopyShaderParser.cpp + HW/Latte/Core/Latte.h + HW/Latte/Core/LatteIndices.cpp + HW/Latte/Core/LatteIndices.h + HW/Latte/Core/LatteOverlay.cpp + HW/Latte/Core/LatteOverlay.h + HW/Latte/Core/LattePerformanceMonitor.cpp + HW/Latte/Core/LattePerformanceMonitor.h + HW/Latte/Core/LattePM4.h + HW/Latte/Core/LatteQuery.cpp + HW/Latte/Core/LatteQueryObject.h + HW/Latte/Core/LatteRenderTarget.cpp + HW/Latte/Core/LatteRingBuffer.cpp + HW/Latte/Core/LatteRingBuffer.h + HW/Latte/Core/LatteShaderAssembly.h + HW/Latte/Core/LatteShaderCache.cpp + HW/Latte/Core/LatteShaderCache.h + HW/Latte/Core/LatteShader.cpp + HW/Latte/Core/LatteShaderGL.cpp + HW/Latte/Core/LatteShader.h + HW/Latte/Core/LatteSoftware.cpp + HW/Latte/Core/LatteSoftware.h + HW/Latte/Core/LatteStreamoutGPU.cpp + HW/Latte/Core/LatteSurfaceCopy.cpp + HW/Latte/Core/LatteTextureCache.cpp + HW/Latte/Core/LatteTexture.cpp + HW/Latte/Core/LatteTexture.h + HW/Latte/Core/LatteTextureLegacy.cpp + HW/Latte/Core/LatteTextureLoader.cpp + HW/Latte/Core/LatteTextureLoader.h + HW/Latte/Core/LatteTextureReadback.cpp + HW/Latte/Core/LatteTextureReadbackInfo.h + HW/Latte/Core/LatteTextureView.cpp + HW/Latte/Core/LatteTextureView.h + HW/Latte/Core/LatteThread.cpp + HW/Latte/Core/LatteTiming.cpp + HW/Latte/Core/LatteTiming.h + HW/Latte/ISA/LatteInstructions.h + HW/Latte/ISA/LatteReg.h + HW/Latte/ISA/RegDefines.h + HW/Latte/LatteAddrLib/AddrLibFastDecode.h + HW/Latte/LatteAddrLib/LatteAddrLib_Coord.cpp + HW/Latte/LatteAddrLib/LatteAddrLib.cpp + HW/Latte/LatteAddrLib/LatteAddrLib.h + HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp + HW/Latte/LegacyShaderDecompiler/LatteDecompiler.cpp + HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLAttrDecoder.cpp + HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSL.cpp + HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp + HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h + HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h + HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h + HW/Latte/LegacyShaderDecompiler/LatteDecompilerRegisterDataTypeTracker.cpp + HW/Latte/Renderer/OpenGL/CachedFBOGL.h + HW/Latte/Renderer/OpenGL/LatteTextureGL.cpp + HW/Latte/Renderer/OpenGL/LatteTextureGL.h + HW/Latte/Renderer/OpenGL/LatteTextureViewGL.cpp + HW/Latte/Renderer/OpenGL/LatteTextureViewGL.h + HW/Latte/Renderer/OpenGL/OpenGLQuery.cpp + HW/Latte/Renderer/OpenGL/OpenGLRendererCore.cpp + HW/Latte/Renderer/OpenGL/OpenGLRenderer.cpp + HW/Latte/Renderer/OpenGL/OpenGLRenderer.h + HW/Latte/Renderer/OpenGL/OpenGLRendererStreamout.cpp + HW/Latte/Renderer/OpenGL/OpenGLRendererUniformData.cpp + 
HW/Latte/Renderer/OpenGL/OpenGLSurfaceCopy.cpp + HW/Latte/Renderer/OpenGL/OpenGLTextureReadback.h + HW/Latte/Renderer/OpenGL/RendererShaderGL.cpp + HW/Latte/Renderer/OpenGL/RendererShaderGL.h + HW/Latte/Renderer/OpenGL/TextureReadbackGL.cpp + HW/Latte/Renderer/Renderer.cpp + HW/Latte/Renderer/Renderer.h + HW/Latte/Renderer/RendererOuputShader.cpp + HW/Latte/Renderer/RendererOuputShader.h + HW/Latte/Renderer/RendererShader.cpp + HW/Latte/Renderer/RendererShader.h + HW/Latte/Renderer/Vulkan/CachedFBOVk.cpp + HW/Latte/Renderer/Vulkan/CachedFBOVk.h + HW/Latte/Renderer/Vulkan/CocoaSurface.h + HW/Latte/Renderer/Vulkan/LatteTextureViewVk.cpp + HW/Latte/Renderer/Vulkan/LatteTextureViewVk.h + HW/Latte/Renderer/Vulkan/LatteTextureVk.cpp + HW/Latte/Renderer/Vulkan/LatteTextureVk.h + HW/Latte/Renderer/Vulkan/RendererShaderVk.cpp + HW/Latte/Renderer/Vulkan/RendererShaderVk.h + HW/Latte/Renderer/Vulkan/SwapchainInfoVk.cpp + HW/Latte/Renderer/Vulkan/SwapchainInfoVk.h + HW/Latte/Renderer/Vulkan/TextureReadbackVk.cpp + HW/Latte/Renderer/Vulkan/VKRBase.h + HW/Latte/Renderer/Vulkan/VKRMemoryManager.cpp + HW/Latte/Renderer/Vulkan/VKRMemoryManager.h + HW/Latte/Renderer/Vulkan/VKRPipelineInfo.cpp + HW/Latte/Renderer/Vulkan/VsyncDriver.cpp + HW/Latte/Renderer/Vulkan/VsyncDriver.h + HW/Latte/Renderer/Vulkan/VulkanAPI.cpp + HW/Latte/Renderer/Vulkan/VulkanAPI.h + HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.cpp + HW/Latte/Renderer/Vulkan/VulkanPipelineCompiler.h + HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.cpp + HW/Latte/Renderer/Vulkan/VulkanPipelineStableCache.h + HW/Latte/Renderer/Vulkan/VulkanQuery.cpp + HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp + HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp + HW/Latte/Renderer/Vulkan/VulkanRenderer.h + HW/Latte/Renderer/Vulkan/VulkanSurfaceCopy.cpp + HW/Latte/Renderer/Vulkan/VulkanTextureReadback.h + HW/Latte/ShaderInfo/ShaderDescription.cpp + HW/Latte/ShaderInfo/ShaderInfo.h + HW/Latte/ShaderInfo/ShaderInstanceInfo.cpp + HW/Latte/Transcompiler/LatteTC.cpp + HW/Latte/Transcompiler/LatteTCGenIR.cpp + HW/Latte/Transcompiler/LatteTC.h + HW/MMU/MMU.cpp + HW/MMU/MMU.h + HW/SI/SI.cpp + HW/SI/si.h + HW/VI/VI.cpp + IOSU/ccr_nfc/iosu_ccr_nfc.cpp + IOSU/ccr_nfc/iosu_ccr_nfc.h + IOSU/fsa/fsa_types.h + IOSU/fsa/iosu_fsa.cpp + IOSU/fsa/iosu_fsa.h + IOSU/iosu_ipc_common.h + IOSU/iosu_types_common.h + IOSU/kernel/iosu_kernel.cpp + IOSU/kernel/iosu_kernel.h + IOSU/legacy/iosu_acp.cpp + IOSU/legacy/iosu_acp.h + IOSU/legacy/iosu_act.cpp + IOSU/legacy/iosu_act.h + IOSU/legacy/iosu_boss.cpp + IOSU/legacy/iosu_boss.h + IOSU/legacy/iosu_crypto.cpp + IOSU/legacy/iosu_crypto.h + IOSU/legacy/iosu_fpd.cpp + IOSU/legacy/iosu_fpd.h + IOSU/legacy/iosu_ioctl.cpp + IOSU/legacy/iosu_ioctl.h + IOSU/legacy/iosu_mcp.cpp + IOSU/legacy/iosu_mcp.h + IOSU/legacy/iosu_nim.cpp + IOSU/legacy/iosu_nim.h + IOSU/nn/iosu_nn_service.cpp + IOSU/nn/iosu_nn_service.h + IOSU/PDM/iosu_pdm.cpp + IOSU/PDM/iosu_pdm.h + IOSU/ODM/iosu_odm.cpp + IOSU/ODM/iosu_odm.h + OS/common/OSCommon.cpp + OS/common/OSCommon.h + OS/common/OSUtil.h + OS/common/PPCConcurrentQueue.h + OS/libs/avm/avm.cpp + OS/libs/avm/avm.h + OS/libs/camera/camera.cpp + OS/libs/camera/camera.h + OS/libs/coreinit/coreinit_Alarm.cpp + OS/libs/coreinit/coreinit_Alarm.h + OS/libs/coreinit/coreinit_Atomic.cpp + OS/libs/coreinit/coreinit_Atomic.h + OS/libs/coreinit/coreinit_BSP.cpp + OS/libs/coreinit/coreinit_BSP.h + OS/libs/coreinit/coreinit_Callbacks.cpp + OS/libs/coreinit/coreinit_CodeGen.cpp + OS/libs/coreinit/coreinit_CodeGen.h + 
OS/libs/coreinit/coreinit_Coroutine.cpp + OS/libs/coreinit/coreinit_Coroutine.h + OS/libs/coreinit/coreinit.cpp + OS/libs/coreinit/coreinit_DynLoad.cpp + OS/libs/coreinit/coreinit_DynLoad.h + OS/libs/coreinit/coreinit_FG.cpp + OS/libs/coreinit/coreinit_FG.h + OS/libs/coreinit/coreinit_FS.cpp + OS/libs/coreinit/coreinit_FS.h + OS/libs/coreinit/coreinit_GHS.cpp + OS/libs/coreinit/coreinit_GHS.h + OS/libs/coreinit/coreinit.h + OS/libs/coreinit/coreinit_HWInterface.cpp + OS/libs/coreinit/coreinit_HWInterface.h + OS/libs/coreinit/coreinit_IM.cpp + OS/libs/coreinit/coreinit_IM.h + OS/libs/coreinit/coreinit_Init.cpp + OS/libs/coreinit/coreinit_IOS.cpp + OS/libs/coreinit/coreinit_IOS.h + OS/libs/coreinit/coreinit_IPCBuf.cpp + OS/libs/coreinit/coreinit_IPCBuf.h + OS/libs/coreinit/coreinit_IPC.cpp + OS/libs/coreinit/coreinit_IPC.h + OS/libs/coreinit/coreinit_LockedCache.cpp + OS/libs/coreinit/coreinit_LockedCache.h + OS/libs/coreinit/coreinit_MCP.cpp + OS/libs/coreinit/coreinit_MCP.h + OS/libs/coreinit/coreinit_MEM_BlockHeap.cpp + OS/libs/coreinit/coreinit_MEM_BlockHeap.h + OS/libs/coreinit/coreinit_MEM.cpp + OS/libs/coreinit/coreinit_MEM_ExpHeap.cpp + OS/libs/coreinit/coreinit_MEM_ExpHeap.h + OS/libs/coreinit/coreinit_MEM_FrmHeap.cpp + OS/libs/coreinit/coreinit_MEM_FrmHeap.h + OS/libs/coreinit/coreinit_MEM.h + OS/libs/coreinit/coreinit_Memory.cpp + OS/libs/coreinit/coreinit_Memory.h + OS/libs/coreinit/coreinit_MemoryMapping.cpp + OS/libs/coreinit/coreinit_MemoryMapping.h + OS/libs/coreinit/coreinit_MEM_UnitHeap.cpp + OS/libs/coreinit/coreinit_MEM_UnitHeap.h + OS/libs/coreinit/coreinit_MessageQueue.cpp + OS/libs/coreinit/coreinit_MessageQueue.h + OS/libs/coreinit/coreinit_Misc.cpp + OS/libs/coreinit/coreinit_Misc.h + OS/libs/coreinit/coreinit_MPQueue.cpp + OS/libs/coreinit/coreinit_MPQueue.h + OS/libs/coreinit/coreinit_OSScreen.cpp + OS/libs/coreinit/coreinit_OSScreen_font.h + OS/libs/coreinit/coreinit_OSScreen.h + OS/libs/coreinit/coreinit_OverlayArena.cpp + OS/libs/coreinit/coreinit_OverlayArena.h + OS/libs/coreinit/coreinit_Scheduler.cpp + OS/libs/coreinit/coreinit_Scheduler.h + OS/libs/coreinit/coreinit_Spinlock.cpp + OS/libs/coreinit/coreinit_Spinlock.h + OS/libs/coreinit/coreinit_Synchronization.cpp + OS/libs/coreinit/coreinit_SysHeap.cpp + OS/libs/coreinit/coreinit_SysHeap.h + OS/libs/coreinit/coreinit_SystemInfo.cpp + OS/libs/coreinit/coreinit_SystemInfo.h + OS/libs/coreinit/coreinit_Thread.cpp + OS/libs/coreinit/coreinit_Thread.h + OS/libs/coreinit/coreinit_ThreadQueue.cpp + OS/libs/coreinit/coreinit_Time.cpp + OS/libs/coreinit/coreinit_Time.h + OS/libs/dmae/dmae.cpp + OS/libs/dmae/dmae.h + OS/libs/drmapp/drmapp.cpp + OS/libs/drmapp/drmapp.h + OS/libs/erreula/erreula.cpp + OS/libs/erreula/erreula.h + OS/libs/gx2/GX2_AddrTest.cpp + OS/libs/gx2/GX2_Blit.cpp + OS/libs/gx2/GX2_Blit.h + OS/libs/gx2/GX2_Command.cpp + OS/libs/gx2/GX2_Command.h + OS/libs/gx2/GX2_ContextState.cpp + OS/libs/gx2/GX2.cpp + OS/libs/gx2/GX2_Draw.cpp + OS/libs/gx2/GX2_Draw.h + OS/libs/gx2/GX2_Event.cpp + OS/libs/gx2/GX2_Event.h + OS/libs/gx2/GX2.h + OS/libs/gx2/GX2_Memory.cpp + OS/libs/gx2/GX2_Memory.h + OS/libs/gx2/GX2_Misc.cpp + OS/libs/gx2/GX2_Misc.h + OS/libs/gx2/GX2_Query.cpp + OS/libs/gx2/GX2_Query.h + OS/libs/gx2/GX2_RenderTarget.cpp + OS/libs/gx2/GX2_Resource.cpp + OS/libs/gx2/GX2_Resource.h + OS/libs/gx2/GX2_Shader.cpp + OS/libs/gx2/GX2_Shader.h + OS/libs/gx2/GX2_shader_legacy.cpp + OS/libs/gx2/GX2_State.cpp + OS/libs/gx2/GX2_State.h + OS/libs/gx2/GX2_Streamout.cpp + OS/libs/gx2/GX2_Streamout.h + 
OS/libs/gx2/GX2_Surface_Copy.cpp + OS/libs/gx2/GX2_Surface_Copy.h + OS/libs/gx2/GX2_Surface.cpp + OS/libs/gx2/GX2_Surface.h + OS/libs/gx2/GX2_Texture.cpp + OS/libs/gx2/GX2_Texture.h + OS/libs/gx2/GX2_TilingAperture.cpp + OS/libs/h264_avc/H264Dec.cpp + OS/libs/h264_avc/H264DecBackendAVC.cpp + OS/libs/h264_avc/h264dec.h + OS/libs/h264_avc/H264DecInternal.h + OS/libs/h264_avc/parser + OS/libs/h264_avc/parser/H264Parser.cpp + OS/libs/h264_avc/parser/H264Parser.h + OS/libs/mic/mic.cpp + OS/libs/mic/mic.h + OS/libs/nfc/ndef.cpp + OS/libs/nfc/ndef.h + OS/libs/nfc/nfc.cpp + OS/libs/nfc/nfc.h + OS/libs/nfc/stream.cpp + OS/libs/nfc/stream.h + OS/libs/nfc/TagV0.cpp + OS/libs/nfc/TagV0.h + OS/libs/nfc/TLV.cpp + OS/libs/nfc/TLV.h + OS/libs/nlibcurl/nlibcurl.cpp + OS/libs/nlibcurl/nlibcurlDebug.hpp + OS/libs/nlibcurl/nlibcurl.h + OS/libs/nlibnss/nlibnss.cpp + OS/libs/nlibnss/nlibnss.h + OS/libs/nn_ac/nn_ac.cpp + OS/libs/nn_ac/nn_ac.h + OS/libs/nn_acp/nn_acp.cpp + OS/libs/nn_acp/nn_acp.h + OS/libs/nn_act/nn_act.cpp + OS/libs/nn_act/nn_act.h + OS/libs/nn_aoc/nn_aoc.cpp + OS/libs/nn_aoc/nn_aoc.h + OS/libs/nn_boss/nn_boss.cpp + OS/libs/nn_boss/nn_boss.h + OS/libs/nn_ccr/nn_ccr.cpp + OS/libs/nn_ccr/nn_ccr.h + OS/libs/nn_cmpt/nn_cmpt.cpp + OS/libs/nn_cmpt/nn_cmpt.h + OS/libs/nn_common.h + OS/libs/nn_ec/nn_ec.cpp + OS/libs/nn_ec/nn_ec.h + OS/libs/nn_fp/nn_fp.cpp + OS/libs/nn_fp/nn_fp.h + OS/libs/nn_idbe/nn_idbe.cpp + OS/libs/nn_idbe/nn_idbe.h + OS/libs/nn_ndm/nn_ndm.cpp + OS/libs/nn_ndm/nn_ndm.h + OS/libs/nn_spm/nn_spm.cpp + OS/libs/nn_spm/nn_spm.h + OS/libs/nn_sl/nn_sl.cpp + OS/libs/nn_sl/nn_sl.h + OS/libs/nn_nfp/AmiiboCrypto.h + OS/libs/nn_nfp/nn_nfp.cpp + OS/libs/nn_nfp/nn_nfp.h + OS/libs/nn_nim/nn_nim.cpp + OS/libs/nn_nim/nn_nim.h + OS/libs/nn_olv/nn_olv.cpp + OS/libs/nn_olv/nn_olv.h + OS/libs/nn_olv/nn_olv_Common.cpp + OS/libs/nn_olv/nn_olv_Common.h + OS/libs/nn_olv/nn_olv_InitializeTypes.cpp + OS/libs/nn_olv/nn_olv_InitializeTypes.h + OS/libs/nn_olv/nn_olv_DownloadCommunityTypes.cpp + OS/libs/nn_olv/nn_olv_DownloadCommunityTypes.h + OS/libs/nn_olv/nn_olv_UploadCommunityTypes.cpp + OS/libs/nn_olv/nn_olv_UploadCommunityTypes.h + OS/libs/nn_olv/nn_olv_UploadFavoriteTypes.cpp + OS/libs/nn_olv/nn_olv_UploadFavoriteTypes.h + OS/libs/nn_olv/nn_olv_PostTypes.cpp + OS/libs/nn_olv/nn_olv_PostTypes.h + OS/libs/nn_olv/nn_olv_OfflineDB.cpp + OS/libs/nn_olv/nn_olv_OfflineDB.h + OS/libs/nn_pdm/nn_pdm.cpp + OS/libs/nn_pdm/nn_pdm.h + OS/libs/nn_save/nn_save.cpp + OS/libs/nn_save/nn_save.h + OS/libs/nn_temp/nn_temp.cpp + OS/libs/nn_temp/nn_temp.h + OS/libs/nn_uds/nn_uds.cpp + OS/libs/nn_uds/nn_uds.h + OS/libs/nsyshid/nsyshid.cpp + OS/libs/nsyshid/nsyshid.h + OS/libs/nsyshid/Backend.h + OS/libs/nsyshid/AttachDefaultBackends.cpp + OS/libs/nsyshid/Whitelist.cpp + OS/libs/nsyshid/Whitelist.h + OS/libs/nsyshid/BackendEmulated.cpp + OS/libs/nsyshid/BackendEmulated.h + OS/libs/nsyshid/BackendLibusb.cpp + OS/libs/nsyshid/BackendLibusb.h + OS/libs/nsyshid/Dimensions.cpp + OS/libs/nsyshid/Dimensions.h + OS/libs/nsyshid/Infinity.cpp + OS/libs/nsyshid/Infinity.h + OS/libs/nsyshid/Skylander.cpp + OS/libs/nsyshid/Skylander.h + OS/libs/nsyshid/SkylanderXbox360.cpp + OS/libs/nsyshid/SkylanderXbox360.h + OS/libs/nsyshid/g721/g721.cpp + OS/libs/nsyshid/g721/g721.h + OS/libs/nsyskbd/nsyskbd.cpp + OS/libs/nsyskbd/nsyskbd.h + OS/libs/nsysnet/nsysnet.cpp + OS/libs/nsysnet/nsysnet.h + OS/libs/ntag/ntag.cpp + OS/libs/ntag/ntag.h + OS/libs/padscore/padscore.cpp + OS/libs/padscore/padscore.h + OS/libs/proc_ui/proc_ui.cpp + 
OS/libs/proc_ui/proc_ui.h + OS/libs/snd_core/ax_aux.cpp + OS/libs/snd_core/ax_exports.cpp + OS/libs/snd_core/ax.h + OS/libs/snd_core/ax_internal.h + OS/libs/snd_core/ax_ist.cpp + OS/libs/snd_core/ax_mix.cpp + OS/libs/snd_core/ax_multivoice.cpp + OS/libs/snd_core/ax_out.cpp + OS/libs/snd_core/ax_voice.cpp + OS/libs/snd_user/snd_user.cpp + OS/libs/snd_user/snd_user.h + OS/libs/swkbd/swkbd.cpp + OS/libs/swkbd/swkbd.h + OS/libs/sysapp/sysapp.cpp + OS/libs/sysapp/sysapp.h + OS/libs/TCL/TCL.cpp + OS/libs/TCL/TCL.h + OS/libs/vpad/vpad.cpp + OS/libs/vpad/vpad.h + OS/libs/zlib125 + OS/libs/zlib125/zlib125.cpp + OS/libs/zlib125/zlib125.h + OS/RPL/elf.cpp + OS/RPL/rpl.cpp + OS/RPL/rpl_debug_symbols.cpp + OS/RPL/rpl_debug_symbols.h + OS/RPL/rpl.h + OS/RPL/rpl_structs.h + OS/RPL/rpl_symbol_storage.cpp + OS/RPL/rpl_symbol_storage.h + TitleList/GameInfo.h + TitleList/ParsedMetaXml.h + TitleList/SaveInfo.cpp + TitleList/SaveInfo.h + TitleList/SaveList.cpp + TitleList/SaveList.h + TitleList/TitleId.h + TitleList/TitleInfo.cpp + TitleList/TitleInfo.h + TitleList/TitleList.cpp + TitleList/TitleList.h +) if(APPLE) - file(GLOB_RECURSE MM_FILES *.mm) - add_library(CemuCafe ${CPP_FILES} ${MM_FILES} ${H_FILES}) -else() - add_library(CemuCafe ${CPP_FILES} ${H_FILES}) + target_sources(CemuCafe PRIVATE "HW/Latte/Renderer/Vulkan/CocoaSurface.mm") +endif() + +if(CEMU_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)") + target_sources(CemuCafe PRIVATE + HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp + HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h + ) + target_link_libraries(CemuCafe PRIVATE xbyak_aarch64) endif() set_property(TARGET CemuCafe PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$:Debug>") target_include_directories(CemuCafe PUBLIC "../") +if (glslang_VERSION VERSION_LESS "15.0.0") + set(glslang_target "glslang::SPIRV") +else() + set(glslang_target "glslang::glslang") +endif() + target_link_libraries(CemuCafe PRIVATE - CemuAsm CemuAudio CemuCommon CemuComponents @@ -29,9 +569,8 @@ target_link_libraries(CemuCafe PRIVATE Boost::nowide CURL::libcurl fmt::fmt - glslang::SPIRV + ${glslang_target} ih264d - imgui::imgui OpenSSL::Crypto OpenSSL::SSL PNG::PNG @@ -41,6 +580,23 @@ target_link_libraries(CemuCafe PRIVATE zstd::zstd ) +if (ENABLE_WAYLAND) + # PUBLIC because wayland-client.h is included in VulkanAPI.h + target_link_libraries(CemuCafe PUBLIC Wayland::Client) +endif() + +if (ENABLE_VCPKG) + if(WIN32) + set(PKG_CONFIG_EXECUTABLE "${VCPKG_INSTALLED_DIR}/x64-windows/tools/pkgconf/pkgconf.exe") + endif() + find_package(PkgConfig REQUIRED) + pkg_check_modules(libusb REQUIRED IMPORTED_TARGET libusb-1.0) + target_link_libraries(CemuCafe PRIVATE PkgConfig::libusb) +else () + find_package(libusb MODULE REQUIRED) + target_link_libraries(CemuCafe PRIVATE libusb::libusb) +endif () + if (ENABLE_WXWIDGETS) target_link_libraries(CemuCafe PRIVATE wx::base wx::core) endif() diff --git a/src/Cafe/CafeSystem.cpp b/src/Cafe/CafeSystem.cpp index 8ec6ff0a..d20ccd9d 100644 --- a/src/Cafe/CafeSystem.cpp +++ b/src/Cafe/CafeSystem.cpp @@ -4,16 +4,17 @@ #include "Cafe/GameProfile/GameProfile.h" #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" #include "Cafe/HW/Espresso/Recompiler/PPCRecompiler.h" -#include "audio/IAudioAPI.h" #include "Cafe/HW/Espresso/Debugger/Debugger.h" - +#include "Cafe/OS/RPL/rpl_symbol_storage.h" +#include "audio/IAudioAPI.h" +#include "audio/IAudioInputAPI.h" #include "config/ActiveSettings.h" +#include "config/LaunchSettings.h" #include "Cafe/TitleList/GameInfo.h" 
-#include "util/helpers/SystemException.h" #include "Cafe/GraphicPack/GraphicPack2.h" - +#include "util/helpers/SystemException.h" +#include "Common/cpu_features.h" #include "input/InputManager.h" - #include "Cafe/CafeSystem.h" #include "Cafe/TitleList/TitleList.h" #include "Cafe/TitleList/GameInfo.h" @@ -21,14 +22,10 @@ #include "Cafe/OS/libs/snd_core/ax.h" #include "Cafe/OS/RPL/rpl.h" #include "Cafe/HW/Latte/Core/Latte.h" - #include "Cafe/Filesystem/FST/FST.h" - #include "Common/FileStream.h" - #include "GamePatch.h" - -#include +#include "HW/Espresso/Debugger/GDBStub.h" #include "Cafe/IOSU/legacy/iosu_ioctl.h" #include "Cafe/IOSU/legacy/iosu_act.h" @@ -39,12 +36,14 @@ #include "Cafe/IOSU/legacy/iosu_boss.h" #include "Cafe/IOSU/legacy/iosu_nim.h" #include "Cafe/IOSU/PDM/iosu_pdm.h" +#include "Cafe/IOSU/ccr_nfc/iosu_ccr_nfc.h" // IOSU initializer functions #include "Cafe/IOSU/kernel/iosu_kernel.h" #include "Cafe/IOSU/fsa/iosu_fsa.h" +#include "Cafe/IOSU/ODM/iosu_odm.h" -// Cafe OS initializer functions +// Cafe OS initializer and shutdown functions #include "Cafe/OS/libs/avm/avm.h" #include "Cafe/OS/libs/drmapp/drmapp.h" #include "Cafe/OS/libs/TCL/TCL.h" @@ -54,11 +53,14 @@ #include "Cafe/OS/libs/gx2/GX2.h" #include "Cafe/OS/libs/gx2/GX2_Misc.h" #include "Cafe/OS/libs/mic/mic.h" +#include "Cafe/OS/libs/nfc/nfc.h" +#include "Cafe/OS/libs/ntag/ntag.h" #include "Cafe/OS/libs/nn_aoc/nn_aoc.h" #include "Cafe/OS/libs/nn_pdm/nn_pdm.h" #include "Cafe/OS/libs/nn_cmpt/nn_cmpt.h" #include "Cafe/OS/libs/nn_ccr/nn_ccr.h" #include "Cafe/OS/libs/nn_temp/nn_temp.h" +#include "Cafe/OS/libs/nn_save/nn_save.h" // HW interfaces #include "Cafe/HW/SI/si.h" @@ -66,6 +68,15 @@ // dependency to be removed #include "gui/guiWrapper.h" +#include + +#if BOOST_OS_LINUX +#include +#elif BOOST_OS_MACOS +#include +#include +#endif + std::string _pathToExecutable; std::string _pathToBaseExecutable; @@ -131,7 +142,7 @@ void LoadMainExecutable() // otherwise search for first file with .rpx extension in the code folder if (!ScanForRPX()) { - forceLog_printf("Unable to find RPX executable"); + cemuLog_log(LogType::Force, "Unable to find RPX executable"); cemuLog_waitForFlush(); cemu_assert(false); } @@ -141,7 +152,7 @@ void LoadMainExecutable() uint8* rpxData = fsc_extractFile(_pathToExecutable.c_str(), &rpxSize); if (rpxData == nullptr) { - forceLog_printf("Failed to load \"%s\"", _pathToExecutable.c_str()); + cemuLog_log(LogType::Force, "Failed to load \"{}\"", _pathToExecutable); cemuLog_waitForFlush(); cemu_assert(false); } @@ -158,7 +169,7 @@ void LoadMainExecutable() { // RPX RPLLoader_AddDependency(_pathToExecutable.c_str()); - applicationRPX = rpl_loadFromMem(rpxData, rpxSize, (char*)_pathToExecutable.c_str()); + applicationRPX = RPLLoader_LoadFromMemory(rpxData, rpxSize, (char*)_pathToExecutable.c_str()); if (!applicationRPX) { wxMessageBox(_("Failed to run this title because the executable is damaged")); @@ -209,7 +220,7 @@ void InfoLog_TitleLoaded() fs::path effectiveSavePath = getTitleSavePath(); std::error_code ec; const bool saveDirExists = fs::exists(effectiveSavePath, ec); - cemuLog_force("Save path: {}{}", _pathToUtf8(effectiveSavePath), saveDirExists ? "" : " (not present)"); + cemuLog_log(LogType::Force, "Save path: {}{}", _pathToUtf8(effectiveSavePath), saveDirExists ? 
"" : " (not present)"); // log shader cache name cemuLog_log(LogType::Force, "Shader cache file: shaderCache/transferable/{:016x}.bin", titleId); @@ -230,28 +241,21 @@ void InfoLog_TitleLoaded() void InfoLog_PrintActiveSettings() { const auto& config = GetConfig(); - forceLog_printf("------- Active settings -------"); + cemuLog_log(LogType::Force, "------- Active settings -------"); // settings to log: - forceLog_printf("CPU-Mode: %s%s", fmt::format("{}", ActiveSettings::GetCPUMode()).c_str(), g_current_game_profile->GetCPUMode().has_value() ? " (gameprofile)" : ""); - forceLog_printf("Load shared libraries: %s%s", ActiveSettings::LoadSharedLibrariesEnabled() ? "true" : "false", g_current_game_profile->ShouldLoadSharedLibraries().has_value() ? " (gameprofile)" : ""); - forceLog_printf("Use precompiled shaders: %s%s", fmt::format("{}", ActiveSettings::GetPrecompiledShadersOption()).c_str(), g_current_game_profile->GetPrecompiledShadersState().has_value() ? " (gameprofile)" : ""); - forceLog_printf("Full sync at GX2DrawDone: %s", ActiveSettings::WaitForGX2DrawDoneEnabled() ? "true" : "false"); + cemuLog_log(LogType::Force, "CPU-Mode: {}{}", fmt::format("{}", ActiveSettings::GetCPUMode()).c_str(), g_current_game_profile->GetCPUMode().has_value() ? " (gameprofile)" : ""); + cemuLog_log(LogType::Force, "Load shared libraries: {}{}", ActiveSettings::LoadSharedLibrariesEnabled() ? "true" : "false", g_current_game_profile->ShouldLoadSharedLibraries().has_value() ? " (gameprofile)" : ""); + cemuLog_log(LogType::Force, "Use precompiled shaders: {}{}", fmt::format("{}", ActiveSettings::GetPrecompiledShadersOption()), g_current_game_profile->GetPrecompiledShadersState().has_value() ? " (gameprofile)" : ""); + cemuLog_log(LogType::Force, "Full sync at GX2DrawDone: {}", ActiveSettings::WaitForGX2DrawDoneEnabled() ? "true" : "false"); + cemuLog_log(LogType::Force, "Strict shader mul: {}", g_current_game_profile->GetAccurateShaderMul() == AccurateShaderMulOption::True ? "true" : "false"); if (ActiveSettings::GetGraphicsAPI() == GraphicAPI::kVulkan) { - forceLog_printf("Async compile: %s", GetConfig().async_compile.GetValue() ? "true" : "false"); + cemuLog_log(LogType::Force, "Async compile: {}", GetConfig().async_compile.GetValue() ? 
"true" : "false"); if(!GetConfig().vk_accurate_barriers.GetValue()) - forceLog_printf("Accurate barriers are disabled!"); + cemuLog_log(LogType::Force, "Accurate barriers are disabled!"); } - - forceLog_printf("Console language: %s", fmt::format("{}", config.console_language).c_str()); -} - -void PPCCore_setupSPR(PPCInterpreter_t* hCPU, uint32 coreIndex) -{ - hCPU->sprExtended.PVR = 0x70010001; - hCPU->spr.UPIR = coreIndex; - hCPU->sprExtended.msr |= MSR_FP; // enable floating point + cemuLog_log(LogType::Force, "Console language: {}", stdx::to_underlying(config.console_language.GetValue())); } struct SharedDataEntry @@ -282,14 +286,14 @@ struct static_assert(sizeof(SharedDataEntry) == 0x1C); -uint32 loadSharedData() +uint32 LoadSharedData() { // check if font files are dumped bool hasAllShareddataFiles = true; for (sint32 i = 0; i < sizeof(shareddataDef) / sizeof(shareddataDef[0]); i++) { bool existsInMLC = fs::exists(ActiveSettings::GetMlcPath(shareddataDef[i].mlcPath)); - bool existsInResources = fs::exists(ActiveSettings::GetPath(shareddataDef[i].resourcePath)); + bool existsInResources = fs::exists(ActiveSettings::GetDataPath(shareddataDef[i].resourcePath)); if (!existsInMLC && !existsInResources) { @@ -314,7 +318,7 @@ uint32 loadSharedData() // alternatively fall back to our shared fonts if (!fontFile) { - path = ActiveSettings::GetPath(shareddataDef[i].resourcePath); + path = ActiveSettings::GetDataPath(shareddataDef[i].resourcePath); fontFile = FileStream::openFile2(path); } if (!fontFile) @@ -336,11 +340,11 @@ uint32 loadSharedData() // advance write offset and pad to 16 byte alignment dataWritePtr += ((fileSize + 15) & ~15); } - forceLog_printfW(L"COS: System fonts found. Generated shareddata (%dKB)", (uint32)(dataWritePtr - (uint8*)shareddataTable) / 1024); + cemuLog_log(LogType::Force, "COS: System fonts found. 
Generated shareddata ({}KB)", (uint32)(dataWritePtr - (uint8*)shareddataTable) / 1024); return memory_getVirtualOffsetFromPointer(dataWritePtr); } // alternative method: load RAM dump - const auto path = ActiveSettings::GetPath("shareddata.bin"); + const auto path = ActiveSettings::GetUserDataPath("shareddata.bin"); FileStream* ramDumpFile = FileStream::openFile2(path); if (ramDumpFile) { @@ -381,7 +385,7 @@ void cemu_initForGame() RPLLoader_Link(); RPLLoader_NotifyControlPassedToApplication(); uint32 linkTime = GetTickCount() - linkTimeStart; - forceLog_printf("RPL link time: %dms", linkTime); + cemuLog_log(LogType::Force, "RPL link time: {}ms", linkTime); // for HBL ELF: Setup OS-specifics struct if (isLaunchTypeELF) { @@ -393,17 +397,24 @@ void cemu_initForGame() // replace any known function signatures with our HLE implementations and patch bugs in the games GamePatch_scan(); } + LatteGPUState.isDRCPrimary = ActiveSettings::DisplayDRCEnabled(); InfoLog_PrintActiveSettings(); Latte_Start(); // check for debugger entrypoint bp + if (g_gdbstub) + { + g_gdbstub->HandleEntryStop(_entryPoint); + g_gdbstub->Initialize(); + } debugger_handleEntryBreakpoint(_entryPoint); // load graphic packs - forceLog_printf("------- Activate graphic packs -------"); + cemuLog_log(LogType::Force, "------- Activate graphic packs -------"); GraphicPack2::ActivateForCurrentTitle(); // print audio log IAudioAPI::PrintLogging(); + IAudioInputAPI::PrintLogging(); // everything initialized - forceLog_printf("------- Run title -------"); + cemuLog_log(LogType::Force, "------- Run title -------"); // wait till GPU thread is initialized while (g_isGPUInitFinished == false) std::this_thread::sleep_for(std::chrono::milliseconds(50)); // init initial thread @@ -412,61 +423,162 @@ void cemu_initForGame() coreinit::OSRunThread(initialThread, PPCInterpreter_makeCallableExportDepr(coreinit_start), 0, nullptr); // init AX and start AX I/O thread snd_core::AXOut_init(); - // init ppc recompiler - PPCRecompiler_init(); -} - -void cemu_deinitForGame() -{ - // reset audio - snd_core::AXOut_reset(); - snd_core::reset(); - // reset alarms - coreinit::OSAlarm_resetAll(); - // delete all threads - PPCCore_deleteAllThreads(); - // reset mount paths - fsc_unmountAll(); - // reset RPL loader - RPLLoader_ResetState(); - // reset GX2 - GX2::_GX2DriverReset(); } namespace CafeSystem { void InitVirtualMlcStorage(); void MlcStorageMountTitle(TitleInfo& titleInfo); + void MlcStorageUnmountAllTitles(); - bool sLaunchModeIsStandalone = false; + static bool s_initialized = false; + static SystemImplementation* s_implementation{nullptr}; + bool sLaunchModeIsStandalone = false; + std::optional> s_overrideArgs; bool sSystemRunning = false; TitleId sForegroundTitleId = 0; GameInfo2 sGameInfo_ForegroundTitle; + + static void _CheckForWine() + { + #if BOOST_OS_WINDOWS + const HMODULE hmodule = GetModuleHandleA("ntdll.dll"); + if (!hmodule) + return; + + const auto pwine_get_version = (const char*(__cdecl*)())GetProcAddress(hmodule, "wine_get_version"); + if (pwine_get_version) + { + cemuLog_log(LogType::Force, "Wine version: {}", pwine_get_version()); + } + #endif + } + + void logCPUAndMemoryInfo() + { + std::string cpuName = g_CPUFeatures.GetCPUName(); + if (!cpuName.empty()) + cemuLog_log(LogType::Force, "CPU: {}", cpuName); + #if BOOST_OS_WINDOWS + MEMORYSTATUSEX statex; + statex.dwLength = sizeof(statex); + GlobalMemoryStatusEx(&statex); + uint32 memoryInMB = (uint32)(statex.ullTotalPhys / 1024LL / 1024LL); + cemuLog_log(LogType::Force, "RAM: 
{}MB", memoryInMB); + #elif BOOST_OS_LINUX + struct sysinfo info {}; + sysinfo(&info); + cemuLog_log(LogType::Force, "RAM: {}MB", ((static_cast(info.totalram) * info.mem_unit) / 1024LL / 1024LL)); + #elif BOOST_OS_MACOS + int64_t totalRam; + size_t size = sizeof(totalRam); + int result = sysctlbyname("hw.memsize", &totalRam, &size, NULL, 0); + if (result == 0) + cemuLog_log(LogType::Force, "RAM: {}MB", (totalRam / 1024LL / 1024LL)); + #endif + } + + #if BOOST_OS_WINDOWS + std::string GetWindowsNamedVersion(uint32& buildNumber) + { + char productName[256]; + HKEY hKey; + DWORD dwType = REG_SZ; + DWORD dwSize = sizeof(productName); + if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion", 0, KEY_QUERY_VALUE, &hKey) == ERROR_SUCCESS) + { + if (RegQueryValueExA(hKey, "ProductName", NULL, &dwType, (LPBYTE)productName, &dwSize) != ERROR_SUCCESS) + strcpy(productName, "Windows"); + RegCloseKey(hKey); + } + OSVERSIONINFO osvi; + ZeroMemory(&osvi, sizeof(OSVERSIONINFO)); + osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + GetVersionEx(&osvi); + buildNumber = osvi.dwBuildNumber; + return std::string(productName); + } + #endif + + void logPlatformInfo() + { + std::string buffer; + const char* platform = NULL; + #if BOOST_OS_WINDOWS + uint32 buildNumber; + std::string windowsVersionName = GetWindowsNamedVersion(buildNumber); + buffer = fmt::format("{} (Build {})", windowsVersionName, buildNumber); + platform = buffer.c_str(); + #elif BOOST_OS_LINUX + if (getenv ("APPIMAGE")) + platform = "Linux (AppImage)"; + else if (getenv ("SNAP")) + platform = "Linux (Snap)"; + else if (platform = getenv ("container")) + { + if (strcmp (platform, "flatpak") == 0) + platform = "Linux (Flatpak)"; + } + else + platform = "Linux"; + #elif BOOST_OS_MACOS + platform = "MacOS"; + #endif + cemuLog_log(LogType::Force, "Platform: {}", platform); + } + + static std::vector s_iosuModules = + { + // entries in this list are ordered by initialization order. 
Shutdown in reverse order + iosu::kernel::GetModule(), + iosu::acp::GetModule(), + iosu::fpd::GetModule(), + iosu::pdm::GetModule(), + iosu::ccr_nfc::GetModule(), + }; + + // initialize all subsystems which are persistent and don't depend on a game running void Initialize() { - static bool s_initialized = false; if (s_initialized) return; s_initialized = true; + // init core systems + cemuLog_log(LogType::Force, "------- Init {} -------", BUILD_VERSION_WITH_NAME_STRING); + fsc_init(); + memory_init(); + cemuLog_log(LogType::Force, "Init Wii U memory space (base: 0x{:016x})", (size_t)memory_base); + PPCCore_init(); + RPLLoader_InitState(); + cemuLog_log(LogType::Force, "mlc01 path: {}", _pathToUtf8(ActiveSettings::GetMlcPath())); + _CheckForWine(); + // CPU and RAM info + logCPUAndMemoryInfo(); + logPlatformInfo(); + cemuLog_log(LogType::Force, "Used CPU extensions: {}", g_CPUFeatures.GetCommaSeparatedExtensionList()); + // misc systems + rplSymbolStorage_init(); // allocate memory for all SysAllocators - // must happen before all COS modules, but also before iosu::kernel::Init() + // must happen before COS module init, but also before iosu::kernel::Initialize() SysAllocatorContainer::GetInstance().Initialize(); - // init IOSU - iosu::kernel::Initialize(); + // init IOSU modules + for(auto& module : s_iosuModules) + module->SystemLaunch(); + // init IOSU (deprecated manual init) + iosuCrypto_init(); iosu::fsa::Initialize(); iosuIoctl_init(); iosuAct_init_depr(); iosu::act::Initialize(); - iosu::fpd::Initialize(); iosu::iosuMcp_init(); iosu::mcp::Init(); iosu::iosuAcp_init(); iosu::boss_init(); iosu::nim::Initialize(); - iosu::pdm::Initialize(); + iosu::odm::Initialize(); // init Cafe OS avm::Initialize(); drmapp::Initialize(); @@ -480,49 +592,86 @@ namespace CafeSystem H264::Initialize(); snd_core::Initialize(); mic::Initialize(); + nfc::Initialize(); + ntag::Initialize(); // init hardware register interfaces HW_SI::Initialize(); } + void SetImplementation(SystemImplementation* impl) + { + s_implementation = impl; + } + + void Shutdown() + { + cemu_assert_debug(s_initialized); + // if a title is running, shut it down + if (sSystemRunning) + ShutdownTitle(); + // shutdown persistent subsystems (deprecated manual shutdown) + iosu::odm::Shutdown(); + iosu::act::Stop(); + iosu::mcp::Shutdown(); + iosu::fsa::Shutdown(); + // shutdown IOSU modules + for(auto it = s_iosuModules.rbegin(); it != s_iosuModules.rend(); ++it) + (*it)->SystemExit(); + s_initialized = false; + } + std::string GetInternalVirtualCodeFolder() { return "/internal/current_title/code/"; } - STATUS_CODE LoadAndMountForegroundTitle(TitleId titleId) + void MountBaseDirectories() + { + const auto mlc = ActiveSettings::GetMlcPath(); + FSCDeviceHostFS_Mount("/cemuBossStorage/", _pathToUtf8(mlc / "usr/boss/"), FSC_PRIORITY_BASE); + FSCDeviceHostFS_Mount("/vol/storage_mlc01/", _pathToUtf8(mlc / ""), FSC_PRIORITY_BASE); + } + + void UnmountBaseDirectories() + { + fsc_unmount("/vol/storage_mlc01/", FSC_PRIORITY_BASE); + fsc_unmount("/cemuBossStorage/", FSC_PRIORITY_BASE); + } + + PREPARE_STATUS_CODE LoadAndMountForegroundTitle(TitleId titleId) { - cemuLog_log(LogType::Force, "Mounting title {:016x}", (uint64)titleId); + cemuLog_log(LogType::Force, "Mounting title {:016x}", (uint64)titleId); sGameInfo_ForegroundTitle = CafeTitleList::GetGameInfo(titleId); if (!sGameInfo_ForegroundTitle.IsValid()) { cemuLog_log(LogType::Force, "Mounting failed: Game meta information is either missing, inaccessible or not valid (missing or invalid .xml 
files in code and meta folder)"); - return STATUS_CODE::UNABLE_TO_MOUNT; + return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT; } // check base TitleInfo& titleBase = sGameInfo_ForegroundTitle.GetBase(); if (!titleBase.IsValid()) - return STATUS_CODE::UNABLE_TO_MOUNT; + return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT; if(!titleBase.ParseXmlInfo()) - return STATUS_CODE::UNABLE_TO_MOUNT; + return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT; cemuLog_log(LogType::Force, "Base: {}", titleBase.GetPrintPath()); // mount base if (!titleBase.Mount("/vol/content", "content", FSC_PRIORITY_BASE) || !titleBase.Mount(GetInternalVirtualCodeFolder(), "code", FSC_PRIORITY_BASE)) { cemuLog_log(LogType::Force, "Mounting failed"); - return STATUS_CODE::UNABLE_TO_MOUNT; + return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT; } // check update TitleInfo& titleUpdate = sGameInfo_ForegroundTitle.GetUpdate(); if (titleUpdate.IsValid()) { if (!titleUpdate.ParseXmlInfo()) - return STATUS_CODE::UNABLE_TO_MOUNT; + return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT; cemuLog_log(LogType::Force, "Update: {}", titleUpdate.GetPrintPath()); // mount update if (!titleUpdate.Mount("/vol/content", "content", FSC_PRIORITY_PATCH) || !titleUpdate.Mount(GetInternalVirtualCodeFolder(), "code", FSC_PRIORITY_PATCH)) { cemuLog_log(LogType::Force, "Mounting failed"); - return STATUS_CODE::UNABLE_TO_MOUNT; + return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT; } } else @@ -534,26 +683,49 @@ namespace CafeSystem // todo - support for multi-title AOC TitleInfo& titleAOC = aocList[0]; if (!titleAOC.ParseXmlInfo()) - return STATUS_CODE::UNABLE_TO_MOUNT; + return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT; cemu_assert_debug(titleAOC.IsValid()); cemuLog_log(LogType::Force, "DLC: {}", titleAOC.GetPrintPath()); // mount AOC if (!titleAOC.Mount(fmt::format("/vol/aoc{:016x}", titleAOC.GetAppTitleId()), "content", FSC_PRIORITY_PATCH)) { cemuLog_log(LogType::Force, "Mounting failed"); - return STATUS_CODE::UNABLE_TO_MOUNT; + return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT; } } else cemuLog_log(LogType::Force, "DLC: Not present"); sForegroundTitleId = titleId; - return STATUS_CODE::SUCCESS; + return PREPARE_STATUS_CODE::SUCCESS; } - STATUS_CODE SetupExecutable() + void UnmountForegroundTitle() + { + if(sLaunchModeIsStandalone) + return; + cemu_assert_debug(sGameInfo_ForegroundTitle.IsValid()); // unmounting title which was never mounted? 
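		// if the title info is somehow invalid, bail out instead of issuing unmount calls for paths that were never mounted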
+ if (!sGameInfo_ForegroundTitle.IsValid()) + return; + sGameInfo_ForegroundTitle.GetBase().Unmount("/vol/content"); + sGameInfo_ForegroundTitle.GetBase().Unmount(GetInternalVirtualCodeFolder()); + if (sGameInfo_ForegroundTitle.HasUpdate()) + { + if(auto& update = sGameInfo_ForegroundTitle.GetUpdate(); update.IsValid()) + { + update.Unmount("/vol/content"); + update.Unmount(GetInternalVirtualCodeFolder()); + } + } + auto aocList = sGameInfo_ForegroundTitle.GetAOC(); + if (!aocList.empty()) + { + TitleInfo& titleAOC = aocList[0]; + titleAOC.Unmount(fmt::format("/vol/aoc{:016x}", titleAOC.GetAppTitleId())); + } + } + + PREPARE_STATUS_CODE SetupExecutable() { - // mount mlc directories - fscDeviceHostFS_mapBaseDirectories_deprecated(); // set rpx path from cos.xml if available _pathToBaseExecutable = _pathToExecutable; if (!sLaunchModeIsStandalone) @@ -577,42 +749,53 @@ namespace CafeSystem const auto file = fsc_open(rpxPath.c_str(), FSC_ACCESS_FLAG::OPEN_FILE | FSC_ACCESS_FLAG::READ_PERMISSION, &status); if (file) { - _pathToExecutable = rpxPath; + _pathToExecutable = std::move(rpxPath); fsc_close(file); } } } } LoadMainExecutable(); - gameProfile_load(); - return STATUS_CODE::SUCCESS; + return PREPARE_STATUS_CODE::SUCCESS; } - STATUS_CODE PrepareForegroundTitle(TitleId titleId) + void SetupMemorySpace() + { + memory_mapForCurrentTitle(); + LoadSharedData(); + } + + void DestroyMemorySpace() + { + memory_unmapForCurrentTitle(); + } + + PREPARE_STATUS_CODE PrepareForegroundTitle(TitleId titleId) { CafeTitleList::WaitForMandatoryScan(); sLaunchModeIsStandalone = false; + _pathToExecutable.clear(); TitleIdParser tip(titleId); if (tip.GetType() == TitleIdParser::TITLE_TYPE::AOC || tip.GetType() == TitleIdParser::TITLE_TYPE::BASE_TITLE_UPDATE) cemuLog_log(LogType::Force, "Launched titleId is not the base of a title"); - - // mount title folders - STATUS_CODE r = LoadAndMountForegroundTitle(titleId); - if (r != STATUS_CODE::SUCCESS) + // mount mlc storage + MountBaseDirectories(); + // mount title folders + PREPARE_STATUS_CODE r = LoadAndMountForegroundTitle(titleId); + if (r != PREPARE_STATUS_CODE::SUCCESS) return r; - // map memory - memory_mapForCurrentTitle(); - // load RPX - r = SetupExecutable(); - if (r != STATUS_CODE::SUCCESS) + gameProfile_load(); + // setup memory space and PPC recompiler + SetupMemorySpace(); + PPCRecompiler_init(); + r = SetupExecutable(); // load RPX + if (r != PREPARE_STATUS_CODE::SUCCESS) return r; - - loadSharedData(); InitVirtualMlcStorage(); - return STATUS_CODE::SUCCESS; + return PREPARE_STATUS_CODE::SUCCESS; } - STATUS_CODE PrepareForegroundTitleFromStandaloneRPX(const fs::path& path) + PREPARE_STATUS_CODE PrepareForegroundTitleFromStandaloneRPX(const fs::path& path) { sLaunchModeIsStandalone = true; cemuLog_log(LogType::Force, "Launching executable in standalone mode due to incorrect layout or missing meta files"); @@ -630,7 +813,7 @@ namespace CafeSystem if (!r) { cemuLog_log(LogType::Force, "Failed to mount {}", _pathToUtf8(contentPath)); - return STATUS_CODE::UNABLE_TO_MOUNT; + return PREPARE_STATUS_CODE::UNABLE_TO_MOUNT; } } } @@ -642,28 +825,29 @@ namespace CafeSystem // since a lot of systems (including save folder location) rely on a TitleId, we derive a placeholder id from the executable hash auto execData = fsc_extractFile(_pathToExecutable.c_str()); if (!execData) - return STATUS_CODE::INVALID_RPX; + return PREPARE_STATUS_CODE::INVALID_RPX; uint32 h = generateHashFromRawRPXData(execData->data(), execData->size()); sForegroundTitleId = 
0xFFFFFFFF00000000ULL | (uint64)h; cemuLog_log(LogType::Force, "Generated placeholder TitleId: {:016x}", sForegroundTitleId); - // load executable - memory_mapForCurrentTitle(); - SetupExecutable(); - loadSharedData(); + // setup memory space and ppc recompiler + SetupMemorySpace(); + PPCRecompiler_init(); + // load executable + SetupExecutable(); InitVirtualMlcStorage(); - return STATUS_CODE::SUCCESS; + return PREPARE_STATUS_CODE::SUCCESS; } void _LaunchTitleThread() { - // init + for(auto& module : s_iosuModules) + module->TitleStart(); cemu_initForGame(); // enter scheduler - if (ActiveSettings::GetCPUMode() == CPUMode::MulticoreRecompiler) + if ((ActiveSettings::GetCPUMode() == CPUMode::MulticoreRecompiler || LaunchSettings::ForceMultiCoreInterpreter()) && !LaunchSettings::ForceInterpreter()) coreinit::OSSchedulerBegin(3); else coreinit::OSSchedulerBegin(1); - iosu::pdm::StartTrackingTime(GetForegroundTitleId()); } void LaunchForegroundTitle() @@ -694,6 +878,13 @@ namespace CafeSystem return sGameInfo_ForegroundTitle.GetVersion(); } + uint32 GetForegroundTitleSDKVersion() + { + if (sLaunchModeIsStandalone) + return 999999; + return sGameInfo_ForegroundTitle.GetSDKVersion(); + } + CafeConsoleRegion GetForegroundTitleRegion() { if (sLaunchModeIsStandalone) @@ -704,9 +895,21 @@ namespace CafeSystem std::string GetForegroundTitleName() { if (sLaunchModeIsStandalone) - return "Missing meta data"; - // todo - use language based on Cemu console language - return sGameInfo_ForegroundTitle.GetBase().GetMetaInfo()->GetShortName(CafeConsoleLanguage::EN); + return "Unknown Game"; + std::string applicationName; + applicationName = sGameInfo_ForegroundTitle.GetBase().GetMetaInfo()->GetShortName(GetConfig().console_language); + if (applicationName.empty()) //Try to get the English Title + applicationName = sGameInfo_ForegroundTitle.GetBase().GetMetaInfo()->GetShortName(CafeConsoleLanguage::EN); + if (applicationName.empty()) //Unknown Game + applicationName = "Unknown Game"; + return applicationName; + } + + uint32 GetForegroundTitleOlvAccesskey() + { + if (sLaunchModeIsStandalone) + return -1; + return sGameInfo_ForegroundTitle.GetBase().GetMetaInfo()->GetOlvAccesskey(); } std::string GetForegroundTitleArgStr() @@ -719,6 +922,47 @@ namespace CafeSystem return sGameInfo_ForegroundTitle.GetBase().GetArgStr(); } + CosCapabilityBits GetForegroundTitleCosCapabilities(CosCapabilityGroup group) + { + if (sLaunchModeIsStandalone) + return CosCapabilityBits::All; + auto& update = sGameInfo_ForegroundTitle.GetUpdate(); + if (update.IsValid()) + { + ParsedCosXml* cosXml = update.GetCosInfo(); + if (cosXml) + return cosXml->GetCapabilityBits(group); + } + auto& base = sGameInfo_ForegroundTitle.GetBase(); + if(base.IsValid()) + { + ParsedCosXml* cosXml = base.GetCosInfo(); + if (cosXml) + return cosXml->GetCapabilityBits(group); + } + return CosCapabilityBits::All; + } + + // when switching titles custom parameters can be passed, returns true if override args are used + bool GetOverrideArgStr(std::vector& args) + { + args.clear(); + if(!s_overrideArgs) + return false; + args = *s_overrideArgs; + return true; + } + + void SetOverrideArgs(std::span args) + { + s_overrideArgs = std::vector(args.begin(), args.end()); + } + + void UnsetOverrideArgs() + { + s_overrideArgs = std::nullopt; + } + // pick platform region based on title region CafeConsoleRegion GetPlatformRegion() { @@ -735,39 +979,32 @@ namespace CafeSystem void UnmountCurrentTitle() { - TitleInfo& titleBase = sGameInfo_ForegroundTitle.GetBase(); - if 
(titleBase.IsValid()) - titleBase.UnmountAll(); - if (sGameInfo_ForegroundTitle.HasUpdate()) - { - TitleInfo& titleUpdate = sGameInfo_ForegroundTitle.GetUpdate(); - if (titleUpdate.IsValid()) - titleUpdate.UnmountAll(); - } - if (sGameInfo_ForegroundTitle.HasAOC()) - { - auto titleInfoList = sGameInfo_ForegroundTitle.GetAOC(); - for(auto& it : titleInfoList) - { - if (it.IsValid()) - it.UnmountAll(); - } - } - fsc_unmount("/internal/code/", FSC_PRIORITY_BASE); + UnmountForegroundTitle(); + fsc_unmount("/internal/code/", FSC_PRIORITY_BASE); } void ShutdownTitle() { if(!sSystemRunning) return; - coreinit::OSSchedulerEnd(); - Latte_Stop(); - iosu::pdm::Stop(); - iosu::act::Stop(); - iosu::mcp::Shutdown(); - iosu::fsa::Shutdown(); - GraphicPack2::Reset(); - UnmountCurrentTitle(); + coreinit::OSSchedulerEnd(); + Latte_Stop(); + // reset Cafe OS userspace modules + snd_core::reset(); + coreinit::OSAlarm_Shutdown(); + GX2::_GX2DriverReset(); + nn::save::ResetToDefaultState(); + coreinit::__OSDeleteAllActivePPCThreads(); + RPLLoader_ResetState(); + for(auto it = s_iosuModules.rbegin(); it != s_iosuModules.rend(); ++it) + (*it)->TitleStop(); + // reset Cemu subsystems + PPCRecompiler_Shutdown(); + GraphicPack2::Reset(); + UnmountCurrentTitle(); + MlcStorageUnmountAllTitles(); + UnmountBaseDirectories(); + DestroyMemorySpace(); sSystemRunning = false; } @@ -817,10 +1054,7 @@ namespace CafeSystem } TitleId titleId = titleInfo.GetAppTitleId(); if (m_mlcMountedTitles.find(titleId) != m_mlcMountedTitles.end()) - { - cemu_assert_suspicious(); // already mounted return; - } std::string mlcStoragePath = GetMlcStoragePath(titleId); TitleInfo* mountTitleInfo = new TitleInfo(titleInfo); if (!mountTitleInfo->Mount(mlcStoragePath, "", FSC_PRIORITY_BASE)) @@ -847,6 +1081,16 @@ namespace CafeSystem MlcStorageMountTitle(it); } + void MlcStorageUnmountAllTitles() + { + for(auto& it : m_mlcMountedTitles) + { + std::string mlcStoragePath = GetMlcStoragePath(it.first); + it.second->Unmount(mlcStoragePath); + } + m_mlcMountedTitles.clear(); + } + uint32 GetRPXHashBase() { return currentBaseApplicationHash; @@ -857,4 +1101,9 @@ namespace CafeSystem return currentUpdatedApplicationHash; } -} \ No newline at end of file + void RequestRecreateCanvas() + { + s_implementation->CafeRecreateCanvas(); + } + +} diff --git a/src/Cafe/CafeSystem.h b/src/Cafe/CafeSystem.h index dce0b940..e9de8d7d 100644 --- a/src/Cafe/CafeSystem.h +++ b/src/Cafe/CafeSystem.h @@ -4,28 +4,46 @@ #include "Cafe/TitleList/TitleId.h" #include "config/CemuConfig.h" +enum class CosCapabilityBits : uint64; +enum class CosCapabilityGroup : uint32; + namespace CafeSystem { - enum class STATUS_CODE + class SystemImplementation + { + public: + virtual void CafeRecreateCanvas() = 0; + }; + + enum class PREPARE_STATUS_CODE { SUCCESS, INVALID_RPX, UNABLE_TO_MOUNT, // failed to mount through TitleInfo (most likely caused by an invalid or outdated path) - //BAD_META_DATA, - the title list only stores titles with valid meta, so this error code is impossible }; void Initialize(); - STATUS_CODE PrepareForegroundTitle(TitleId titleId); - STATUS_CODE PrepareForegroundTitleFromStandaloneRPX(const fs::path& path); + void SetImplementation(SystemImplementation* impl); + void Shutdown(); + + PREPARE_STATUS_CODE PrepareForegroundTitle(TitleId titleId); + PREPARE_STATUS_CODE PrepareForegroundTitleFromStandaloneRPX(const fs::path& path); void LaunchForegroundTitle(); bool IsTitleRunning(); + bool GetOverrideArgStr(std::vector& args); + void SetOverrideArgs(std::span args); + 
void UnsetOverrideArgs(); + TitleId GetForegroundTitleId(); uint16 GetForegroundTitleVersion(); + uint32 GetForegroundTitleSDKVersion(); CafeConsoleRegion GetForegroundTitleRegion(); CafeConsoleRegion GetPlatformRegion(); std::string GetForegroundTitleName(); std::string GetForegroundTitleArgStr(); + uint32 GetForegroundTitleOlvAccesskey(); + CosCapabilityBits GetForegroundTitleCosCapabilities(CosCapabilityGroup group); void ShutdownTitle(); @@ -36,6 +54,8 @@ namespace CafeSystem uint32 GetRPXHashBase(); uint32 GetRPXHashUpdated(); + + void RequestRecreateCanvas(); }; extern RPLModule* applicationRPX; diff --git a/src/Cafe/Filesystem/FST/FST.cpp b/src/Cafe/Filesystem/FST/FST.cpp index a4bbfeed..ec112b9a 100644 --- a/src/Cafe/Filesystem/FST/FST.cpp +++ b/src/Cafe/Filesystem/FST/FST.cpp @@ -3,8 +3,7 @@ #include "Cemu/ncrypto/ncrypto.h" #include "Cafe/Filesystem/WUD/wud.h" #include "util/crypto/aes128.h" -#include "openssl/evp.h" /* EVP_Digest */ -#include "openssl/sha.h" /* SHA1 / SHA256_DIGEST_LENGTH */ +#include "openssl/sha.h" /* SHA1 / SHA256 */ #include "fstUtil.h" #include "FST.h" @@ -12,6 +11,10 @@ #include "boost/range/adaptor/reversed.hpp" +#define SET_FST_ERROR(__code) if (errorCodeOut) *errorCodeOut = ErrorCode::__code + +static_assert(sizeof(NCrypto::AesIv) == 16); // make sure IV is actually 16 bytes + class FSTDataSource { public: @@ -139,7 +142,7 @@ struct DiscPartitionTableHeader static constexpr uint32 MAGIC_VALUE = 0xCCA6E67B; /* +0x00 */ uint32be magic; - /* +0x04 */ uint32be sectorSize; // must be 0x8000? + /* +0x04 */ uint32be blockSize; // must be 0x8000? /* +0x08 */ uint8 partitionTableHash[20]; // hash of the data range at +0x800 to end of sector (0x8000) /* +0x1C */ uint32be numPartitions; }; @@ -162,10 +165,10 @@ struct DiscPartitionHeader static constexpr uint32 MAGIC_VALUE = 0xCC93A4F5; /* +0x00 */ uint32be magic; - /* +0x04 */ uint32be sectorSize; // must match DISC_SECTOR_SIZE + /* +0x04 */ uint32be sectorSize; // must match DISC_SECTOR_SIZE for hashed blocks /* +0x08 */ uint32be ukn008; - /* +0x0C */ uint32be ukn00C; + /* +0x0C */ uint32be ukn00C; // h3 array size? /* +0x10 */ uint32be h3HashNum; /* +0x14 */ uint32be fstSize; // in bytes /* +0x18 */ uint32be fstSector; // relative to partition start @@ -176,13 +179,15 @@ struct DiscPartitionHeader /* +0x24 */ uint8 fstHashType; /* +0x25 */ uint8 fstEncryptionType; // purpose of this isn't really understood. Maybe it controls which key is being used? (1 -> disc key, 2 -> partition key) - /* +0x26 */ uint8 versionA; - /* +0x27 */ uint8 ukn027; // also a version field? + /* +0x26 */ uint8be versionA; + /* +0x27 */ uint8be ukn027; // also a version field? // there is an array at +0x40 ? Related to H3 list. Also related to value at +0x0C and h3HashNum + /* +0x28 */ uint8be _uknOrPadding028[0x18]; + /* +0x40 */ uint8be h3HashArray[32]; // dynamic size. 
Only present if fstHashType != 0 }; -static_assert(sizeof(DiscPartitionHeader) == 0x28); +static_assert(sizeof(DiscPartitionHeader) == 0x40+0x20); bool FSTVolume::FindDiscKey(const fs::path& path, NCrypto::AesKey& discTitleKey) { @@ -215,23 +220,22 @@ bool FSTVolume::FindDiscKey(const fs::path& path, NCrypto::AesKey& discTitleKey) // open WUD image using key cache // if no matching key is found then keyFound will return false -FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, bool* keyFound) +FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, ErrorCode* errorCodeOut) { + SET_FST_ERROR(UNKNOWN_ERROR); KeyCache_Prepare(); NCrypto::AesKey discTitleKey; if (!FindDiscKey(path, discTitleKey)) { - if(keyFound) - *keyFound = false; + SET_FST_ERROR(DISC_KEY_MISSING); return nullptr; } - if(keyFound) - *keyFound = true; - return OpenFromDiscImage(path, discTitleKey); + return OpenFromDiscImage(path, discTitleKey, errorCodeOut); } // open WUD image -FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& discTitleKey) +FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& discTitleKey, ErrorCode* errorCodeOut) + { // WUD images support multiple partitions, each with their own key and FST // the process for loading game data FSTVolume from a WUD image is as follows: @@ -240,6 +244,7 @@ FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& d // 3) find main GM partition // 4) use SI information to get titleKey for GM partition // 5) Load FST for GM + SET_FST_ERROR(UNKNOWN_ERROR); std::unique_ptr dataSource(FSTDataSourceWUD::Open(path)); if (!dataSource) return nullptr; @@ -267,7 +272,7 @@ FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& d cemuLog_log(LogType::Force, "Disc image rejected because decryption failed"); return nullptr; } - if (partitionHeader->sectorSize != DISC_SECTOR_SIZE) + if (partitionHeader->blockSize != DISC_SECTOR_SIZE) { cemuLog_log(LogType::Force, "Disc image rejected because partition sector size is invalid"); return nullptr; @@ -334,6 +339,9 @@ FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& d cemu_assert_debug(partitionHeaderSI.fstEncryptionType == 1); // todo - check other fields? 
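Reviewer note: with the SET_FST_ERROR macro and the new ErrorCode out-parameter, callers no longer need the old bool* keyFound flag and can distinguish failure causes. A minimal sketch of the intended call pattern, based on FSTVolume::ErrorCode as declared in FST.h further down in this patch (the log messages are illustrative):

    FSTVolume::ErrorCode ec = FSTVolume::ErrorCode::UNKNOWN_ERROR;
    FSTVolume* volume = FSTVolume::OpenFromDiscImage(wudPath, &ec); // wudPath: fs::path to a disc image, assumed to exist
    if (!volume)
    {
        if (ec == FSTVolume::ErrorCode::DISC_KEY_MISSING)
            cemuLog_log(LogType::Force, "No matching disc key found in keys.txt"); // illustrative message
        else
            cemuLog_log(LogType::Force, "Disc image could not be opened");
    }
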
+ if(partitionHeaderSI.fstHashType == 0 && partitionHeaderSI.h3HashNum != 0) + cemuLog_log(LogType::Force, "FST: Partition uses unhashed blocks but stores a non-zero amount of H3 hashes"); + // GM partition DiscPartitionHeader partitionHeaderGM{}; if (!readPartitionHeader(partitionHeaderGM, gmPartitionIndex)) @@ -347,9 +355,10 @@ FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& d // if decryption is necessary // load SI FST dataSource->SetBaseOffset((uint64)partitionArray[siPartitionIndex].partitionAddress * DISC_SECTOR_SIZE); - auto siFST = OpenFST(dataSource.get(), (uint64)partitionHeaderSI.fstSector * DISC_SECTOR_SIZE, partitionHeaderSI.fstSize, &discTitleKey, static_cast(partitionHeaderSI.fstHashType)); + auto siFST = OpenFST(dataSource.get(), (uint64)partitionHeaderSI.fstSector * DISC_SECTOR_SIZE, partitionHeaderSI.fstSize, &discTitleKey, static_cast(partitionHeaderSI.fstHashType), nullptr); if (!siFST) return nullptr; + cemu_assert_debug(!(siFST->HashIsDisabled() && partitionHeaderSI.h3HashNum != 0)); // if hash is disabled, no H3 data may be present // load ticket file for partition that we want to decrypt NCrypto::ETicketParser ticketParser; std::vector ticketData = siFST->ExtractFile(fmt::format("{:02x}/title.tik", gmPartitionIndex)); @@ -358,18 +367,38 @@ FSTVolume* FSTVolume::OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& d cemuLog_log(LogType::Force, "Disc image ticket file is invalid"); return nullptr; } +#if 0 + // each SI partition seems to contain a title.tmd that we could parse and which should have information about the associated GM partition + // but the console seems to ignore this file for disc images, at least when mounting, so we shouldn't rely on it either + std::vector tmdData = siFST->ExtractFile(fmt::format("{:02x}/title.tmd", gmPartitionIndex)); + if (tmdData.empty()) + { + cemuLog_log(LogType::Force, "Disc image TMD file is missing"); + return nullptr; + } + // parse TMD + NCrypto::TMDParser tmdParser; + if (!tmdParser.parse(tmdData.data(), tmdData.size())) + { + cemuLog_log(LogType::Force, "Disc image TMD file is invalid"); + return nullptr; + } +#endif delete siFST; - NCrypto::AesKey gmTitleKey; ticketParser.GetTitleKey(gmTitleKey); - // load GM partition dataSource->SetBaseOffset((uint64)partitionArray[gmPartitionIndex].partitionAddress * DISC_SECTOR_SIZE); - return OpenFST(std::move(dataSource), (uint64)partitionHeaderGM.fstSector * DISC_SECTOR_SIZE, partitionHeaderGM.fstSize, &gmTitleKey, static_cast(partitionHeaderGM.fstHashType)); + FSTVolume* r = OpenFST(std::move(dataSource), (uint64)partitionHeaderGM.fstSector * DISC_SECTOR_SIZE, partitionHeaderGM.fstSize, &gmTitleKey, static_cast(partitionHeaderGM.fstHashType), nullptr); + if (r) + SET_FST_ERROR(OK); + cemu_assert_debug(!(r->HashIsDisabled() && partitionHeaderGM.h3HashNum != 0)); // if hash is disabled, no H3 data may be present + return r; } -FSTVolume* FSTVolume::OpenFromContentFolder(fs::path folderPath) +FSTVolume* FSTVolume::OpenFromContentFolder(fs::path folderPath, ErrorCode* errorCodeOut) { + SET_FST_ERROR(UNKNOWN_ERROR); // load TMD FileStream* tmdFile = FileStream::openFile2(folderPath / "title.tmd"); if (!tmdFile) @@ -379,17 +408,26 @@ FSTVolume* FSTVolume::OpenFromContentFolder(fs::path folderPath) delete tmdFile; NCrypto::TMDParser tmdParser; if (!tmdParser.parse(tmdData.data(), tmdData.size())) + { + SET_FST_ERROR(BAD_TITLE_TMD); return nullptr; + } // load ticket FileStream* ticketFile = FileStream::openFile2(folderPath / "title.tik"); if 
(!ticketFile) + { + SET_FST_ERROR(TITLE_TIK_MISSING); return nullptr; + } std::vector ticketData; ticketFile->extract(ticketData); delete ticketFile; NCrypto::ETicketParser ticketParser; if (!ticketParser.parse(ticketData.data(), ticketData.size())) + { + SET_FST_ERROR(BAD_TITLE_TIK); return nullptr; + } NCrypto::AesKey titleKey; ticketParser.GetTitleKey(titleKey); // open data source @@ -411,13 +449,15 @@ FSTVolume* FSTVolume::OpenFromContentFolder(fs::path folderPath) } // load FST // fstSize = size of first cluster? - FSTVolume* fstVolume = FSTVolume::OpenFST(std::move(dataSource), 0, fstSize, &titleKey, fstHashMode); + FSTVolume* fstVolume = FSTVolume::OpenFST(std::move(dataSource), 0, fstSize, &titleKey, fstHashMode, &tmdParser); + if (fstVolume) + SET_FST_ERROR(OK); return fstVolume; } -FSTVolume* FSTVolume::OpenFST(FSTDataSource* dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode) +FSTVolume* FSTVolume::OpenFST(FSTDataSource* dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode, NCrypto::TMDParser* optionalTMD) { - cemu_assert_debug(fstHashMode != ClusterHashMode::RAW || fstHashMode != ClusterHashMode::RAW2); + cemu_assert_debug(fstHashMode != ClusterHashMode::RAW || fstHashMode != ClusterHashMode::RAW_STREAM); if (fstSize < sizeof(FSTHeader)) return nullptr; constexpr uint64 FST_CLUSTER_OFFSET = 0; @@ -448,6 +488,34 @@ FSTVolume* FSTVolume::OpenFST(FSTDataSource* dataSource, uint64 fstOffset, uint3 clusterTable[i].offset = clusterDataTable[i].offset; clusterTable[i].size = clusterDataTable[i].size; clusterTable[i].hashMode = static_cast((uint8)clusterDataTable[i].hashMode); + clusterTable[i].hasContentHash = false; // from the TMD file (H4?) 
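Reviewer note: when a TMD is available, each FST cluster is matched against one TMD content entry, so unhashed (RAW) clusters can be verified with a single whole-cluster digest instead of per-block H0 hashes. A self-contained sketch of the incremental EVP digest pattern used for this further down in GetDecryptedRawBlock (block layout and sizes are illustrative):

    #include <openssl/evp.h>
    #include <cstring>
    #include <memory>
    #include <vector>

    // Feed decrypted blocks to one digest context in order, then compare against the TMD hash.
    bool VerifyClusterHash(const std::vector<std::vector<unsigned char>>& blocks,
                           const unsigned char* expectedHash, bool isSHA1)
    {
        std::unique_ptr<EVP_MD_CTX, decltype(&EVP_MD_CTX_free)> ctx(EVP_MD_CTX_new(), &EVP_MD_CTX_free);
        EVP_DigestInit_ex(ctx.get(), isSHA1 ? EVP_sha1() : EVP_sha256(), nullptr);
        for (const auto& block : blocks) // blocks must be processed strictly in order
            EVP_DigestUpdate(ctx.get(), block.data(), block.size());
        unsigned char hash[32]{};
        EVP_DigestFinal_ex(ctx.get(), hash, nullptr);
        return std::memcmp(hash, expectedHash, isSHA1 ? 20 : 32) == 0;
    }
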
+ } + // if the TMD is available (when opening .app files) we can use the extra info from it to validate unhashed clusters + // each content entry in the TMD corresponds to one cluster used by the FST + if(optionalTMD) + { + if(numCluster != optionalTMD->GetContentList().size()) + { + cemuLog_log(LogType::Force, "FST: Number of clusters does not match TMD content list"); + return nullptr; + } + auto& contentList = optionalTMD->GetContentList(); + for(size_t i=0; im_offsetFactor = fstHeader->offsetFactor; fstVolume->m_sectorSize = DISC_SECTOR_SIZE; fstVolume->m_partitionTitlekey = *partitionTitleKey; - std::swap(fstVolume->m_cluster, clusterTable); - std::swap(fstVolume->m_entries, fstEntries); - std::swap(fstVolume->m_nameStringTable, nameStringTable); + fstVolume->m_hashIsDisabled = fstHeader->hashIsDisabled != 0; + fstVolume->m_cluster = std::move(clusterTable); + fstVolume->m_entries = std::move(fstEntries); + fstVolume->m_nameStringTable = std::move(nameStringTable); return fstVolume; } -FSTVolume* FSTVolume::OpenFST(std::unique_ptr dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode) +FSTVolume* FSTVolume::OpenFST(std::unique_ptr dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode, NCrypto::TMDParser* optionalTMD) { FSTDataSource* ds = dataSource.release(); - FSTVolume* fstVolume = OpenFST(ds, fstOffset, fstSize, partitionTitleKey, fstHashMode); + FSTVolume* fstVolume = OpenFST(ds, fstOffset, fstSize, partitionTitleKey, fstHashMode, optionalTMD); if (!fstVolume) { delete ds; @@ -669,25 +738,25 @@ bool FSTVolume::OpenFile(std::string_view path, FSTFileHandle& fileHandleOut, bo return true; } -bool FSTVolume::IsDirectory(FSTFileHandle& fileHandle) const +bool FSTVolume::IsDirectory(const FSTFileHandle& fileHandle) const { cemu_assert_debug(fileHandle.m_fstIndex < m_entries.size()); return m_entries[fileHandle.m_fstIndex].GetType() == FSTEntry::TYPE::DIRECTORY; }; -bool FSTVolume::IsFile(FSTFileHandle& fileHandle) const +bool FSTVolume::IsFile(const FSTFileHandle& fileHandle) const { cemu_assert_debug(fileHandle.m_fstIndex < m_entries.size()); return m_entries[fileHandle.m_fstIndex].GetType() == FSTEntry::TYPE::FILE; }; -bool FSTVolume::HasLinkFlag(FSTFileHandle& fileHandle) const +bool FSTVolume::HasLinkFlag(const FSTFileHandle& fileHandle) const { cemu_assert_debug(fileHandle.m_fstIndex < m_entries.size()); return HAS_FLAG(m_entries[fileHandle.m_fstIndex].GetFlags(), FSTEntry::FLAGS::FLAG_LINK); }; -std::string_view FSTVolume::GetName(FSTFileHandle& fileHandle) const +std::string_view FSTVolume::GetName(const FSTFileHandle& fileHandle) const { if (fileHandle.m_fstIndex > m_entries.size()) return ""; @@ -695,7 +764,7 @@ std::string_view FSTVolume::GetName(FSTFileHandle& fileHandle) const return entryName; } -std::string FSTVolume::GetPath(FSTFileHandle& fileHandle) const +std::string FSTVolume::GetPath(const FSTFileHandle& fileHandle) const { std::string path; auto& entry = m_entries[fileHandle.m_fstIndex]; @@ -726,7 +795,7 @@ std::string FSTVolume::GetPath(FSTFileHandle& fileHandle) const return path; } -uint32 FSTVolume::GetFileSize(FSTFileHandle& fileHandle) const +uint32 FSTVolume::GetFileSize(const FSTFileHandle& fileHandle) const { if (m_entries[fileHandle.m_fstIndex].GetType() != FSTEntry::TYPE::FILE) return 0; @@ -740,7 +809,7 @@ uint32 FSTVolume::ReadFile(FSTFileHandle& fileHandle, uint32 offset, uint32 size return 0; cemu_assert_debug(!HAS_FLAG(entry.GetFlags(), 
FSTEntry::FLAGS::FLAG_LINK)); FSTCluster& cluster = m_cluster[entry.fileInfo.clusterIndex]; - if (cluster.hashMode == ClusterHashMode::RAW || cluster.hashMode == ClusterHashMode::RAW2) + if (cluster.hashMode == ClusterHashMode::RAW || cluster.hashMode == ClusterHashMode::RAW_STREAM) return ReadFile_HashModeRaw(entry.fileInfo.clusterIndex, entry, offset, size, dataOut); else if (cluster.hashMode == ClusterHashMode::HASH_INTERLEAVED) return ReadFile_HashModeHashed(entry.fileInfo.clusterIndex, entry, offset, size, dataOut); @@ -748,87 +817,15 @@ uint32 FSTVolume::ReadFile(FSTFileHandle& fileHandle, uint32 offset, uint32 size return 0; } -uint32 FSTVolume::ReadFile_HashModeRaw(uint32 clusterIndex, FSTEntry& entry, uint32 readOffset, uint32 readSize, void* dataOut) -{ - const uint32 readSizeInput = readSize; - uint8* dataOutU8 = (uint8*)dataOut; - if (readOffset >= entry.fileInfo.fileSize) - return 0; - else if ((readOffset + readSize) >= entry.fileInfo.fileSize) - readSize = (entry.fileInfo.fileSize - readOffset); - - const FSTCluster& cluster = m_cluster[clusterIndex]; - uint64 clusterOffset = (uint64)cluster.offset * m_sectorSize; - uint64 absFileOffset = entry.fileInfo.fileOffset * m_offsetFactor + readOffset; - - // make sure the raw range we read is aligned to AES block size (16) - uint64 readAddrStart = absFileOffset & ~0xF; - uint64 readAddrEnd = (absFileOffset + readSize + 0xF) & ~0xF; - - bool usesInitialIV = readOffset < 16; - if (!usesInitialIV) - readAddrStart -= 16; // read previous AES block since we require it for the IV - uint32 prePadding = (uint32)(absFileOffset - readAddrStart); // number of extra bytes we read before readOffset (for AES alignment and IV calculation) - uint32 postPadding = (uint32)(readAddrEnd - (absFileOffset + readSize)); - - uint8 readBuffer[64 * 1024]; - // read first chunk - // if file read offset (readOffset) is within the first AES-block then use initial IV calculated from cluster index - // otherwise read previous AES-block is the IV (AES-CBC) - uint64 readAddrCurrent = readAddrStart; - uint32 rawBytesToRead = (uint32)std::min((readAddrEnd - readAddrStart), (uint64)sizeof(readBuffer)); - if (m_dataSource->readData(clusterIndex, clusterOffset, readAddrCurrent, readBuffer, rawBytesToRead) != rawBytesToRead) - { - cemuLog_log(LogType::Force, "FST read error in raw content"); - return 0; - } - readAddrCurrent += rawBytesToRead; - - uint8 iv[16]{}; - if (usesInitialIV) - { - // for the first AES block, the IV is initialized from cluster index - iv[0] = (uint8)(clusterIndex >> 8); - iv[1] = (uint8)(clusterIndex >> 0); - AES128_CBC_decrypt_updateIV(readBuffer, readBuffer, rawBytesToRead, m_partitionTitlekey.b, iv); - std::memcpy(dataOutU8, readBuffer + prePadding, rawBytesToRead - prePadding - postPadding); - dataOutU8 += (rawBytesToRead - prePadding - postPadding); - readSize -= (rawBytesToRead - prePadding - postPadding); - } - else - { - // IV is initialized from previous AES block (AES-CBC) - std::memcpy(iv, readBuffer, 16); - AES128_CBC_decrypt_updateIV(readBuffer + 16, readBuffer + 16, rawBytesToRead - 16, m_partitionTitlekey.b, iv); - std::memcpy(dataOutU8, readBuffer + prePadding, rawBytesToRead - prePadding - postPadding); - dataOutU8 += (rawBytesToRead - prePadding - postPadding); - readSize -= (rawBytesToRead - prePadding - postPadding); - } - - // read remaining chunks - while (readSize > 0) - { - uint32 bytesToRead = (uint32)std::min((uint32)sizeof(readBuffer), readSize); - uint32 alignedBytesToRead = (bytesToRead + 15) & ~0xF; - if 
(m_dataSource->readData(clusterIndex, clusterOffset, readAddrCurrent, readBuffer, alignedBytesToRead) != alignedBytesToRead) - { - cemuLog_log(LogType::Force, "FST read error in raw content"); - return 0; - } - AES128_CBC_decrypt_updateIV(readBuffer, readBuffer, alignedBytesToRead, m_partitionTitlekey.b, iv); - std::memcpy(dataOutU8, readBuffer, bytesToRead); - dataOutU8 += bytesToRead; - readSize -= bytesToRead; - readAddrCurrent += alignedBytesToRead; - } - - return readSizeInput - readSize; -} - constexpr size_t BLOCK_SIZE = 0x10000; constexpr size_t BLOCK_HASH_SIZE = 0x0400; constexpr size_t BLOCK_FILE_SIZE = 0xFC00; +struct FSTRawBlock +{ + std::vector rawData; // unhashed block size depends on sector size field in partition header +}; + struct FSTHashedBlock { uint8 rawData[BLOCK_SIZE]; @@ -870,12 +867,160 @@ struct FSTHashedBlock static_assert(sizeof(FSTHashedBlock) == BLOCK_SIZE); +struct FSTCachedRawBlock +{ + FSTRawBlock blockData; + NCrypto::AesIv ivForNextBlock; + uint64 lastAccess; +}; + struct FSTCachedHashedBlock { FSTHashedBlock blockData; uint64 lastAccess; }; +// Checks cache fill state and if necessary drops least recently accessed block from the cache. Optionally allows to recycle the released cache entry to cut down cost of memory allocation and clearing +void FSTVolume::TrimCacheIfRequired(FSTCachedRawBlock** droppedRawBlock, FSTCachedHashedBlock** droppedHashedBlock) +{ + // calculate size used by cache + size_t cacheSize = 0; + for (auto& itr : m_cacheDecryptedRawBlocks) + cacheSize += itr.second->blockData.rawData.size(); + for (auto& itr : m_cacheDecryptedHashedBlocks) + cacheSize += sizeof(FSTCachedHashedBlock) + sizeof(FSTHashedBlock); + // only trim if cache is full (larger than 2MB) + if (cacheSize < 2*1024*1024) // 2MB + return; + // scan both cache lists to find least recently accessed block to drop + auto dropRawItr = std::min_element(m_cacheDecryptedRawBlocks.begin(), m_cacheDecryptedRawBlocks.end(), [](const auto& a, const auto& b) -> bool + { return a.second->lastAccess < b.second->lastAccess; }); + auto dropHashedItr = std::min_element(m_cacheDecryptedHashedBlocks.begin(), m_cacheDecryptedHashedBlocks.end(), [](const auto& a, const auto& b) -> bool + { return a.second->lastAccess < b.second->lastAccess; }); + uint64 lastAccess = std::numeric_limits::max(); + if(dropRawItr != m_cacheDecryptedRawBlocks.end()) + lastAccess = dropRawItr->second->lastAccess; + if(dropHashedItr != m_cacheDecryptedHashedBlocks.end()) + lastAccess = std::min(lastAccess, dropHashedItr->second->lastAccess); + if(dropRawItr != m_cacheDecryptedRawBlocks.end() && dropRawItr->second->lastAccess == lastAccess) + { + if (droppedRawBlock) + *droppedRawBlock = dropRawItr->second; + else + delete dropRawItr->second; + m_cacheDecryptedRawBlocks.erase(dropRawItr); + return; + } + else if(dropHashedItr != m_cacheDecryptedHashedBlocks.end() && dropHashedItr->second->lastAccess == lastAccess) + { + if (droppedHashedBlock) + *droppedHashedBlock = dropHashedItr->second; + else + delete dropHashedItr->second; + m_cacheDecryptedHashedBlocks.erase(dropHashedItr); + } +} + +void FSTVolume::DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex, NCrypto::AesIv& ivOut) +{ + ivOut = {}; + if(blockIndex == 0) + { + ivOut.iv[0] = (uint8)(clusterIndex >> 8); + ivOut.iv[1] = (uint8)(clusterIndex >> 0); + } + else + { + // the last 16 encrypted bytes of the previous block are the IV (AES CBC) + // if the previous block is cached we can grab the IV from there. 
Otherwise we have to read the 16 bytes from the data source + uint32 prevBlockIndex = blockIndex - 1; + uint64 cacheBlockId = ((uint64)clusterIndex << (64 - 16)) | (uint64)prevBlockIndex; + auto itr = m_cacheDecryptedRawBlocks.find(cacheBlockId); + if (itr != m_cacheDecryptedRawBlocks.end()) + { + ivOut = itr->second->ivForNextBlock; + } + else + { + cemu_assert(m_sectorSize >= NCrypto::AesIv::SIZE); + uint64 clusterOffset = (uint64)m_cluster[clusterIndex].offset * m_sectorSize; + NCrypto::AesIv prevIV{}; + if (m_dataSource->readData(clusterIndex, clusterOffset, blockIndex * m_sectorSize - NCrypto::AesIv::SIZE, prevIV.iv, NCrypto::AesIv::SIZE) != NCrypto::AesIv::SIZE) + { + cemuLog_log(LogType::Force, "Failed to read IV for raw FST block"); + m_detectedCorruption = true; + return; + } + ivOut = prevIV; + } + } +} + +FSTCachedRawBlock* FSTVolume::GetDecryptedRawBlock(uint32 clusterIndex, uint32 blockIndex) +{ + FSTCluster& cluster = m_cluster[clusterIndex]; + uint64 clusterOffset = (uint64)cluster.offset * m_sectorSize; + // generate id for cache + uint64 cacheBlockId = ((uint64)clusterIndex << (64 - 16)) | (uint64)blockIndex; + // lookup block in cache + FSTCachedRawBlock* block = nullptr; + auto itr = m_cacheDecryptedRawBlocks.find(cacheBlockId); + if (itr != m_cacheDecryptedRawBlocks.end()) + { + block = itr->second; + block->lastAccess = ++m_cacheAccessCounter; + return block; + } + // if cache already full, drop least recently accessed block and recycle FSTCachedRawBlock object if possible + TrimCacheIfRequired(&block, nullptr); + if (!block) + block = new FSTCachedRawBlock(); + block->blockData.rawData.resize(m_sectorSize); + // block not cached, read new + block->lastAccess = ++m_cacheAccessCounter; + if (m_dataSource->readData(clusterIndex, clusterOffset, blockIndex * m_sectorSize, block->blockData.rawData.data(), m_sectorSize) != m_sectorSize) + { + cemuLog_log(LogType::Force, "Failed to read raw FST block"); + delete block; + m_detectedCorruption = true; + return nullptr; + } + // decrypt hash data + NCrypto::AesIv iv{}; + DetermineUnhashedBlockIV(clusterIndex, blockIndex, iv); + std::copy(block->blockData.rawData.data() + m_sectorSize - NCrypto::AesIv::SIZE, block->blockData.rawData.data() + m_sectorSize, block->ivForNextBlock.iv); + AES128_CBC_decrypt(block->blockData.rawData.data(), block->blockData.rawData.data(), m_sectorSize, m_partitionTitlekey.b, iv.iv); + // if this is the next block, then hash it + if(cluster.hasContentHash) + { + if(cluster.singleHashNumBlocksHashed == blockIndex) + { + cemu_assert_debug(!(cluster.contentSize % m_sectorSize)); // size should be multiple of sector size? Regardless, the hashing code below can handle non-aligned sizes + bool isLastBlock = blockIndex == (std::max(cluster.contentSize / m_sectorSize, 1) - 1); + uint32 hashSize = m_sectorSize; + if(isLastBlock) + hashSize = cluster.contentSize - (uint64)blockIndex*m_sectorSize; + EVP_DigestUpdate(cluster.singleHashCtx.get(), block->blockData.rawData.data(), hashSize); + cluster.singleHashNumBlocksHashed++; + if(isLastBlock) + { + uint8 hash[32]; + EVP_DigestFinal_ex(cluster.singleHashCtx.get(), hash, nullptr); + if(memcmp(hash, cluster.contentHash32, cluster.contentHashIsSHA1 ? 
20 : 32) != 0) + { + cemuLog_log(LogType::Force, "FST: Raw section hash mismatch"); + delete block; + m_detectedCorruption = true; + return nullptr; + } + } + } + } + // register in cache + m_cacheDecryptedRawBlocks.emplace(cacheBlockId, block); + return block; +} + FSTCachedHashedBlock* FSTVolume::GetDecryptedHashedBlock(uint32 clusterIndex, uint32 blockIndex) { const FSTCluster& cluster = m_cluster[clusterIndex]; @@ -891,22 +1036,17 @@ FSTCachedHashedBlock* FSTVolume::GetDecryptedHashedBlock(uint32 clusterIndex, ui block->lastAccess = ++m_cacheAccessCounter; return block; } - // if cache already full, drop least recently accessed block (but recycle the FSTHashedBlock* object) - if (m_cacheDecryptedHashedBlocks.size() >= 16) - { - auto dropItr = std::min_element(m_cacheDecryptedHashedBlocks.begin(), m_cacheDecryptedHashedBlocks.end(), [](const auto& a, const auto& b) -> bool - { return a.second->lastAccess < b.second->lastAccess; }); - block = dropItr->second; - m_cacheDecryptedHashedBlocks.erase(dropItr); - } - else + // if cache already full, drop least recently accessed block and recycle FSTCachedHashedBlock object if possible + TrimCacheIfRequired(nullptr, &block); + if (!block) block = new FSTCachedHashedBlock(); // block not cached, read new block->lastAccess = ++m_cacheAccessCounter; if (m_dataSource->readData(clusterIndex, clusterOffset, blockIndex * BLOCK_SIZE, block->blockData.rawData, BLOCK_SIZE) != BLOCK_SIZE) { - cemuLog_log(LogType::Force, "Failed to read FST block"); + cemuLog_log(LogType::Force, "Failed to read hashed FST block"); delete block; + m_detectedCorruption = true; return nullptr; } // decrypt hash data @@ -914,11 +1054,46 @@ FSTCachedHashedBlock* FSTVolume::GetDecryptedHashedBlock(uint32 clusterIndex, ui AES128_CBC_decrypt(block->blockData.getHashData(), block->blockData.getHashData(), BLOCK_HASH_SIZE, m_partitionTitlekey.b, iv); // decrypt file data AES128_CBC_decrypt(block->blockData.getFileData(), block->blockData.getFileData(), BLOCK_FILE_SIZE, m_partitionTitlekey.b, block->blockData.getH0Hash(blockIndex%16)); + // compare with H0 to verify data integrity + NCrypto::CHash160 h0; + SHA1(block->blockData.getFileData(), BLOCK_FILE_SIZE, h0.b); + uint32 h0Index = (blockIndex % 4096); + if (memcmp(h0.b, block->blockData.getH0Hash(h0Index & 0xF), sizeof(h0.b)) != 0) + { + cemuLog_log(LogType::Force, "FST: Hash H0 mismatch in hashed block (section {} index {})", clusterIndex, blockIndex); + delete block; + m_detectedCorruption = true; + return nullptr; + } // register in cache m_cacheDecryptedHashedBlocks.emplace(cacheBlockId, block); return block; } +uint32 FSTVolume::ReadFile_HashModeRaw(uint32 clusterIndex, FSTEntry& entry, uint32 readOffset, uint32 readSize, void* dataOut) +{ + uint8* dataOutU8 = (uint8*)dataOut; + if (readOffset >= entry.fileInfo.fileSize) + return 0; + else if ((readOffset + readSize) >= entry.fileInfo.fileSize) + readSize = (entry.fileInfo.fileSize - readOffset); + uint64 absFileOffset = entry.fileInfo.fileOffset * m_offsetFactor + readOffset; + uint32 remainingReadSize = readSize; + while (remainingReadSize > 0) + { + const FSTCachedRawBlock* rawBlock = this->GetDecryptedRawBlock(clusterIndex, absFileOffset/m_sectorSize); + if (!rawBlock) + break; + uint32 blockOffset = (uint32)(absFileOffset % m_sectorSize); + uint32 bytesToRead = std::min(remainingReadSize, m_sectorSize - blockOffset); + std::memcpy(dataOutU8, rawBlock->blockData.rawData.data() + blockOffset, bytesToRead); + dataOutU8 += bytesToRead; + remainingReadSize -= bytesToRead; + 
absFileOffset += bytesToRead; + } + return readSize - remainingReadSize; +} + uint32 FSTVolume::ReadFile_HashModeHashed(uint32 clusterIndex, FSTEntry& entry, uint32 readOffset, uint32 readSize, void* dataOut) { /* @@ -949,7 +1124,6 @@ uint32 FSTVolume::ReadFile_HashModeHashed(uint32 clusterIndex, FSTEntry& entry, */ const FSTCluster& cluster = m_cluster[clusterIndex]; - uint64 clusterBaseOffset = (uint64)cluster.offset * m_sectorSize; uint64 fileReadOffset = entry.fileInfo.fileOffset * m_offsetFactor + readOffset; uint32 blockIndex = (uint32)(fileReadOffset / BLOCK_FILE_SIZE); uint32 bytesRemaining = readSize; @@ -977,6 +1151,7 @@ bool FSTVolume::OpenDirectoryIterator(std::string_view path, FSTDirectoryIterato if (!IsDirectory(fileHandle)) return false; auto const& fstEntry = m_entries[fileHandle.m_fstIndex]; + directoryIteratorOut.dirHandle = fileHandle; directoryIteratorOut.startIndex = fileHandle.m_fstIndex + 1; directoryIteratorOut.endIndex = fstEntry.dirInfo.endIndex; directoryIteratorOut.currentIndex = directoryIteratorOut.startIndex; @@ -1001,6 +1176,8 @@ bool FSTVolume::Next(FSTDirectoryIterator& directoryIterator, FSTFileHandle& fil FSTVolume::~FSTVolume() { + for (auto& itr : m_cacheDecryptedRawBlocks) + delete itr.second; for (auto& itr : m_cacheDecryptedHashedBlocks) delete itr.second; if (m_sourceIsOwned) @@ -1097,4 +1274,4 @@ bool FSTVerifier::VerifyHashedContentFile(FileStream* fileContent, const NCrypto void FSTVolumeTest() { FSTPathUnitTest(); -} \ No newline at end of file +} diff --git a/src/Cafe/Filesystem/FST/FST.h b/src/Cafe/Filesystem/FST/FST.h index 3f59152f..26201c32 100644 --- a/src/Cafe/Filesystem/FST/FST.h +++ b/src/Cafe/Filesystem/FST/FST.h @@ -1,5 +1,6 @@ #pragma once #include "Cemu/ncrypto/ncrypto.h" +#include "openssl/evp.h" struct FSTFileHandle { @@ -11,7 +12,13 @@ private: struct FSTDirectoryIterator { friend class FSTVolume; + + const FSTFileHandle& GetDirHandle() const + { + return dirHandle; + } private: + FSTFileHandle dirHandle; uint32 startIndex; uint32 endIndex; uint32 currentIndex; @@ -20,28 +27,39 @@ private: class FSTVolume { public: + enum class ErrorCode + { + OK = 0, + UNKNOWN_ERROR = 1, + DISC_KEY_MISSING = 2, + TITLE_TIK_MISSING = 3, + BAD_TITLE_TMD = 4, + BAD_TITLE_TIK = 5, + }; + static bool FindDiscKey(const fs::path& path, NCrypto::AesKey& discTitleKey); - static FSTVolume* OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& discTitleKey); - static FSTVolume* OpenFromDiscImage(const fs::path& path, bool* keyFound = nullptr); - static FSTVolume* OpenFromContentFolder(fs::path folderPath); + static FSTVolume* OpenFromDiscImage(const fs::path& path, NCrypto::AesKey& discTitleKey, ErrorCode* errorCodeOut = nullptr); + static FSTVolume* OpenFromDiscImage(const fs::path& path, ErrorCode* errorCodeOut = nullptr); + static FSTVolume* OpenFromContentFolder(fs::path folderPath, ErrorCode* errorCodeOut = nullptr); ~FSTVolume(); uint32 GetFileCount() const; + bool HasCorruption() const { return m_detectedCorruption; } bool OpenFile(std::string_view path, FSTFileHandle& fileHandleOut, bool openOnlyFiles = false); // file and directory functions - bool IsDirectory(FSTFileHandle& fileHandle) const; - bool IsFile(FSTFileHandle& fileHandle) const; - bool HasLinkFlag(FSTFileHandle& fileHandle) const; + bool IsDirectory(const FSTFileHandle& fileHandle) const; + bool IsFile(const FSTFileHandle& fileHandle) const; + bool HasLinkFlag(const FSTFileHandle& fileHandle) const; - std::string_view GetName(FSTFileHandle& fileHandle) const; - std::string 
GetPath(FSTFileHandle& fileHandle) const; + std::string_view GetName(const FSTFileHandle& fileHandle) const; + std::string GetPath(const FSTFileHandle& fileHandle) const; // file functions - uint32 GetFileSize(FSTFileHandle& fileHandle) const; + uint32 GetFileSize(const FSTFileHandle& fileHandle) const; uint32 ReadFile(FSTFileHandle& fileHandle, uint32 offset, uint32 size, void* dataOut); // directory iterator @@ -65,20 +83,29 @@ public: } private: - /* FST data (in memory) */ enum class ClusterHashMode : uint8 { RAW = 0, // raw data + encryption, no hashing? - RAW2 = 1, // raw data + encryption, with hash stored in tmd? + RAW_STREAM = 1, // raw data + encryption, with hash stored in tmd? HASH_INTERLEAVED = 2, // hashes + raw interleaved in 0x10000 blocks (0x400 bytes of hashes at the beginning, followed by 0xFC00 bytes of data) }; struct FSTCluster { + FSTCluster() : singleHashCtx(nullptr, &EVP_MD_CTX_free) {} + uint32 offset; uint32 size; ClusterHashMode hashMode; + // extra data if TMD is available + bool hasContentHash; + uint8 contentHash32[32]; + bool contentHashIsSHA1; // if true then it's SHA1 (with extra bytes zeroed out), otherwise it's SHA256 + uint64 contentSize; // size of the content (in blocks) + // hash context for single hash mode (content hash must be available) + std::unique_ptr singleHashCtx; // unique_ptr to make this move-only + uint32 singleHashNumBlocksHashed{0}; }; struct FSTEntry @@ -148,17 +175,30 @@ private: bool m_sourceIsOwned{}; uint32 m_sectorSize{}; // for cluster offsets uint32 m_offsetFactor{}; // for file offsets + bool m_hashIsDisabled{}; // disables hash verification (for all clusters of this volume?) std::vector m_cluster; std::vector m_entries; std::vector m_nameStringTable; NCrypto::AesKey m_partitionTitlekey; + bool m_detectedCorruption{false}; - /* Cache for decrypted hashed blocks */ + bool HashIsDisabled() const + { + return m_hashIsDisabled; + } + + /* Cache for decrypted raw and hashed blocks */ + std::unordered_map m_cacheDecryptedRawBlocks; std::unordered_map m_cacheDecryptedHashedBlocks; uint64 m_cacheAccessCounter{}; + void DetermineUnhashedBlockIV(uint32 clusterIndex, uint32 blockIndex, NCrypto::AesIv& ivOut); + + struct FSTCachedRawBlock* GetDecryptedRawBlock(uint32 clusterIndex, uint32 blockIndex); struct FSTCachedHashedBlock* GetDecryptedHashedBlock(uint32 clusterIndex, uint32 blockIndex); + void TrimCacheIfRequired(struct FSTCachedRawBlock** droppedRawBlock, struct FSTCachedHashedBlock** droppedHashedBlock); + /* File reading */ uint32 ReadFile_HashModeRaw(uint32 clusterIndex, FSTEntry& entry, uint32 readOffset, uint32 readSize, void* dataOut); uint32 ReadFile_HashModeHashed(uint32 clusterIndex, FSTEntry& entry, uint32 readOffset, uint32 readSize, void* dataOut); @@ -169,7 +209,10 @@ private: /* +0x00 */ uint32be magic; /* +0x04 */ uint32be offsetFactor; /* +0x08 */ uint32be numCluster; - /* +0x0C */ uint32be ukn0C; + /* +0x0C */ uint8be hashIsDisabled; + /* +0x0D */ uint8be ukn0D; + /* +0x0E */ uint8be ukn0E; + /* +0x0F */ uint8be ukn0F; /* +0x10 */ uint32be ukn10; /* +0x14 */ uint32be ukn14; /* +0x18 */ uint32be ukn18; @@ -246,8 +289,8 @@ private: static_assert(sizeof(FSTHeader_FileEntry) == 0x10); - static FSTVolume* OpenFST(FSTDataSource* dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode); - static FSTVolume* OpenFST(std::unique_ptr dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode); + static FSTVolume* 
OpenFST(FSTDataSource* dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode, NCrypto::TMDParser* optionalTMD); + static FSTVolume* OpenFST(std::unique_ptr dataSource, uint64 fstOffset, uint32 fstSize, NCrypto::AesKey* partitionTitleKey, ClusterHashMode fstHashMode, NCrypto::TMDParser* optionalTMD); static bool ProcessFST(FSTHeader_FileEntry* fileTable, uint32 numFileEntries, uint32 numCluster, std::vector& nameStringTable, std::vector& fstEntries); bool MatchFSTEntryName(FSTEntry& entry, std::string_view comparedName) diff --git a/src/Cafe/Filesystem/FST/KeyCache.cpp b/src/Cafe/Filesystem/FST/KeyCache.cpp index 587a5dd6..29903e84 100644 --- a/src/Cafe/Filesystem/FST/KeyCache.cpp +++ b/src/Cafe/Filesystem/FST/KeyCache.cpp @@ -1,5 +1,6 @@ #include #include +#include #include "config/ActiveSettings.h" #include "util/crypto/aes128.h" @@ -59,7 +60,7 @@ void KeyCache_Prepare() sKeyCachePrepared = true; g_keyCache.clear(); // load keys - auto keysPath = ActiveSettings::GetPath("keys.txt"); + auto keysPath = ActiveSettings::GetUserDataPath("keys.txt"); FileStream* fs_keys = FileStream::openFile2(keysPath); if( !fs_keys ) { @@ -74,7 +75,7 @@ void KeyCache_Prepare() } else { - wxMessageBox("Unable to create file keys.txt\nThis can happen if Cemu does not have write permission to it's own directory, the disk is full or if anti-virus software is blocking Cemu.", "Error", wxOK | wxCENTRE | wxICON_ERROR); + wxMessageBox(_("Unable to create file keys.txt\nThis can happen if Cemu does not have write permission to its own directory, the disk is full or if anti-virus software is blocking Cemu."), _("Error"), wxOK | wxCENTRE | wxICON_ERROR); } mtxKeyCache.unlock(); return; @@ -107,10 +108,8 @@ void KeyCache_Prepare() continue; if( strishex(line) == false ) { - // show error message - char errorMsg[512]; - sprintf(errorMsg, "Error in keys.txt in line %d\n", lineNumber); - wxMessageBox(errorMsg, "Error", wxOK | wxCENTRE | wxICON_ERROR); + auto errorMsg = formatWxString(_("Error in keys.txt at line {}"), lineNumber); + wxMessageBox(errorMsg, _("Error"), wxOK | wxCENTRE | wxICON_ERROR); continue; } if(line.size() == 32 ) diff --git a/src/Cafe/Filesystem/FST/fstUtil.h b/src/Cafe/Filesystem/FST/fstUtil.h index 4ea9465d..a432cc95 100644 --- a/src/Cafe/Filesystem/FST/fstUtil.h +++ b/src/Cafe/Filesystem/FST/fstUtil.h @@ -3,6 +3,8 @@ #include +#include "../fsc.h" + // path parser and utility class for Wii U paths // optimized to be allocation-free for common path lengths class FSCPath @@ -28,6 +30,8 @@ class FSCPath { if (m_names.size() > 0xFFFF) return; + if (nameLen == 1 && *name == '.') + return; m_nodes.emplace_back((uint16)m_names.size(), nameLen); m_names.insert(m_names.end(), name, name + nameLen); } @@ -117,9 +121,7 @@ public: template class FSAFileTree { -public: - -private: + private: enum NODETYPE : uint8 { @@ -131,6 +133,7 @@ private: { std::string name; std::vector subnodes; + size_t fileSize; F* custom; NODETYPE type; }; @@ -177,13 +180,54 @@ private: return newNode; } + class DirectoryIterator : public FSCVirtualFile + { + public: + DirectoryIterator(node_t* node) + : m_node(node), m_subnodeIndex(0) + { + } + + sint32 fscGetType() override + { + return FSC_TYPE_DIRECTORY; + } + + bool fscDirNext(FSCDirEntry* dirEntry) override + { + if (m_subnodeIndex >= m_node->subnodes.size()) + return false; + + const node_t* subnode = m_node->subnodes[m_subnodeIndex]; + + strncpy(dirEntry->path, subnode->name.c_str(), sizeof(dirEntry->path) - 1); + 
dirEntry->path[sizeof(dirEntry->path) - 1] = '\0'; + dirEntry->isDirectory = subnode->type == FSAFileTree::NODETYPE_DIRECTORY; + dirEntry->isFile = subnode->type == FSAFileTree::NODETYPE_FILE; + dirEntry->fileSize = subnode->type == FSAFileTree::NODETYPE_FILE ? subnode->fileSize : 0; + + ++m_subnodeIndex; + return true; + } + + bool fscRewindDir() override + { + m_subnodeIndex = 0; + return true; + } + + private: + node_t* m_node; + size_t m_subnodeIndex; + }; + public: FSAFileTree() { rootNode.type = NODETYPE_DIRECTORY; } - bool addFile(std::string_view path, F* custom) + bool addFile(std::string_view path, size_t fileSize, F* custom) { FSCPath p(path); if (p.GetNodeCount() == 0) @@ -194,6 +238,7 @@ public: return false; // node already exists // add file node node_t* fileNode = newNode(directoryNode, NODETYPE_FILE, p.GetNodeName(p.GetNodeCount() - 1)); + fileNode->fileSize = fileSize; fileNode->custom = custom; return true; } @@ -212,6 +257,20 @@ public: return true; } + bool getDirectory(std::string_view path, FSCVirtualFile*& dirIterator) + { + FSCPath p(path); + if (p.GetNodeCount() == 0) + return false; + node_t* node = getByNodePath(p, p.GetNodeCount(), false); + if (node == nullptr) + return false; + if (node->type != NODETYPE_DIRECTORY) + return false; + dirIterator = new DirectoryIterator(node); + return true; + } + bool removeFile(std::string_view path) { FSCPath p(path); @@ -297,6 +356,12 @@ static void FSTPathUnitTest() cemu_assert_debug(p6.GetNodeCount() == 0); p6 = FSCPath("/////////////"); cemu_assert_debug(p6.GetNodeCount() == 0); + // test 7 - periods in path + FSCPath p7("/vol/content/./.."); + cemu_assert_debug(p7.GetNodeCount() == 3); + cemu_assert_debug(p7.MatchNodeName(0, "vol")); + cemu_assert_debug(p7.MatchNodeName(1, "content")); + cemu_assert_debug(p7.MatchNodeName(2, "..")); } diff --git a/src/Cafe/Filesystem/WUHB/RomFSStructs.h b/src/Cafe/Filesystem/WUHB/RomFSStructs.h new file mode 100644 index 00000000..59ef503f --- /dev/null +++ b/src/Cafe/Filesystem/WUHB/RomFSStructs.h @@ -0,0 +1,40 @@ +#pragma once + +struct romfs_header_t +{ + uint32 header_magic; + uint32be header_size; + uint64be dir_hash_table_ofs; + uint64be dir_hash_table_size; + uint64be dir_table_ofs; + uint64be dir_table_size; + uint64be file_hash_table_ofs; + uint64be file_hash_table_size; + uint64be file_table_ofs; + uint64be file_table_size; + uint64be file_partition_ofs; +}; + +struct romfs_direntry_t +{ + uint32be parent; + uint32be listNext; // offset to next directory entry in linked list of parent directory (aka "sibling") + uint32be dirListHead; // offset to first entry in linked list of directory entries (aka "child") + uint32be fileListHead; // offset to first entry in linked list of file entries (aka "file") + uint32be hash; + uint32be name_size; + std::string name; +}; + +struct romfs_fentry_t +{ + uint32be parent; + uint32be listNext; // offset to next file entry in linked list of parent directory (aka "sibling") + uint64be offset; + uint64be size; + uint32be hash; + uint32be name_size; + std::string name; +}; + +#define ROMFS_ENTRY_EMPTY 0xFFFFFFFF diff --git a/src/Cafe/Filesystem/WUHB/WUHBReader.cpp b/src/Cafe/Filesystem/WUHB/WUHBReader.cpp new file mode 100644 index 00000000..e7a4c9be --- /dev/null +++ b/src/Cafe/Filesystem/WUHB/WUHBReader.cpp @@ -0,0 +1,224 @@ +#include "WUHBReader.h" +WUHBReader* WUHBReader::FromPath(const fs::path& path) +{ + FileStream* fileIn{FileStream::openFile2(path)}; + if (!fileIn) + return nullptr; + + WUHBReader* ret = new WUHBReader(fileIn); + if 
(!ret->CheckMagicValue()) + { + delete ret; + return nullptr; + } + + if (!ret->ReadHeader()) + { + delete ret; + return nullptr; + } + + return ret; +} + +static const romfs_direntry_t fallbackDirEntry{ + .parent = ROMFS_ENTRY_EMPTY, + .listNext = ROMFS_ENTRY_EMPTY, + .dirListHead = ROMFS_ENTRY_EMPTY, + .fileListHead = ROMFS_ENTRY_EMPTY, + .hash = ROMFS_ENTRY_EMPTY, + .name_size = 0, + .name = "" +}; +static const romfs_fentry_t fallbackFileEntry{ + .parent = ROMFS_ENTRY_EMPTY, + .listNext = ROMFS_ENTRY_EMPTY, + .offset = 0, + .size = 0, + .hash = ROMFS_ENTRY_EMPTY, + .name_size = 0, + .name = "" +}; +template +const WUHBReader::EntryType& WUHBReader::GetFallback() +{ + if constexpr (File) + return fallbackFileEntry; + else + return fallbackDirEntry; +} + +template +WUHBReader::EntryType WUHBReader::GetEntry(uint32 offset) const +{ + auto fallback = GetFallback(); + if(offset == ROMFS_ENTRY_EMPTY) + return fallback; + + const char* typeName = File ? "fentry" : "direntry"; + EntryType ret; + if (offset >= (File ? m_header.file_table_size : m_header.dir_table_size)) + { + cemuLog_log(LogType::Force, "WUHB {} offset exceeds table size declared in header", typeName); + return fallback; + } + + // read the entry + m_fileIn->SetPosition((File ? m_header.file_table_ofs : m_header.dir_table_ofs) + offset); + auto read = m_fileIn->readData(&ret, offsetof(EntryType, name)); + if (read != offsetof(EntryType, name)) + { + cemuLog_log(LogType::Force, "failed to read WUHB {} at offset: {}", typeName, offset); + return fallback; + } + + // read the name + ret.name.resize(ret.name_size); + read = m_fileIn->readData(ret.name.data(), ret.name_size); + if (read != ret.name_size) + { + cemuLog_log(LogType::Force, "failed to read WUHB {} name", typeName); + return fallback; + } + + return ret; +} + +romfs_direntry_t WUHBReader::GetDirEntry(uint32 offset) const +{ + return GetEntry(offset); +} +romfs_fentry_t WUHBReader::GetFileEntry(uint32 offset) const +{ + return GetEntry(offset); +} + +uint64 WUHBReader::GetFileSize(uint32 entryOffset) const +{ + return GetFileEntry(entryOffset).size; +} + +uint64 WUHBReader::ReadFromFile(uint32 entryOffset, uint64 fileOffset, uint64 length, void* buffer) const +{ + const auto fileEntry = GetFileEntry(entryOffset); + if (fileOffset >= fileEntry.size) + return 0; + const uint64 readAmount = std::min(length, fileEntry.size - fileOffset); + const uint64 wuhbOffset = m_header.file_partition_ofs + fileEntry.offset + fileOffset; + m_fileIn->SetPosition(wuhbOffset); + return m_fileIn->readData(buffer, readAmount); +} + +uint32 WUHBReader::GetHashTableEntryOffset(uint32 hash, bool isFile) const +{ + const uint64 hash_table_size = (isFile ? m_header.file_hash_table_size : m_header.dir_hash_table_size); + const uint64 hash_table_ofs = (isFile ? 
m_header.file_hash_table_ofs : m_header.dir_hash_table_ofs); + + const uint64 hash_table_entry_count = hash_table_size / sizeof(uint32); + const uint64 hash_table_entry_offset = hash_table_ofs + (hash % hash_table_entry_count) * sizeof(uint32); + + m_fileIn->SetPosition(hash_table_entry_offset); + uint32 tableOffset; + if (!m_fileIn->readU32(tableOffset)) + { + cemuLog_log(LogType::Force, "failed to read WUHB hash table entry at file offset: {}", hash_table_entry_offset); + return ROMFS_ENTRY_EMPTY; + } + + return uint32be::from_bevalue(tableOffset); +} + +template +bool WUHBReader::SearchHashList(uint32& entryOffset, const fs::path& targetName) const +{ + for (;;) + { + if (entryOffset == ROMFS_ENTRY_EMPTY) + return false; + auto entry = GetEntry(entryOffset); + + if (entry.name == targetName) + return true; + entryOffset = entry.hash; + } + return false; +} + +uint32 WUHBReader::Lookup(const std::filesystem::path& path, bool isFile) const +{ + uint32 currentEntryOffset = 0; + auto look = [&](const fs::path& part, bool lookInFileHT) { + const auto partString = part.string(); + currentEntryOffset = GetHashTableEntryOffset(CalcPathHash(currentEntryOffset, partString.c_str(), 0, partString.size()), lookInFileHT); + if (lookInFileHT) + return SearchHashList(currentEntryOffset, part); + else + return SearchHashList(currentEntryOffset, part); + }; + // look for the root entry + if (!look("", false)) + return ROMFS_ENTRY_EMPTY; + + auto it = path.begin(); + while (it != path.end()) + { + fs::path part = *it; + ++it; + // no need to recurse after trailing forward slash (e.g. directory/) + if (part.empty() && !isFile) + break; + // skip leading forward slash + if (part == "/") + continue; + + // if the lookup target is a file and this is the last iteration, look in the file hash table instead. 
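Reviewer note: the lookup above resolves one path component at a time; each component is hashed together with its parent entry offset (CalcPathHash below), the hash selects a bucket in the directory or file hash table, and SearchHashList walks that bucket's chain. A minimal usage sketch of the reader, based on the public interface in WUHBReader.h (path and buffer handling are illustrative):

    WUHBReader* reader = WUHBReader::FromPath(wuhbPath); // wuhbPath: fs::path to a .wuhb file, assumed to exist
    if (reader)
    {
        uint32 entryOffset = reader->Lookup("/content/app.xml", true); // true = file lookup; path is illustrative
        if (entryOffset != ROMFS_ENTRY_EMPTY)
        {
            std::vector<uint8> buffer(reader->GetFileSize(entryOffset));
            reader->ReadFromFile(entryOffset, 0, buffer.size(), buffer.data());
        }
        delete reader;
    }
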
+ if (!look(part, it == path.end() && isFile)) + return ROMFS_ENTRY_EMPTY; + } + return currentEntryOffset; +} +bool WUHBReader::CheckMagicValue() const +{ + uint8 magic[4]; + m_fileIn->SetPosition(0); + int read = m_fileIn->readData(magic, 4); + if (read != 4) + { + cemuLog_log(LogType::Force, "Failed to read WUHB magic numbers"); + return false; + } + static_assert(sizeof(magic) == s_headerMagicValue.size()); + return std::memcmp(&magic, s_headerMagicValue.data(), sizeof(magic)) == 0; +} +bool WUHBReader::ReadHeader() +{ + m_fileIn->SetPosition(0); + auto read = m_fileIn->readData(&m_header, sizeof(m_header)); + auto readSuccess = read == sizeof(m_header); + if (!readSuccess) + cemuLog_log(LogType::Force, "Failed to read WUHB header"); + return readSuccess; +} +unsigned char WUHBReader::NormalizeChar(unsigned char c) +{ + if (c >= 'a' && c <= 'z') + { + return c + 'A' - 'a'; + } + else + { + return c; + } +} +uint32 WUHBReader::CalcPathHash(uint32 parent, const char* path, uint32 start, size_t path_len) +{ + cemu_assert(path != nullptr || path_len == 0); + uint32 hash = parent ^ 123456789; + for (uint32 i = 0; i < path_len; i++) + { + hash = (hash >> 5) | (hash << 27); + hash ^= NormalizeChar(path[start + i]); + } + + return hash; +} diff --git a/src/Cafe/Filesystem/WUHB/WUHBReader.h b/src/Cafe/Filesystem/WUHB/WUHBReader.h new file mode 100644 index 00000000..9187f05a --- /dev/null +++ b/src/Cafe/Filesystem/WUHB/WUHBReader.h @@ -0,0 +1,45 @@ +#pragma once +#include +#include "RomFSStructs.h" +class WUHBReader +{ + public: + static WUHBReader* FromPath(const fs::path& path); + + romfs_direntry_t GetDirEntry(uint32 offset) const; + romfs_fentry_t GetFileEntry(uint32 offset) const; + + uint64 GetFileSize(uint32 entryOffset) const; + + uint64 ReadFromFile(uint32 entryOffset, uint64 fileOffset, uint64 length, void* buffer) const; + + uint32 Lookup(const std::filesystem::path& path, bool isFile) const; + + private: + WUHBReader(FileStream* file) + : m_fileIn(file) + { + cemu_assert_debug(file != nullptr); + }; + WUHBReader() = delete; + + romfs_header_t m_header; + std::unique_ptr m_fileIn; + constexpr static std::string_view s_headerMagicValue = "WUHB"; + bool ReadHeader(); + bool CheckMagicValue() const; + + static inline unsigned char NormalizeChar(unsigned char c); + static uint32 CalcPathHash(uint32 parent, const char* path, uint32 start, size_t path_len); + + template + using EntryType = std::conditional_t; + template + static const EntryType& GetFallback(); + template + EntryType GetEntry(uint32 offset) const; + + template + bool SearchHashList(uint32& entryOffset, const fs::path& targetName) const; + uint32 GetHashTableEntryOffset(uint32 hash, bool isFile) const; +}; diff --git a/src/Cafe/Filesystem/fsc.cpp b/src/Cafe/Filesystem/fsc.cpp index d7d9971f..031f2fb2 100644 --- a/src/Cafe/Filesystem/fsc.cpp +++ b/src/Cafe/Filesystem/fsc.cpp @@ -6,7 +6,7 @@ struct FSCMountPathNode std::string path; std::vector subnodes; FSCMountPathNode* parent; - // device target and path (if subnodes is empty) + // associated device target and path fscDeviceC* device{ nullptr }; void* ctx{ nullptr }; std::string deviceTargetPath; // the destination base path for the device, utf8 @@ -17,6 +17,25 @@ struct FSCMountPathNode { } + void AssignDevice(fscDeviceC* device, void* ctx, std::string_view deviceBasePath) + { + this->device = device; + this->ctx = ctx; + this->deviceTargetPath = deviceBasePath; + } + + void UnassignDevice() + { + this->device = nullptr; + this->ctx = nullptr; + 
this->deviceTargetPath.clear(); + } + + bool IsRootNode() const + { + return !parent; + } + ~FSCMountPathNode() { for (auto& itr : subnodes) @@ -141,9 +160,7 @@ sint32 fsc_mount(std::string_view mountPath, std::string_view targetPath, fscDev fscLeave(); return FSC_STATUS_INVALID_PATH; } - node->device = fscDevice; - node->ctx = ctx; - node->deviceTargetPath = targetPathWithSlash; + node->AssignDevice(fscDevice, ctx, targetPathWithSlash); fscLeave(); return FSC_STATUS_OK; } @@ -160,14 +177,13 @@ bool fsc_unmount(std::string_view mountPath, sint32 priority) } cemu_assert(mountPathNode->priority == priority); cemu_assert(mountPathNode->device); - // delete node - while (mountPathNode && mountPathNode->parent) + // unassign device + mountPathNode->UnassignDevice(); + // prune empty branch + while (mountPathNode && !mountPathNode->IsRootNode() && mountPathNode->subnodes.empty() && !mountPathNode->device) { FSCMountPathNode* parent = mountPathNode->parent; - cemu_assert(!(!mountPathNode->subnodes.empty() && mountPathNode->device)); - if (!mountPathNode->subnodes.empty()) - break; - parent->subnodes.erase(std::find(parent->subnodes.begin(), parent->subnodes.end(), mountPathNode)); + std::erase(parent->subnodes, mountPathNode); delete mountPathNode; mountPathNode = parent; } @@ -302,6 +318,15 @@ public: return true; } + bool fscRewindDir() override + { + if (!dirIterator) + return true; + + dirIterator->index = 0; + return true; + } + void addUniqueDirEntry(const FSCDirEntry& dirEntry) { // skip if already in list @@ -378,6 +403,7 @@ FSCVirtualFile* fsc_open(const char* path, FSC_ACCESS_FLAG accessFlags, sint32* { // return first found file cemu_assert_debug(HAS_FLAG(accessFlags, FSC_ACCESS_FLAG::OPEN_FILE)); + fscVirtualFile->m_isAppend = HAS_FLAG(accessFlags, FSC_ACCESS_FLAG::IS_APPEND); fscLeave(); return fscVirtualFile; } @@ -441,7 +467,7 @@ bool fsc_nextDir(FSCVirtualFile* fscFile, FSCDirEntry* dirEntry) /* * Create directory */ -bool fsc_createDir(char* path, sint32* fscStatus) +bool fsc_createDir(const char* path, sint32* fscStatus) { fscDeviceC* fscDevice = NULL; *fscStatus = FSC_STATUS_UNDEFINED; @@ -461,7 +487,7 @@ bool fsc_createDir(char* path, sint32* fscStatus) /* * Rename file or directory */ -bool fsc_rename(char* srcPath, char* dstPath, sint32* fscStatus) +bool fsc_rename(const char* srcPath, const char* dstPath, sint32* fscStatus) { std::string srcDevicePath; std::string dstDevicePath; @@ -481,7 +507,7 @@ bool fsc_rename(char* srcPath, char* dstPath, sint32* fscStatus) /* * Delete file or subdirectory */ -bool fsc_remove(char* path, sint32* fscStatus) +bool fsc_remove(const char* path, sint32* fscStatus) { std::string devicePath; fscDeviceC* fscDevice = NULL; @@ -541,7 +567,7 @@ void fsc_setFileLength(FSCVirtualFile* fscFile, uint32 newEndOffset) uint32 fileSize = fsc_getFileSize(fscFile); if (!fsc_isWritable(fscFile)) { - cemuLog_force("TruncateFile called on read-only file"); + cemuLog_log(LogType::Force, "TruncateFile called on read-only file"); } else { @@ -598,6 +624,9 @@ uint32 fsc_writeFile(FSCVirtualFile* fscFile, void* buffer, uint32 size) fscLeave(); return 0; } + if (fscFile->m_isAppend) + fsc_setFileSeek(fscFile, fsc_getFileSize(fscFile)); + uint32 fscStatus = fscFile->fscWriteData(buffer, size); fscLeave(); return fscStatus; diff --git a/src/Cafe/Filesystem/fsc.h b/src/Cafe/Filesystem/fsc.h index 9420d69d..8b8ed5ef 100644 --- a/src/Cafe/Filesystem/fsc.h +++ b/src/Cafe/Filesystem/fsc.h @@ -24,7 +24,10 @@ enum class FSC_ACCESS_FLAG : uint8 // which types can be opened 
// invalid operation if neither is set OPEN_DIR = (1 << 4), - OPEN_FILE = (1 << 5) + OPEN_FILE = (1 << 5), + + // Writing seeks to the end of the file if set + IS_APPEND = (1 << 6) }; DEFINE_ENUM_FLAG_OPERATORS(FSC_ACCESS_FLAG); @@ -149,7 +152,15 @@ struct FSCVirtualFile return false; } + virtual bool fscRewindDir() + { + cemu_assert_unimplemented(); + return false; + } + FSCDirIteratorState* dirIterator{}; + + bool m_isAppend{ false }; }; #define FSC_PRIORITY_BASE (0) @@ -167,9 +178,9 @@ void fsc_unmountAll(); FSCVirtualFile* fsc_open(const char* path, FSC_ACCESS_FLAG accessFlags, sint32* fscStatus, sint32 maxPriority=FSC_PRIORITY_MAX); FSCVirtualFile* fsc_openDirIterator(const char* path, sint32* fscStatus); -bool fsc_createDir(char* path, sint32* fscStatus); -bool fsc_rename(char* srcPath, char* dstPath, sint32* fscStatus); -bool fsc_remove(char* path, sint32* fscStatus); +bool fsc_createDir(const char* path, sint32* fscStatus); +bool fsc_rename(const char* srcPath, const char* dstPath, sint32* fscStatus); +bool fsc_remove(const char* path, sint32* fscStatus); bool fsc_nextDir(FSCVirtualFile* fscFile, FSCDirEntry* dirEntry); void fsc_close(FSCVirtualFile* fscFile); uint32 fsc_getFileSize(FSCVirtualFile* fscFile); @@ -193,10 +204,12 @@ bool FSCDeviceWUD_Mount(std::string_view mountPath, std::string_view destination // wua device bool FSCDeviceWUA_Mount(std::string_view mountPath, std::string_view destinationBaseDir, class ZArchiveReader* archive, sint32 priority); +// wuhb device +bool FSCDeviceWUHB_Mount(std::string_view mountPath, std::string_view destinationBaseDir, class WUHBReader* wuhbReader, sint32 priority); + // hostFS device -void fscDeviceHostFS_mapBaseDirectories_deprecated(); bool FSCDeviceHostFS_Mount(std::string_view mountPath, std::string_view hostTargetPath, sint32 priority); // redirect device void fscDeviceRedirect_map(); -void fscDeviceRedirect_add(std::string_view virtualSourcePath, const fs::path& targetFilePath, sint32 priority); +void fscDeviceRedirect_add(std::string_view virtualSourcePath, size_t fileSize, const fs::path& targetFilePath, sint32 priority); diff --git a/src/Cafe/Filesystem/fscDeviceHostFS.cpp b/src/Cafe/Filesystem/fscDeviceHostFS.cpp index da28700d..85a04afe 100644 --- a/src/Cafe/Filesystem/fscDeviceHostFS.cpp +++ b/src/Cafe/Filesystem/fscDeviceHostFS.cpp @@ -113,7 +113,7 @@ void FSCVirtualFile_Host::fscSetFileLength(uint64 endOffset) m_fileSize = m_seek; m_fs->SetPosition(m_seek); if (!r) - cemuLog_force("fscSetFileLength: Failed to set size to 0x{:x}", endOffset); + cemuLog_log(LogType::Force, "fscSetFileLength: Failed to set size to 0x{:x}", endOffset); } bool FSCVirtualFile_Host::fscDirNext(FSCDirEntry* dirEntry) @@ -127,7 +127,7 @@ bool FSCVirtualFile_Host::fscDirNext(FSCDirEntry* dirEntry) m_dirIterator.reset(new fs::directory_iterator(*m_path)); if (!m_dirIterator) { - cemuLog_force("Failed to iterate directory: {}", _pathToUtf8(*m_path)); + cemuLog_log(LogType::Force, "Failed to iterate directory: {}", _pathToUtf8(*m_path)); return false; } } @@ -175,14 +175,14 @@ FSCVirtualFile* FSCVirtualFile_Host::OpenFile(const fs::path& path, FSC_ACCESS_F cemu_assert_debug(writeAccessRequested); fs = FileStream::createFile2(path); if (!fs) - cemuLog_force("FSC: File create failed for {}", _pathToUtf8(path)); + cemuLog_log(LogType::Force, "FSC: File create failed for {}", _pathToUtf8(path)); } } else if (HAS_FLAG(accessFlags, FSC_ACCESS_FLAG::FILE_ALWAYS_CREATE)) { fs = FileStream::createFile2(path); if (!fs) - cemuLog_force("FSC: File create failed 
for {}", _pathToUtf8(path)); + cemuLog_log(LogType::Force, "FSC: File create failed for {}", _pathToUtf8(path)); } else { @@ -235,14 +235,14 @@ public: if (fs::exists(dirPath)) { if (!fs::is_directory(dirPath)) - cemuLog_force("CreateDir: {} already exists but is not a directory", path); + cemuLog_log(LogType::Force, "CreateDir: {} already exists but is not a directory", path); *fscStatus = FSC_STATUS_ALREADY_EXISTS; return false; } std::error_code ec; bool r = fs::create_directories(dirPath, ec); if (!r) - cemuLog_force("CreateDir: Failed to create {}", path); + cemuLog_log(LogType::Force, "CreateDir: Failed to create {}", path); *fscStatus = FSC_STATUS_OK; return true; } @@ -262,7 +262,6 @@ public: cemu_assert_unimplemented(); // return correct error (e.g. if directory is non-empty) *fscStatus = FSC_STATUS_FILE_NOT_FOUND; } - *fscStatus = FSC_STATUS_FILE_NOT_FOUND; return true; } @@ -290,13 +289,6 @@ public: } }; -void fscDeviceHostFS_mapBaseDirectories_deprecated() -{ - const auto mlc = ActiveSettings::GetMlcPath(); - fsc_mount("/cemuBossStorage/", _pathToUtf8(mlc / "usr/boss/"), &fscDeviceHostFSC::instance(), NULL, FSC_PRIORITY_BASE); - fsc_mount("/vol/storage_mlc01/", _pathToUtf8(mlc / ""), &fscDeviceHostFSC::instance(), NULL, FSC_PRIORITY_BASE); -} - bool FSCDeviceHostFS_Mount(std::string_view mountPath, std::string_view hostTargetPath, sint32 priority) { return fsc_mount(mountPath, hostTargetPath, &fscDeviceHostFSC::instance(), nullptr, priority) == FSC_STATUS_OK; diff --git a/src/Cafe/Filesystem/fscDeviceRedirect.cpp b/src/Cafe/Filesystem/fscDeviceRedirect.cpp index d25bff86..9c62d37a 100644 --- a/src/Cafe/Filesystem/fscDeviceRedirect.cpp +++ b/src/Cafe/Filesystem/fscDeviceRedirect.cpp @@ -11,7 +11,7 @@ struct RedirectEntry FSAFileTree redirectTree; -void fscDeviceRedirect_add(std::string_view virtualSourcePath, const fs::path& targetFilePath, sint32 priority) +void fscDeviceRedirect_add(std::string_view virtualSourcePath, size_t fileSize, const fs::path& targetFilePath, sint32 priority) { // check if source already has a redirection RedirectEntry* existingEntry; @@ -24,7 +24,7 @@ void fscDeviceRedirect_add(std::string_view virtualSourcePath, const fs::path& t delete existingEntry; } RedirectEntry* entry = new RedirectEntry(targetFilePath, priority); - redirectTree.addFile(virtualSourcePath, entry); + redirectTree.addFile(virtualSourcePath, fileSize, entry); } class fscDeviceTypeRedirect : public fscDeviceC @@ -32,8 +32,15 @@ class fscDeviceTypeRedirect : public fscDeviceC FSCVirtualFile* fscDeviceOpenByPath(std::string_view path, FSC_ACCESS_FLAG accessFlags, void* ctx, sint32* fscStatus) override { RedirectEntry* redirectionEntry; - if (redirectTree.getFile(path, redirectionEntry)) + + if (HAS_FLAG(accessFlags, FSC_ACCESS_FLAG::OPEN_FILE) && redirectTree.getFile(path, redirectionEntry)) return FSCVirtualFile_Host::OpenFile(redirectionEntry->dstPath, accessFlags, *fscStatus); + + FSCVirtualFile* dirIterator; + + if (HAS_FLAG(accessFlags, FSC_ACCESS_FLAG::OPEN_DIR) && redirectTree.getDirectory(path, dirIterator)) + return dirIterator; + return nullptr; } diff --git a/src/Cafe/Filesystem/fscDeviceWud.cpp b/src/Cafe/Filesystem/fscDeviceWud.cpp index bf43bf3e..517c8573 100644 --- a/src/Cafe/Filesystem/fscDeviceWud.cpp +++ b/src/Cafe/Filesystem/fscDeviceWud.cpp @@ -128,7 +128,7 @@ class fscDeviceWUDC : public fscDeviceC if (HAS_FLAG(accessFlags, FSC_ACCESS_FLAG::OPEN_FILE)) { FSTFileHandle fstFileHandle; - if (mountedVolume->OpenFile(path, fstFileHandle, true)) + if 
(mountedVolume->OpenFile(path, fstFileHandle, true) && !mountedVolume->HasLinkFlag(fstFileHandle)) { *fscStatus = FSC_STATUS_OK; return new FSCDeviceWudFileCtx(mountedVolume, fstFileHandle); @@ -137,7 +137,7 @@ class fscDeviceWUDC : public fscDeviceC if (HAS_FLAG(accessFlags, FSC_ACCESS_FLAG::OPEN_DIR)) { FSTDirectoryIterator dirIterator; - if (mountedVolume->OpenDirectoryIterator(path, dirIterator)) + if (mountedVolume->OpenDirectoryIterator(path, dirIterator) && !mountedVolume->HasLinkFlag(dirIterator.GetDirHandle())) { *fscStatus = FSC_STATUS_OK; return new FSCDeviceWudFileCtx(mountedVolume, dirIterator); diff --git a/src/Cafe/Filesystem/fscDeviceWuhb.cpp b/src/Cafe/Filesystem/fscDeviceWuhb.cpp new file mode 100644 index 00000000..5e8e6484 --- /dev/null +++ b/src/Cafe/Filesystem/fscDeviceWuhb.cpp @@ -0,0 +1,151 @@ +#include "Filesystem/WUHB/WUHBReader.h" +#include "Cafe/Filesystem/fsc.h" +#include "Cafe/Filesystem/FST/FST.h" + +class FSCDeviceWuhbFileCtx : public FSCVirtualFile +{ + public: + FSCDeviceWuhbFileCtx(WUHBReader* reader, uint32 entryOffset, uint32 fscType) + : m_wuhbReader(reader), m_entryOffset(entryOffset), m_fscType(fscType) + { + cemu_assert(entryOffset != ROMFS_ENTRY_EMPTY); + if (fscType == FSC_TYPE_DIRECTORY) + { + romfs_direntry_t entry = reader->GetDirEntry(entryOffset); + m_dirIterOffset = entry.dirListHead; + m_fileIterOffset = entry.fileListHead; + } + } + sint32 fscGetType() override + { + return m_fscType; + } + uint64 fscQueryValueU64(uint32 id) override + { + if (m_fscType == FSC_TYPE_FILE) + { + if (id == FSC_QUERY_SIZE) + return m_wuhbReader->GetFileSize(m_entryOffset); + else if (id == FSC_QUERY_WRITEABLE) + return 0; // WUHB images are read-only + else + cemu_assert_error(); + } + else + { + cemu_assert_unimplemented(); + } + return 0; + } + uint32 fscWriteData(void* buffer, uint32 size) override + { + cemu_assert_error(); + return 0; + } + uint32 fscReadData(void* buffer, uint32 size) override + { + if (m_fscType != FSC_TYPE_FILE) + return 0; + auto read = m_wuhbReader->ReadFromFile(m_entryOffset, m_seek, size, buffer); + m_seek += read; + return read; + } + void fscSetSeek(uint64 seek) override + { + m_seek = seek; + } + uint64 fscGetSeek() override + { + if (m_fscType != FSC_TYPE_FILE) + return 0; + return m_seek; + } + void fscSetFileLength(uint64 endOffset) override + { + cemu_assert_error(); + } + bool fscDirNext(FSCDirEntry* dirEntry) override + { + if (m_dirIterOffset != ROMFS_ENTRY_EMPTY) + { + romfs_direntry_t entry = m_wuhbReader->GetDirEntry(m_dirIterOffset); + m_dirIterOffset = entry.listNext; + if(entry.name_size > 0) + { + dirEntry->isDirectory = true; + dirEntry->isFile = false; + dirEntry->fileSize = 0; + std::strncpy(dirEntry->path, entry.name.c_str(), FSC_MAX_DIR_NAME_LENGTH); + return true; + } + } + if (m_fileIterOffset != ROMFS_ENTRY_EMPTY) + { + romfs_fentry_t entry = m_wuhbReader->GetFileEntry(m_fileIterOffset); + m_fileIterOffset = entry.listNext; + if(entry.name_size > 0) + { + dirEntry->isDirectory = false; + dirEntry->isFile = true; + dirEntry->fileSize = entry.size; + std::strncpy(dirEntry->path, entry.name.c_str(), FSC_MAX_DIR_NAME_LENGTH); + return true; + } + } + + return false; + } + + private: + WUHBReader* m_wuhbReader{}; + uint32 m_fscType; + uint32 m_entryOffset = ROMFS_ENTRY_EMPTY; + uint32 m_dirIterOffset = ROMFS_ENTRY_EMPTY; + uint32 m_fileIterOffset = ROMFS_ENTRY_EMPTY; + uint64 m_seek = 0; +}; + +class fscDeviceWUHB : public fscDeviceC +{ + FSCVirtualFile* fscDeviceOpenByPath(std::string_view path, FSC_ACCESS_FLAG 
accessFlags, void* ctx, sint32* fscStatus) override + { + WUHBReader* reader = (WUHBReader*)ctx; + cemu_assert_debug(!HAS_FLAG(accessFlags, FSC_ACCESS_FLAG::WRITE_PERMISSION)); // writing to WUHB is not supported + + bool isFile; + uint32 table_offset = ROMFS_ENTRY_EMPTY; + + if (table_offset == ROMFS_ENTRY_EMPTY && HAS_FLAG(accessFlags, FSC_ACCESS_FLAG::OPEN_DIR)) + { + table_offset = reader->Lookup(path, false); + isFile = false; + } + if (table_offset == ROMFS_ENTRY_EMPTY && HAS_FLAG(accessFlags, FSC_ACCESS_FLAG::OPEN_FILE)) + { + table_offset = reader->Lookup(path, true); + isFile = true; + } + + if (table_offset == ROMFS_ENTRY_EMPTY) + { + *fscStatus = FSC_STATUS_FILE_NOT_FOUND; + return nullptr; + } + + *fscStatus = FSC_STATUS_OK; + return new FSCDeviceWuhbFileCtx(reader, table_offset, isFile ? FSC_TYPE_FILE : FSC_TYPE_DIRECTORY); + } + + // singleton + public: + static fscDeviceWUHB& instance() + { + static fscDeviceWUHB _instance; + return _instance; + } +}; + +bool FSCDeviceWUHB_Mount(std::string_view mountPath, std::string_view destinationBaseDir, WUHBReader* wuhbReader, sint32 priority) +{ + return fsc_mount(mountPath, destinationBaseDir, &fscDeviceWUHB::instance(), wuhbReader, priority) == FSC_STATUS_OK; +} diff --git a/src/Cafe/GamePatch.cpp b/src/Cafe/GamePatch.cpp index e5a67d98..77eaff32 100644 --- a/src/Cafe/GamePatch.cpp +++ b/src/Cafe/GamePatch.cpp @@ -52,7 +52,7 @@ typedef struct void hleExport_xcx_enterCriticalSection(PPCInterpreter_t* hCPU) { ppcDefineParamStructPtr(xcxCS, xcxCS_t, 0); - uint32 threadId = coreinitThread_getCurrentThreadMPTRDepr(hCPU); + uint32 threadId = MEMPTR(coreinit::OSGetCurrentThread()).GetMPTR(); cemu_assert_debug(xcxCS->ukn08 != 0); cemu_assert_debug(threadId); if (xcxCS->ownerThreadId == (uint32be)threadId) @@ -83,7 +83,7 @@ void hleExport_xcx_enterCriticalSection(PPCInterpreter_t* hCPU) osLib_returnFromFunction(hCPU, 0); return; } - _mm_pause(); + _mm_pause(); } PPCCore_switchToScheduler(); } @@ -139,7 +139,7 @@ void hle_scan(uint8* data, sint32 dataLength, char* hleFunctionName) uint32 offset = (uint32)(scanCurrent - scanStart) + 0x01000000; debug_printf("HLE signature for '%s' found at 0x%08x\n", hleFunctionName, offset); uint32 opcode = (1<<26)|(functionIndex+0x1000); // opcode for HLE: 0x1000 + FunctionIndex - memory_writeU32Direct(offset, opcode); + memory_write(offset, opcode); break; } scanCurrent += 4; @@ -199,7 +199,7 @@ MPTR hle_locate(uint8* data, uint8* mask, sint32 dataLength) } else { -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT if (mask[0] != 0xFF) assert_dbg(); #endif @@ -299,8 +299,8 @@ void GamePatch_scan() hleAddr = hle_locate(xcx_gpuHangDetection_degradeFramebuffer, NULL, sizeof(xcx_gpuHangDetection_degradeFramebuffer)); if( hleAddr ) { -#ifndef PUBLIC_RELEASE - forceLog_printf("HLE: XCX GPU hang detection"); +#ifdef CEMU_DEBUG_ASSERT + cemuLog_log(LogType::Force, "HLE: XCX GPU hang detection"); #endif // remove the ADDI r25, r25, 1 instruction memory_writeU32(hleAddr, memory_readU32(hleAddr+4)); @@ -309,8 +309,8 @@ void GamePatch_scan() hleAddr = hle_locate(xcx_framebufferReductionSignature, xcx_framebufferReductionMask, sizeof(xcx_framebufferReductionSignature)); if( hleAddr ) { -#ifndef PUBLIC_RELEASE - forceLog_printf("HLE: Prevent XCX rendertarget reduction"); +#ifdef CEMU_DEBUG_ASSERT + cemuLog_log(LogType::Force, "HLE: Prevent XCX rendertarget reduction"); #endif uint32 bl = memory_readU32(hleAddr+0x14); uint32 func_isReductionBuffer = hleAddr + 0x14 + (bl&0x3FFFFFC); @@ -325,23 +325,23 @@ void 
GamePatch_scan() hleAddr = hle_locate(botw_busyLoopSignature, botw_busyLoopMask, sizeof(botw_busyLoopSignature)); if (hleAddr) { -#ifndef PUBLIC_RELEASE - forceLog_printf("HLE: Patch BotW busy loop 1 at 0x%08x", hleAddr); +#ifdef CEMU_DEBUG_ASSERT + cemuLog_log(LogType::Force, "HLE: Patch BotW busy loop 1 at 0x{:08x}", hleAddr); #endif sint32 functionIndex = hleIndex_h000000001; uint32 opcode = (1 << 26) | (functionIndex); // opcode for HLE: 0x1000 + FunctionIndex - memory_writeU32Direct(hleAddr - 4, opcode); + memory_write(hleAddr - 4, opcode); } hleIndex_h000000002 = osLib_getFunctionIndex("hle", "h000000002"); hleAddr = hle_locate(botw_busyLoopSignature2, botw_busyLoopMask2, sizeof(botw_busyLoopSignature2)); if (hleAddr) { -#ifndef PUBLIC_RELEASE - forceLog_printf("HLE: Patch BotW busy loop 2 at 0x%08x", hleAddr); +#ifdef CEMU_DEBUG_ASSERT + cemuLog_log(LogType::Force, "HLE: Patch BotW busy loop 2 at 0x{:08x}", hleAddr); #endif sint32 functionIndex = hleIndex_h000000002; uint32 opcode = (1 << 26) | (functionIndex); // opcode for HLE: 0x1000 + FunctionIndex - memory_writeU32Direct(hleAddr - 4, opcode); + memory_write(hleAddr - 4, opcode); } // FFL library float array endian conversion @@ -350,17 +350,17 @@ void GamePatch_scan() hleAddr = hle_locate(ffl_floatArrayEndianSwap, NULL, sizeof(ffl_floatArrayEndianSwap)); if (hleAddr) { - forceLogDebug_printf("HLE: Hook FFL float array endian swap function at 0x%08x", hleAddr); + cemuLog_logDebug(LogType::Force, "HLE: Hook FFL float array endian swap function at 0x{:08x}", hleAddr); sint32 functionIndex = hleIndex_h000000003; uint32 opcode = (1 << 26) | (functionIndex); // opcode for HLE: 0x1000 + FunctionIndex - memory_writeU32Direct(hleAddr, opcode); + memory_write(hleAddr, opcode); } // XCX freeze workaround //hleAddr = hle_locate(xcx_enterCriticalSectionSignature, xcx_enterCriticalSectionMask, sizeof(xcx_enterCriticalSectionSignature)); //if (hleAddr) //{ - // forceLogDebug_printf("HLE: Hook XCX enterCriticalSection function at 0x%08x", hleAddr); + // cemuLog_logDebug(LogType::Force, "HLE: Hook XCX enterCriticalSection function at 0x{:08x}", hleAddr); // hleIndex_h000000004 = osLib_getFunctionIndex("hle", "h000000004"); // sint32 functionIndex = hleIndex_h000000004; // uint32 opcode = (1 << 26) | (functionIndex); // opcode for HLE: 0x1000 + FunctionIndex @@ -372,7 +372,7 @@ void GamePatch_scan() if (hleAddr) { uint32 patchAddr = hleAddr + 0x10; - forceLog_printf("HLE: Patch MH3U race condition candidate at 0x%08x", patchAddr); + cemuLog_log(LogType::Force, "HLE: Patch MH3U race condition candidate at 0x{:08x}", patchAddr); uint32 funcAddr = PPCInterpreter_makeCallableExportDepr(hleExport_mh3u_raceConditionWorkaround); // set absolute jump uint32 opc = 0x48000000; @@ -387,7 +387,7 @@ void GamePatch_scan() hleAddr = hle_locate(smash4_softlockFixV0Signature, smash4_softlockFixV0Mask, sizeof(smash4_softlockFixV0Signature)); if (hleAddr) { - forceLogDebug_printf("Smash softlock fix: 0x%08x", hleAddr); + cemuLog_logDebug(LogType::Force, "Smash softlock fix: 0x{:08x}", hleAddr); memory_writeU32(hleAddr+0x20, memory_readU32(hleAddr+0x1C)); } @@ -396,7 +396,7 @@ void GamePatch_scan() hleAddr = hle_locate(pmcs_yellowPaintStarCrashV0Signature, nullptr, sizeof(pmcs_yellowPaintStarCrashV0Signature)); if (hleAddr) { - forceLogDebug_printf("Color Splash crash fix: 0x%08x", hleAddr); + cemuLog_logDebug(LogType::Force, "Color Splash crash fix: 0x{:08x}", hleAddr); uint32 funcAddr = 
PPCInterpreter_makeCallableExportDepr(hleExport_pmcs_yellowPaintStarCrashWorkaround); // set absolute jump uint32 opc = 0x48000000; @@ -411,7 +411,7 @@ void GamePatch_scan() if (hleAddr) { // replace CMPL with CMP - forceLog_printf("Patching Bayonetta 2 audio bug at: 0x%08x", hleAddr+0x34); + cemuLog_log(LogType::Force, "Patching Bayonetta 2 audio bug at: 0x{:08x}", hleAddr+0x34); uint32 opc = memory_readU32(hleAddr + 0x34); opc &= ~(0x3FF << 1); // turn CMPL to CMP memory_writeU32(hleAddr + 0x34, opc); @@ -444,7 +444,7 @@ void GamePatch_scan() hleAddr = hle_locate(sm3dw_dynFrameBufferResScale, nullptr, sizeof(sm3dw_dynFrameBufferResScale)); if (hleAddr) { - forceLog_printf("Patching SM3DW dynamic resolution scaling at: 0x%08x", hleAddr); + cemuLog_log(LogType::Force, "Patching SM3DW dynamic resolution scaling at: 0x{:08x}", hleAddr); memory_writeU32(hleAddr, 0x4E800020); // BLR } @@ -453,7 +453,7 @@ void GamePatch_scan() hleAddr = hle_locate(tww_waitFunc, nullptr, sizeof(tww_waitFunc)); if (hleAddr) { - forceLog_printf("Patching TWW race conditon at: 0x%08x", hleAddr); + cemuLog_log(LogType::Force, "Patching TWW race conditon at: 0x{:08x}", hleAddr); // NOP calls to Lock/Unlock mutex memory_writeU32(hleAddr + 0x34, 0x60000000); memory_writeU32(hleAddr + 0x48, 0x60000000); @@ -462,7 +462,7 @@ void GamePatch_scan() } uint32 hleInstallEnd = GetTickCount(); - forceLog_printf("HLE scan time: %dms", hleInstallEnd-hleInstallStart); + cemuLog_log(LogType::Force, "HLE scan time: {}ms", hleInstallEnd-hleInstallStart); } RunAtCemuBoot _loadGamePatchAPI([]() diff --git a/src/Cafe/GameProfile/GameProfile.cpp b/src/Cafe/GameProfile/GameProfile.cpp index 9f2550c2..ea303226 100644 --- a/src/Cafe/GameProfile/GameProfile.cpp +++ b/src/Cafe/GameProfile/GameProfile.cpp @@ -43,7 +43,7 @@ bool gameProfile_loadBooleanOption(IniParser* iniParser, char* optionName, gameP return true; } else - cemuLog_force("Unknown value '{}' for option '{}' in game profile", *option_value, optionName); + cemuLog_log(LogType::Force, "Unknown value '{}' for option '{}' in game profile", *option_value, optionName); return false; } @@ -64,7 +64,7 @@ bool gameProfile_loadBooleanOption2(IniParser& iniParser, const char* optionName return true; } else - cemuLog_force("Unknown value '{}' for option '{}' in game profile", *option_value, optionName); + cemuLog_log(LogType::Force, "Unknown value '{}' for option '{}' in game profile", *option_value, optionName); return false; } @@ -94,7 +94,7 @@ bool gameProfile_loadIntegerOption(IniParser* iniParser, const char* optionName, sint32 val = StringHelpers::ToInt(*option_value, defaultValue); if (val < minVal || val > maxVal) { - cemuLog_force("Value '{}' is out of range for option '{}' in game profile", *option_value, optionName); + cemuLog_log(LogType::Force, "Value '{}' is out of range for option '{}' in game profile", *option_value, optionName); option->value = defaultValue; return false; } @@ -116,7 +116,7 @@ bool gameProfile_loadIntegerOption(IniParser& iniParser, const char* optionName, T val = ConvertString(*option_value); if (val < minVal || val > maxVal) { - cemuLog_force("Value '{}' is out of range for option '{}' in game profile", *option_value, optionName); + cemuLog_log(LogType::Force, "Value '{}' is out of range for option '{}' in game profile", *option_value, optionName); return false; } @@ -125,7 +125,7 @@ bool gameProfile_loadIntegerOption(IniParser& iniParser, const char* optionName, } catch(std::exception&) { - cemuLog_force("Value '{}' is out of range for option '{}' in game 
profile", *option_value, optionName); + cemuLog_log(LogType::Force, "Value '{}' is out of range for option '{}' in game profile", *option_value, optionName); return false; } } @@ -140,7 +140,7 @@ bool gameProfile_loadEnumOption(IniParser& iniParser, const char* optionName, T& for(const T& v : T()) { // test integer option - if (boost::iequals(fmt::format("{}", static_cast::type>(v)), *option_value)) + if (boost::iequals(fmt::format("{}", fmt::underlying(v)), *option_value)) { option = v; return true; @@ -175,17 +175,17 @@ void gameProfile_load() ppcThreadQuantum = g_current_game_profile->GetThreadQuantum(); if (ppcThreadQuantum != GameProfile::kThreadQuantumDefault) - cemuLog_force("Thread quantum set to {}", ppcThreadQuantum); + cemuLog_log(LogType::Force, "Thread quantum set to {}", ppcThreadQuantum); } bool GameProfile::Load(uint64_t title_id) { - auto gameProfilePath = ActiveSettings::GetPath("gameProfiles/{:016x}.ini", title_id); + auto gameProfilePath = ActiveSettings::GetConfigPath("gameProfiles/{:016x}.ini", title_id); std::optional> profileContents = FileStream::LoadIntoMemory(gameProfilePath); if (!profileContents) { - gameProfilePath = ActiveSettings::GetPath("gameProfiles/default/{:016x}.ini", title_id); + gameProfilePath = ActiveSettings::GetDataPath("gameProfiles/default/{:016x}.ini", title_id); profileContents = FileStream::LoadIntoMemory(gameProfilePath); if (!profileContents) return false; @@ -209,7 +209,7 @@ bool GameProfile::Load(uint64_t title_id) m_gameName = std::string(game_name.begin(), game_name.end()); trim(m_gameName.value()); } - IniParser iniParser(*profileContents, gameProfilePath.string()); + IniParser iniParser(*profileContents, _pathToUtf8(gameProfilePath)); // parse ini while (iniParser.NextSection()) { @@ -276,11 +276,14 @@ bool GameProfile::Load(uint64_t title_id) void GameProfile::Save(uint64_t title_id) { - auto gameProfilePath = ActiveSettings::GetPath("gameProfiles/{:016x}.ini", title_id); + auto gameProfileDir = ActiveSettings::GetConfigPath("gameProfiles"); + if (std::error_code ex_ec; !fs::exists(gameProfileDir, ex_ec)) + fs::create_directories(gameProfileDir, ex_ec); + auto gameProfilePath = gameProfileDir / fmt::format("{:016x}.ini", title_id); FileStream* fs = FileStream::createFile2(gameProfilePath); if (!fs) { - cemuLog_force("Failed to write game profile"); + cemuLog_log(LogType::Force, "Failed to write game profile"); return; } @@ -304,16 +307,11 @@ void GameProfile::Save(uint64_t title_id) fs->writeLine(""); fs->writeLine("[Graphics]"); - //WRITE_OPTIONAL_ENTRY(gpuBufferCacheAccuracy); WRITE_ENTRY(accurateShaderMul); WRITE_OPTIONAL_ENTRY(precompiledShaders); WRITE_OPTIONAL_ENTRY(graphics_api); fs->writeLine(""); - /*stream_writeLine(stream_gameProfile, "[Audio]"); - WRITE_ENTRY(disableAudio); - stream_writeLine(stream_gameProfile, "");*/ - fs->writeLine("[Controller]"); for (int i = 0; i < 8; ++i) { diff --git a/src/Cafe/GraphicPack/GraphicPack2.cpp b/src/Cafe/GraphicPack/GraphicPack2.cpp index 808536d5..6ae05c5b 100644 --- a/src/Cafe/GraphicPack/GraphicPack2.cpp +++ b/src/Cafe/GraphicPack/GraphicPack2.cpp @@ -11,6 +11,7 @@ #include "util/IniParser/IniParser.h" #include "util/helpers/StringHelpers.h" #include "Cafe/CafeSystem.h" +#include std::vector GraphicPack2::s_graphic_packs; std::vector GraphicPack2::s_active_graphic_packs; @@ -27,16 +28,16 @@ void GraphicPack2::LoadGraphicPack(fs::path graphicPackPath) return; std::vector rulesData; fs_rules->extract(rulesData); - IniParser iniParser(rulesData, rulesPath.string()); + IniParser 
iniParser(rulesData, _pathToUtf8(rulesPath)); if (!iniParser.NextSection()) { - cemuLog_force("{}: Does not contain any sections", _pathToUtf8(rulesPath)); + cemuLog_log(LogType::Force, "{}: Does not contain any sections", _pathToUtf8(rulesPath)); return; } if (!boost::iequals(iniParser.GetCurrentSectionName(), "Definition")) { - cemuLog_force("{}: [Definition] must be the first section", _pathToUtf8(rulesPath)); + cemuLog_log(LogType::Force, "{}: [Definition] must be the first section", _pathToUtf8(rulesPath)); return; } @@ -47,23 +48,22 @@ void GraphicPack2::LoadGraphicPack(fs::path graphicPackPath) auto [ptr, ec] = std::from_chars(option_version->data(), option_version->data() + option_version->size(), versionNum); if (ec != std::errc{}) { - cemuLog_force("{}: Unable to parse version", _pathToUtf8(rulesPath)); + cemuLog_log(LogType::Force, "{}: Unable to parse version", _pathToUtf8(rulesPath)); return; } - if (versionNum > GP_LEGACY_VERSION) { - GraphicPack2::LoadGraphicPack(rulesPath.generic_wstring(), iniParser); + GraphicPack2::LoadGraphicPack(rulesPath, iniParser); return; } } - cemuLog_force("{}: Outdated graphic pack", _pathToUtf8(rulesPath)); + cemuLog_log(LogType::Force, "{}: Outdated graphic pack", _pathToUtf8(rulesPath)); } void GraphicPack2::LoadAll() { std::error_code ec; - fs::path basePath = ActiveSettings::GetPath("graphicPacks"); + fs::path basePath = ActiveSettings::GetUserDataPath("graphicPacks"); for (fs::recursive_directory_iterator it(basePath, ec); it != end(it); ++it) { if (!it->is_directory(ec)) @@ -78,22 +78,22 @@ void GraphicPack2::LoadAll() } } -bool GraphicPack2::LoadGraphicPack(const std::wstring& filename, IniParser& rules) +bool GraphicPack2::LoadGraphicPack(const fs::path& rulesPath, IniParser& rules) { try { - auto gp = std::make_shared(filename, rules); + auto gp = std::make_shared(rulesPath, rules); // check if enabled and preset set const auto& config_entries = g_config.data().graphic_pack_entries; // legacy absolute path checking for not breaking compatibility - auto file = gp->GetFilename2(); + auto file = gp->GetRulesPath(); auto it = config_entries.find(file.lexically_normal()); if (it == config_entries.cend()) { // check for relative path - it = config_entries.find(MakeRelativePath(gp->GetFilename2()).lexically_normal()); + it = config_entries.find(_utf8ToPath(gp->GetNormalizedPathString())); } if (it != config_entries.cend()) @@ -144,7 +144,7 @@ bool GraphicPack2::DeactivateGraphicPack(const std::shared_ptr& gr const auto it = std::find_if(s_active_graphic_packs.begin(), s_active_graphic_packs.end(), [graphic_pack](const GraphicPackPtr& gp) { - return gp->GetFilename() == graphic_pack->GetFilename(); + return gp->GetNormalizedPathString() == graphic_pack->GetNormalizedPathString(); } ); @@ -172,12 +172,12 @@ void GraphicPack2::ActivateForCurrentTitle() { if (gp->GetPresets().empty()) { - forceLog_printf("Activate graphic pack: %s", gp->GetPath().c_str()); + cemuLog_log(LogType::Force, "Activate graphic pack: {}", gp->GetVirtualPath()); } else { std::string logLine; - logLine.assign(fmt::format("Activate graphic pack: {} [Presets: ", gp->GetPath())); + logLine.assign(fmt::format("Activate graphic pack: {} [Presets: ", gp->GetVirtualPath())); bool isFirst = true; for (auto& itr : gp->GetPresets()) { @@ -215,12 +215,6 @@ void GraphicPack2::WaitUntilReady() std::this_thread::sleep_for(std::chrono::milliseconds(5)); } -GraphicPack2::GraphicPack2(std::wstring filename) - : m_filename(std::move(filename)) -{ - // unused for now -} - std::unordered_map 
GraphicPack2::ParsePresetVars(IniParser& rules) const { ExpressionParser parser; @@ -254,8 +248,8 @@ std::unordered_map GraphicPack2::ParsePres return vars; } -GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) - : m_filename(std::move(filename)) +GraphicPack2::GraphicPack2(fs::path rulesPath, IniParser& rules) + : m_rulesPath(std::move(rulesPath)) { // we're already in [Definition] auto option_version = rules.FindOption("version"); @@ -264,7 +258,7 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) m_version = StringHelpers::ToInt(*option_version, -1); if (m_version < 0) { - cemuLog_force(L"{}: Invalid version", m_filename); + cemuLog_log(LogType::Force, "{}: Invalid version", _pathToUtf8(m_rulesPath)); throw std::exception(); } @@ -276,7 +270,7 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) else if (boost::iequals(*option_rendererFilter, "opengl")) m_renderer_api = RendererAPI::OpenGL; else - cemuLog_force("Unknown value '{}' for rendererFilter option", *option_rendererFilter); + cemuLog_log(LogType::Force, "Unknown value '{}' for rendererFilter option", *option_rendererFilter); } auto option_defaultEnabled = rules.FindOption("default"); @@ -286,6 +280,10 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) m_enabled = m_default_enabled; } + auto option_allowRendertargetSizeOptimization = rules.FindOption("colorbufferOptimizationAware"); + if (option_allowRendertargetSizeOptimization) + m_allowRendertargetSizeOptimization = boost::iequals(*option_allowRendertargetSizeOptimization, "true") || boost::iequals(*option_allowRendertargetSizeOptimization, "1"); + auto option_vendorFilter = rules.FindOption("vendorFilter"); if (option_vendorFilter) { @@ -300,17 +298,17 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) else if (boost::iequals(*option_vendorFilter, "apple")) m_gfx_vendor = GfxVendor::Apple; else - cemuLog_force("Unknown value '{}' for vendorFilter", *option_vendorFilter); + cemuLog_log(LogType::Force, "Unknown value '{}' for vendorFilter", *option_vendorFilter); } auto option_path = rules.FindOption("path"); if (!option_path) { auto gp_name_log = rules.FindOption("name"); - cemuLog_force("[Definition] section from '{}' graphic pack must contain option: path", gp_name_log.has_value() ? *gp_name_log : "Unknown"); + cemuLog_log(LogType::Force, "[Definition] section from '{}' graphic pack must contain option: path", gp_name_log.has_value() ? 
*gp_name_log : "Unknown"); throw std::exception(); } - m_path = *option_path; + m_virtualPath = *option_path; auto option_gp_name = rules.FindOption("name"); if (option_gp_name) @@ -347,7 +345,7 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) const auto preset_name = rules.FindOption("name"); if (!preset_name) { - cemuLog_force("Graphic pack \"{}\": Preset in line {} skipped because it has no name option defined", m_name, rules.GetCurrentSectionLineNumber()); + cemuLog_log(LogType::Force, "Graphic pack \"{}\": Preset in line {} skipped because it has no name option defined", GetNormalizedPathString(), rules.GetCurrentSectionLineNumber()); continue; } @@ -371,7 +369,7 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) } catch (const std::exception & ex) { - cemuLog_force("Graphic pack \"{}\": Can't parse preset \"{}\": {}", m_name, *preset_name, ex.what()); + cemuLog_log(LogType::Force, "Graphic pack \"{}\": Can't parse preset \"{}\": {}", GetNormalizedPathString(), *preset_name, ex.what()); } } else if (boost::iequals(currentSectionName, "RAM")) @@ -385,7 +383,7 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) { if (m_version <= 5) { - cemuLog_force("Graphic pack \"{}\": [RAM] options are only available for graphic pack version 6 or higher", m_name, optionNameBuf); + cemuLog_log(LogType::Force, "Graphic pack \"{}\": [RAM] options are only available for graphic pack version 6 or higher", GetNormalizedPathString(), optionNameBuf); throw std::exception(); } @@ -395,12 +393,12 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) { if (addrEnd <= addrStart) { - cemuLog_force("Graphic pack \"{}\": start address (0x{:08x}) must be greater than end address (0x{:08x}) for {}", m_name, addrStart, addrEnd, optionNameBuf); + cemuLog_log(LogType::Force, "Graphic pack \"{}\": start address (0x{:08x}) must be greater than end address (0x{:08x}) for {}", GetNormalizedPathString(), addrStart, addrEnd, optionNameBuf); throw std::exception(); } else if ((addrStart & 0xFFF) != 0 || (addrEnd & 0xFFF) != 0) { - cemuLog_force("Graphic pack \"{}\": addresses for %s are not aligned to 0x1000", m_name, optionNameBuf); + cemuLog_log(LogType::Force, "Graphic pack \"{}\": addresses for %s are not aligned to 0x1000", GetNormalizedPathString(), optionNameBuf); throw std::exception(); } else @@ -410,7 +408,7 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) } else { - cemuLog_force("Graphic pack \"{}\": has invalid syntax for option {}", m_name, optionNameBuf); + cemuLog_log(LogType::Force, "Graphic pack \"{}\": has invalid syntax for option {}", GetNormalizedPathString(), optionNameBuf); throw std::exception(); } } @@ -424,24 +422,32 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) std::unordered_map<std::string, std::vector<PresetPtr>> tmp_map; // all vars must be defined in the default preset vars before - for (const auto& entry : m_presets) + std::vector<std::pair<std::string, std::string>> mismatchingPresetVars; + for (const auto& presetEntry : m_presets) { - tmp_map[entry->category].emplace_back(entry); + tmp_map[presetEntry->category].emplace_back(presetEntry); - for (auto& kv : entry->variables) + for (auto& presetVar : presetEntry->variables) { - const auto it = m_preset_vars.find(kv.first); + const auto it = m_preset_vars.find(presetVar.first); if (it == m_preset_vars.cend()) { - cemuLog_force("Graphic pack: \"{}\" contains preset variables which are not defined in the default section", m_name); - throw std::exception(); +
mismatchingPresetVars.emplace_back(presetEntry->name, presetVar.first); + continue; } - // overwrite var type with default var type - kv.second.first = it->second.first; + presetVar.second.first = it->second.first; } } + if(!mismatchingPresetVars.empty()) + { + cemuLog_log(LogType::Force, "Graphic pack \"{}\" contains preset variables which are not defined in the [Default] section:", GetNormalizedPathString()); + for (const auto& [presetName, varName] : mismatchingPresetVars) + cemuLog_log(LogType::Force, "Preset: {} Variable: {}", presetName, varName); + throw std::exception(); + } + // have first entry be default active for every category if no default= is set for(auto entry : get_values(tmp_map)) { @@ -471,7 +477,7 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) auto& p2 = kv.second[i + 1]; if (p1->variables.size() != p2->variables.size()) { - cemuLog_force("Graphic pack: \"{}\" contains inconsistent preset variables", m_name); + cemuLog_log(LogType::Force, "Graphic pack: \"{}\" contains inconsistent preset variables", GetNormalizedPathString()); throw std::exception(); } @@ -479,14 +485,14 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) std::set keys2(get_keys(p2->variables).begin(), get_keys(p2->variables).end()); if (keys1 != keys2) { - cemuLog_force("Graphic pack: \"{}\" contains inconsistent preset variables", m_name); + cemuLog_log(LogType::Force, "Graphic pack: \"{}\" contains inconsistent preset variables", GetNormalizedPathString()); throw std::exception(); } if(p1->is_default) { if(has_default) - cemuLog_force("Graphic pack: \"{}\" has more than one preset with the default key set for the same category \"{}\"", m_name, p1->name); + cemuLog_log(LogType::Force, "Graphic pack: \"{}\" has more than one preset with the default key set for the same category \"{}\"", GetNormalizedPathString(), p1->name); p1->active = true; has_default = true; } @@ -499,12 +505,25 @@ GraphicPack2::GraphicPack2(std::wstring filename, IniParser& rules) } } +// returns true if enabling, disabling (changeEnableState) or changing presets (changePreset) for the graphic pack requires restarting if the game is already running +bool GraphicPack2::RequiresRestart(bool changeEnableState, bool changePreset) +{ + if (!GetTextureRules().empty()) + return true; + return false; +} + bool GraphicPack2::Reload() { Deactivate(); return Activate(); } +std::string GraphicPack2::GetNormalizedPathString() const +{ + return _pathToUtf8(MakeRelativePath(ActiveSettings::GetUserDataPath(), GetRulesPath()).lexically_normal()); +} + bool GraphicPack2::ContainsTitleId(uint64_t title_id) const { const auto it = std::find_if(m_title_ids.begin(), m_title_ids.end(), [title_id](uint64 id) { return id == title_id; }); @@ -647,7 +666,7 @@ bool GraphicPack2::SetActivePreset(std::string_view category, std::string_view n void GraphicPack2::LoadShaders() { - fs::path path(m_filename); + fs::path path = GetRulesPath(); for (auto& it : fs::directory_iterator(path.remove_filename())) { if (!is_regular_file(it)) @@ -660,7 +679,7 @@ void GraphicPack2::LoadShaders() uint64 shader_base_hash = 0; uint64 shader_aux_hash = 0; wchar_t shader_type[256]{}; - if (filename.size() < 256 && swscanf(filename.c_str(), L"%I64x_%I64x_%ls", &shader_base_hash, &shader_aux_hash, shader_type) == 3) + if (filename.size() < 256 && swscanf(filename.c_str(), L"%" SCNx64 "_%" SCNx64 "_%ls", &shader_base_hash, &shader_aux_hash, shader_type) == 3) { if (shader_type[0] == 'p' && shader_type[1] == 's') 
m_custom_shaders.emplace_back(LoadShader(p, shader_base_hash, shader_aux_hash, GP_SHADER_TYPE::PIXEL)); @@ -673,7 +692,7 @@ void GraphicPack2::LoadShaders() { std::ifstream file(p); if (!file.is_open()) - throw std::runtime_error(fmt::format("can't open graphic pack file: {}", p.filename().string()).c_str()); + throw std::runtime_error(fmt::format("can't open graphic pack file: {}", _pathToUtf8(p.filename()))); file.seekg(0, std::ios::end); m_output_shader_source.reserve(file.tellg()); @@ -686,7 +705,7 @@ void GraphicPack2::LoadShaders() { std::ifstream file(p); if (!file.is_open()) - throw std::runtime_error(fmt::format("can't open graphic pack file: {}", p.filename().string()).c_str()); + throw std::runtime_error(fmt::format("can't open graphic pack file: {}", _pathToUtf8(p.filename()))); file.seekg(0, std::ios::end); m_upscaling_shader_source.reserve(file.tellg()); @@ -699,7 +718,7 @@ void GraphicPack2::LoadShaders() { std::ifstream file(p); if (!file.is_open()) - throw std::runtime_error(fmt::format("can't open graphic pack file: {}", p.filename().string()).c_str()); + throw std::runtime_error(fmt::format("can't open graphic pack file: {}", _pathToUtf8(p.filename()))); file.seekg(0, std::ios::end); m_downscaling_shader_source.reserve(file.tellg()); @@ -711,7 +730,7 @@ void GraphicPack2::LoadShaders() } catch (const std::exception& ex) { - forceLog_printf("graphicPack: error while loading custom shader: %s", ex.what()); + cemuLog_log(LogType::Force, "graphicPack: error while loading custom shader: {}", ex.what()); } } } @@ -734,7 +753,7 @@ bool GraphicPack2::IsPresetVisible(const PresetPtr& preset) const } catch (const std::exception& ex) { - forceLog_printf("error when trying to check visiblity of preset: %s", ex.what()); + cemuLog_log(LogType::Force, "error when trying to check visiblity of preset: {}", ex.what()); return false; } } @@ -802,7 +821,7 @@ void GraphicPack2::AddConstantsForCurrentPreset(ExpressionParser& ep) } } -void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, std::wstring& internalPath, bool isAOC) +void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, bool isAOC, const char* virtualMountBase) { uint64 currentTitleId = CafeSystem::GetForegroundTitleId(); uint64 aocTitleId = (currentTitleId & 0xFFFFFFFFull) | 0x0005000c00000000ull; @@ -817,9 +836,9 @@ void GraphicPack2::_iterateReplacedFiles(const fs::path& currentPath, std::wstri } else { - virtualMountPath = fs::path("vol/content/") / virtualMountPath; + virtualMountPath = fs::path(virtualMountBase) / virtualMountPath; } - fscDeviceRedirect_add(virtualMountPath.generic_string(), it.path().generic_string(), m_fs_priority); + fscDeviceRedirect_add(virtualMountPath.generic_string(), it.file_size(), it.path().generic_string(), m_fs_priority); } } } @@ -830,7 +849,7 @@ void GraphicPack2::LoadReplacedFiles() return; m_patchedFilesLoaded = true; - fs::path gfxPackPath(m_filename.c_str()); + fs::path gfxPackPath = GetRulesPath(); gfxPackPath = gfxPackPath.remove_filename(); // /content/ @@ -840,10 +859,9 @@ void GraphicPack2::LoadReplacedFiles() std::error_code ec; if (fs::exists(contentPath, ec)) { - std::wstring internalPath(L"/vol/content/"); // setup redirections fscDeviceRedirect_map(); - _iterateReplacedFiles(contentPath, internalPath, false); + _iterateReplacedFiles(contentPath, false, "vol/content/"); } // /aoc/ fs::path aocPath(gfxPackPath); @@ -854,13 +872,20 @@ void GraphicPack2::LoadReplacedFiles() uint64 aocTitleId = CafeSystem::GetForegroundTitleId(); aocTitleId = aocTitleId & 
0xFFFFFFFFULL; aocTitleId |= 0x0005000c00000000ULL; - wchar_t internalAocPath[128]; - swprintf(internalAocPath, sizeof(internalAocPath)/sizeof(wchar_t), L"/aoc/%016llx/", aocTitleId); - - std::wstring internalPath(internalAocPath); // setup redirections fscDeviceRedirect_map(); - _iterateReplacedFiles(aocPath, internalPath, true); + _iterateReplacedFiles(aocPath, true, nullptr); + } + + // /code/ + fs::path codePath(gfxPackPath); + codePath.append("code"); + + if (fs::exists(codePath, ec)) + { + // setup redirections + fscDeviceRedirect_map(); + _iterateReplacedFiles(codePath, false, CafeSystem::GetInternalVirtualCodeFolder().c_str()); } } @@ -876,21 +901,18 @@ bool GraphicPack2::Activate() if (m_gfx_vendor.has_value()) { auto vendor = g_renderer->GetVendor(); - if (vendor == GfxVendor::IntelLegacy || vendor == GfxVendor::IntelNoLegacy) - vendor = GfxVendor::Intel; - if (m_gfx_vendor.value() != vendor) return false; } - FileStream* fs_rules = FileStream::openFile2({ m_filename }); + FileStream* fs_rules = FileStream::openFile2(m_rulesPath); if (!fs_rules) return false; std::vector rulesData; fs_rules->extract(rulesData); delete fs_rules; - IniParser rules({ (char*)rulesData.data(), rulesData.size()}, boost::nowide::narrow(m_filename)); + IniParser rules({ (char*)rulesData.data(), rulesData.size()}, GetNormalizedPathString()); // load rules try @@ -944,7 +966,7 @@ bool GraphicPack2::Activate() else if (anisotropyValue == 16) rule.overwrite_settings.anistropic_value = 4; else - cemuLog_log(LogType::Force, fmt::format(L"Invalid value {} for overwriteAnisotropy in graphic pack {}. Only the values 1, 2, 4, 8 or 16 are allowed.", anisotropyValue, m_filename)); + cemuLog_log(LogType::Force, "Invalid value {} for overwriteAnisotropy in graphic pack {}. Only the values 1, 2, 4, 8 or 16 are allowed.", anisotropyValue, GetNormalizedPathString()); } m_texture_rules.emplace_back(rule); } @@ -957,7 +979,7 @@ bool GraphicPack2::Activate() auto option_upscale = rules.FindOption("upscaleMagFilter"); if(option_upscale && boost::iequals(*option_upscale, "NearestNeighbor")) m_output_settings.upscale_filter = LatteTextureView::MagFilter::kNearestNeighbor; - auto option_downscale = rules.FindOption("NearestNeighbor"); + auto option_downscale = rules.FindOption("downscaleMinFilter"); if (option_downscale && boost::iequals(*option_downscale, "NearestNeighbor")) m_output_settings.downscale_filter = LatteTextureView::MagFilter::kNearestNeighbor; } @@ -965,7 +987,7 @@ bool GraphicPack2::Activate() } catch(const std::exception& ex) { - forceLog_printf((char*)ex.what()); + cemuLog_log(LogType::Force, ex.what()); return false; } @@ -989,11 +1011,11 @@ bool GraphicPack2::Activate() if (LatteTiming_getCustomVsyncFrequency(globalCustomVsyncFreq)) { if (customVsyncFreq != globalCustomVsyncFreq) - forceLog_printf("rules.txt error: Mismatching vsync frequency %d in graphic pack \'%s\'", customVsyncFreq, GetPath().c_str()); + cemuLog_log(LogType::Force, "rules.txt error: Mismatching vsync frequency {} in graphic pack \'{}\'", customVsyncFreq, GetVirtualPath()); } else { - forceLog_printf("Set vsync frequency to %d (graphic pack %s)", customVsyncFreq, GetPath().c_str()); + cemuLog_log(LogType::Force, "Set vsync frequency to {} (graphic pack {})", customVsyncFreq, GetVirtualPath()); LatteTiming_setCustomVsyncFrequency(customVsyncFreq); } } @@ -1020,9 +1042,9 @@ bool GraphicPack2::Deactivate() m_upscaling_shader_ud.reset(); m_downscaling_shader_ud.reset(); - m_output_shader_source = ""; - m_upscaling_shader_source = ""; - 
m_downscaling_shader_source = ""; + m_output_shader_source.clear(); + m_upscaling_shader_source.clear(); + m_downscaling_shader_source.clear(); if (HasCustomVSyncFrequency()) { @@ -1178,7 +1200,7 @@ std::vector GraphicPack2::ParseTitleIds(IniParser& rules, const char* op void GraphicPack2::ApplyShaderPresets(std::string& shader_source) const { const auto active_presets = GetActivePresets(); - const std::regex regex(R"(\$[a-zA-Z\_0-9]+)"); + const std::regex regex(R"(\$[a-zA-Z_0-9]+)"); std::smatch match; size_t offset = 0; @@ -1246,4 +1268,4 @@ std::vector> GraphicPack2::GetActiveRAMMappings() return a.first < b.first; }); return v; -} \ No newline at end of file +} diff --git a/src/Cafe/GraphicPack/GraphicPack2.h b/src/Cafe/GraphicPack/GraphicPack2.h index 4c18b5c6..fc9603cd 100644 --- a/src/Cafe/GraphicPack/GraphicPack2.h +++ b/src/Cafe/GraphicPack/GraphicPack2.h @@ -97,22 +97,23 @@ public: }; using PresetPtr = std::shared_ptr; - GraphicPack2(std::wstring filename); - GraphicPack2(std::wstring filename, IniParser& rules); + GraphicPack2(fs::path rulesPath, IniParser& rules); bool IsEnabled() const { return m_enabled; } bool IsActivated() const { return m_activated; } sint32 GetVersion() const { return m_version; } - const std::wstring& GetFilename() const { return m_filename; } - const fs::path GetFilename2() const { return fs::path(m_filename); } + const fs::path GetRulesPath() const { return m_rulesPath; } + std::string GetNormalizedPathString() const; + bool RequiresRestart(bool changeEnableState, bool changePreset); bool Reload(); bool HasName() const { return !m_name.empty(); } - const std::string& GetName() const { return m_name.empty() ? m_path : m_name; } - const std::string& GetPath() const { return m_path; } + const std::string& GetName() const { return m_name.empty() ? 
m_virtualPath : m_name; } - const std::string& GetPath() const { return m_path; } + const std::string& GetVirtualPath() const { return m_virtualPath; } // returns the path in the gfx tree hierarchy const std::string& GetDescription() const { return m_description; } bool IsDefaultEnabled() const { return m_default_enabled; } + bool AllowRendertargetSizeOptimization() const { return m_allowRendertargetSizeOptimization; } void SetEnabled(bool state) { m_enabled = state; } @@ -164,7 +165,7 @@ public: static const std::vector<std::shared_ptr<GraphicPack2>>& GetGraphicPacks() { return s_graphic_packs; } static const std::vector<std::shared_ptr<GraphicPack2>>& GetActiveGraphicPacks() { return s_active_graphic_packs; } static void LoadGraphicPack(fs::path graphicPackPath); - static bool LoadGraphicPack(const std::wstring& filename, class IniParser& rules); + static bool LoadGraphicPack(const fs::path& rulesPath, class IniParser& rules); static bool ActivateGraphicPack(const std::shared_ptr<GraphicPack2>& graphic_pack); static bool DeactivateGraphicPack(const std::shared_ptr<GraphicPack2>& graphic_pack); static void ClearGraphicPacks(); @@ -172,6 +173,7 @@ public: static void ActivateForCurrentTitle(); static void Reset(); + private: bool Activate(); bool Deactivate(); @@ -207,15 +209,17 @@ private: parser.TryAddConstant(var.first, (TType)var.second.second); } - std::wstring m_filename; + fs::path m_rulesPath; sint32 m_version; std::string m_name; - std::string m_path; + std::string m_virtualPath; std::string m_description; bool m_default_enabled = false; + bool m_allowRendertargetSizeOptimization = false; // gfx pack supports framebuffers with non-padded sizes, which is an optional optimization introduced with Cemu 2.0-74 + // filter std::optional<RendererAPI> m_renderer_api; std::optional<GfxVendor> m_gfx_vendor; @@ -256,7 +260,7 @@ private: CustomShader LoadShader(const fs::path& path, uint64 shader_base_hash, uint64 shader_aux_hash, GP_SHADER_TYPE shader_type) const; void ApplyShaderPresets(std::string& shader_source) const; void LoadReplacedFiles(); - void _iterateReplacedFiles(const fs::path& currentPath, std::wstring& internalPath, bool isAOC); + void _iterateReplacedFiles(const fs::path& currentPath, bool isAOC, const char* virtualMountBase); // ram mappings std::vector<std::pair<MPTR, MPTR>> m_ramMappings; diff --git a/src/Cafe/GraphicPack/GraphicPack2Patches.cpp b/src/Cafe/GraphicPack/GraphicPack2Patches.cpp index 5a3c85b9..2c067484 100644 --- a/src/Cafe/GraphicPack/GraphicPack2Patches.cpp +++ b/src/Cafe/GraphicPack/GraphicPack2Patches.cpp @@ -6,6 +6,7 @@ #include "boost/algorithm/string.hpp" #include "gui/wxgui.h" // for wxMessageBox +#include "gui/helpers/wxHelpers.h" // error handler void PatchErrorHandler::printError(class PatchGroup* patchGroup, sint32 lineNumber, std::string_view errorMsg) @@ -33,27 +34,29 @@ void PatchErrorHandler::printError(class PatchGroup* patchGroup, sint32 lineNumb cemuLog_writeLineToLog(msg, true, true); m_anyErrorTriggered = true; - if (cafeLog_isLoggingFlagEnabled(LOG_TYPE_PATCHES)) + if (cemuLog_isLoggingEnabled(LogType::Patches)) errorMessages.emplace_back(msg); } void PatchErrorHandler::showStageErrorMessageBox() { - std::string errorMsg; + wxString errorMsg; if (m_gp) { if (m_stage == STAGE::PARSER) - errorMsg.assign(fmt::format("Failed to load patches for graphic pack \'{}\'", m_gp->GetName())); + errorMsg.assign(formatWxString(_("Failed to load patches for graphic pack \'{}\'"), m_gp->GetName())); else - errorMsg.assign(fmt::format("Failed to apply patches for graphic pack \'{}\'", m_gp->GetName())); + errorMsg.assign(formatWxString(_("Failed to apply patches for graphic pack \'{}\'"), m_gp->GetName())); } else {
cemu_assert_debug(false); // graphic pack should always be set } - if (cafeLog_isLoggingFlagEnabled(LOG_TYPE_PATCHES)) + if (cemuLog_isLoggingEnabled(LogType::Patches)) { - errorMsg.append("\n \nDetails:\n"); + errorMsg.append("\n \n") + .append(_("Details:")) + .append("\n"); for (auto& itr : errorMessages) { errorMsg.append(itr); @@ -61,26 +64,15 @@ void PatchErrorHandler::showStageErrorMessageBox() } } - wxMessageBox(errorMsg, "Graphic pack error"); + wxMessageBox(errorMsg, _("Graphic pack error")); } // loads Cemu-style patches (patch_.asm) // returns true if at least one file was found even if it could not be successfully parsed bool GraphicPack2::LoadCemuPatches() { - // todo - once we have updated to C++20 we can replace these with the new std::string functions - auto startsWith = [](const std::wstring& str, const std::wstring& prefix) - { - return str.size() >= prefix.size() && 0 == str.compare(0, prefix.size(), prefix); - }; - - auto endsWith = [](const std::wstring& str, const std::wstring& suffix) - { - return str.size() >= suffix.size() && 0 == str.compare(str.size() - suffix.size(), suffix.size(), suffix); - }; - bool foundPatches = false; - fs::path path(m_filename); + fs::path path(m_rulesPath); path.remove_filename(); for (auto& p : fs::directory_iterator(path)) { @@ -88,10 +80,10 @@ bool GraphicPack2::LoadCemuPatches() if (fs::is_regular_file(p.status()) && path.has_filename()) { // check if filename matches - std::wstring filename = path.filename().generic_wstring(); - if (boost::istarts_with(filename, L"patch_") && boost::iends_with(filename, L".asm")) + std::string filename = _pathToUtf8(path.filename()); + if (boost::istarts_with(filename, "patch_") && boost::iends_with(filename, ".asm")) { - FileStream* patchFile = FileStream::openFile(path.generic_wstring().c_str()); + FileStream* patchFile = FileStream::openFile2(path); if (patchFile) { // read file @@ -102,14 +94,14 @@ bool GraphicPack2::LoadCemuPatches() // load Cemu style patch file if (!ParseCemuPatchesTxtInternal(patchesStream)) { - forceLog_printfW(L"Error while processing \"%s\". No patches for this graphic pack will be applied.", path.c_str()); + cemuLog_log(LogType::Force, "Error while processing \"{}\". 
No patches for this graphic pack will be applied.", _pathToUtf8(path)); cemu_assert_debug(list_patchGroups.empty()); return true; // return true since a .asm patch was found even if we could not parse it } } else { - forceLog_printfW(L"Unable to load patch file \"%s\"", path.c_str()); + cemuLog_log(LogType::Force, "Unable to load patch file \"{}\"", _pathToUtf8(path)); } foundPatches = true; } @@ -121,33 +113,22 @@ bool GraphicPack2::LoadCemuPatches() void GraphicPack2::LoadPatchFiles() { // order of loading patches: - // 1) If Cemuhook is loaded: - // 1.1) Check if patches.txt exists and if it does, stop here and do nothing (Cemuhook takes over patching) - // 1.2) Load Cemu-style patches (patch_.asm) - // 2) If Cemuhook is not loaded: - // 1.1) Load Cemu-style patches (patch_.asm), stop here if at least one patch file exists - // 1.2) Load Cemuhook patches.txt - - // update: As of 1.20.2b Cemu always takes over patching since Cemuhook patching broke due to other internal changes (memory allocation changed and some reordering on when graphic packs get loaded) + // 1) Load Cemu-style patches (patch_.asm), stop here if at least one patch file exists + // 2) Load Cemuhook patches.txt if (LoadCemuPatches()) return; // exit if at least one Cemu style patch file was found // fall back to Cemuhook patches.txt to guarantee backward compatibility - fs::path path(m_filename); + fs::path path(m_rulesPath); path.remove_filename(); path.append("patches.txt"); - - FileStream* patchFile = FileStream::openFile(path.generic_wstring().c_str()); - + FileStream* patchFile = FileStream::openFile2(path); if (patchFile == nullptr) return; - // read file std::vector fileData; patchFile->extract(fileData); delete patchFile; - cemu_assert_debug(list_patchGroups.empty()); - // parse MemStreamReader patchesStream(fileData.data(), (sint32)fileData.size()); ParseCemuhookPatchesTxtInternal(patchesStream); diff --git a/src/Cafe/GraphicPack/GraphicPack2PatchesApply.cpp b/src/Cafe/GraphicPack/GraphicPack2PatchesApply.cpp index 98a5b693..b6af542d 100644 --- a/src/Cafe/GraphicPack/GraphicPack2PatchesApply.cpp +++ b/src/Cafe/GraphicPack/GraphicPack2PatchesApply.cpp @@ -353,7 +353,7 @@ PATCH_RESOLVE_RESULT PatchEntryInstruction::resolveReloc(PatchContext_t& ctx, PP // absolute if (result >= 0x3FFFFFC) { - forceLog_printf("Target \'%s\' for branch at line %d out of range", reloc->m_expression.c_str(), m_lineNumber); + cemuLog_log(LogType::Force, "Target \'{}\' for branch at line {} out of range", reloc->m_expression, m_lineNumber); return PATCH_RESOLVE_RESULT::VALUE_ERROR; } opcode &= ~0x3FFFFFC; @@ -438,7 +438,7 @@ PATCH_RESOLVE_RESULT PatchEntryInstruction::resolve(PatchContext_t& ctx) { if (_relocateAddress(resolverState.currentGroup, &ctx, m_addr, m_relocatedAddr) == false) { - forceLog_printf("Patches: Address 0x%08x (line %d) is not within code cave or any module section", this->getAddr(), this->m_lineNumber); + cemuLog_log(LogType::Force, "Patches: Address 0x{:08x} (line {}) is not within code cave or any module section", this->getAddr(), this->m_lineNumber); cemu_assert_debug(false); return PATCH_RESOLVE_RESULT::INVALID_ADDRESS; } @@ -639,12 +639,12 @@ void GraphicPack2::ApplyPatchGroups(std::vector& groups, const RPLM if (patchGroup->codeCaveSize > 0) { auto codeCaveMem = RPLLoader_AllocateCodeCaveMem(256, patchGroup->codeCaveSize); - forceLog_printf("Applying patch group \'%s\' (Codecave: %08x-%08x)", patchGroup->name.c_str(), codeCaveMem.GetMPTR(), codeCaveMem.GetMPTR() + patchGroup->codeCaveSize); + 
cemuLog_log(LogType::Force, "Applying patch group \'{}\' (Codecave: {:08x}-{:08x})", patchGroup->name, codeCaveMem.GetMPTR(), codeCaveMem.GetMPTR() + patchGroup->codeCaveSize); patchGroup->codeCaveMem = codeCaveMem; } else { - forceLog_printf("Applying patch group \'%s\'", patchGroup->name.c_str()); + cemuLog_log(LogType::Force, "Applying patch group \'{}\'", patchGroup->name); patchGroup->codeCaveMem = nullptr; } } @@ -738,7 +738,7 @@ void GraphicPack2::UndoPatchGroups(std::vector& groups, const RPLMo void GraphicPack2::NotifyModuleLoaded(const RPLModule* rpl) { - cemuLog_force("Loaded module \'{}\' with checksum 0x{:08x}", rpl->moduleName2, rpl->patchCRC); + cemuLog_log(LogType::Force, "Loaded module \'{}\' with checksum 0x{:08x}", rpl->moduleName2, rpl->patchCRC); std::lock_guard lock(mtx_patches); list_modules.emplace_back(rpl); diff --git a/src/Cafe/GraphicPack/GraphicPack2PatchesParser.cpp b/src/Cafe/GraphicPack/GraphicPack2PatchesParser.cpp index 12f97ef7..05f8c696 100644 --- a/src/Cafe/GraphicPack/GraphicPack2PatchesParser.cpp +++ b/src/Cafe/GraphicPack/GraphicPack2PatchesParser.cpp @@ -25,7 +25,7 @@ sint32 GraphicPack2::GetLengthWithoutComment(const char* str, size_t length) void GraphicPack2::LogPatchesSyntaxError(sint32 lineNumber, std::string_view errorMsg) { - cemuLog_log(LogType::Force, fmt::format(L"Syntax error while parsing patch for graphic pack '{}':", this->GetFilename())); + cemuLog_log(LogType::Force, "Syntax error while parsing patch for graphic pack '{}':", _pathToUtf8(this->GetRulesPath())); if(lineNumber >= 0) cemuLog_log(LogType::Force, fmt::format("Line {0}: {1}", lineNumber, errorMsg)); else @@ -68,7 +68,7 @@ void GraphicPack2::AddPatchGroup(PatchGroup* group) if (group->list_patches.size() < (numEstimatedCodeCaveInstr / 8)) { // if less than 1/8th of the code cave is filled print a warning - forceLog_printf("Graphic pack patches: Code cave for group [%s] in gfx pack \"%s\" ranges from 0 to 0x%x but has only few instructions. Is this intentional?", group->name.c_str(), this->m_name.c_str(), codeCaveMaxAddr); + cemuLog_log(LogType::Force, "Graphic pack patches: Code cave for group [{}] in gfx pack \"{}\" ranges from 0 to 0x{:x} but has only few instructions. Is this intentional?", group->name, this->m_name, codeCaveMaxAddr); } group->codeCaveSize = codeCaveMaxAddr; list_patchGroups.emplace_back(group); diff --git a/src/Cafe/HW/ACR/ACR.cpp b/src/Cafe/HW/ACR/ACR.cpp index b8a48a8a..362aef5a 100644 --- a/src/Cafe/HW/ACR/ACR.cpp +++ b/src/Cafe/HW/ACR/ACR.cpp @@ -25,13 +25,13 @@ namespace HW_ACR /* 0x0D00021C | Accesses VI register currently selected by VIADDR */ HWREG::ACR_VI_DATA ACR_VIDATA_R32(PAddr addr) { - forceLogDebug_printf("ACR_VIDATA read with selected reg %08x", g_acr.viAddr.get_ADDR()); + cemuLog_logDebug(LogType::Force, "ACR_VIDATA read with selected reg {:08x}", g_acr.viAddr.get_ADDR()); return HWREG::ACR_VI_DATA(); } void ACR_VIDATA_W32(PAddr addr, HWREG::ACR_VI_DATA newValue) { - forceLogDebug_printf("ACR_VIDATA write %08x with selected reg %08x", newValue.get_DATA(), g_acr.viAddr.get_ADDR()); + cemuLog_logDebug(LogType::Force, "ACR_VIDATA write {:08x} with selected reg {:08x}", newValue.get_DATA(), g_acr.viAddr.get_ADDR()); } /* 0x0D000224 | Controls the selected VI register? 
*/ diff --git a/src/Cafe/HW/Espresso/Const.h b/src/Cafe/HW/Espresso/Const.h index 88a1a9de..06959bb0 100644 --- a/src/Cafe/HW/Espresso/Const.h +++ b/src/Cafe/HW/Espresso/Const.h @@ -8,4 +8,5 @@ namespace Espresso constexpr inline uint64 BUS_CLOCK = 248625000; constexpr inline uint64 TIMER_CLOCK = BUS_CLOCK / 4; + constexpr inline uint32 MEM_PAGE_SIZE = 0x20000; }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Debugger/DebugSymbolStorage.h b/src/Cafe/HW/Espresso/Debugger/DebugSymbolStorage.h index 0d7b7d49..0a46951d 100644 --- a/src/Cafe/HW/Espresso/Debugger/DebugSymbolStorage.h +++ b/src/Cafe/HW/Espresso/Debugger/DebugSymbolStorage.h @@ -24,40 +24,45 @@ class DebugSymbolStorage public: static void StoreDataType(MPTR address, DEBUG_SYMBOL_TYPE type) { - s_lock.acquire(); + s_lock.lock(); s_typeStorage[address] = type; - s_lock.release(); + s_lock.unlock(); } static DEBUG_SYMBOL_TYPE GetDataType(MPTR address) { - s_lock.acquire(); + s_lock.lock(); auto itr = s_typeStorage.find(address); if (itr == s_typeStorage.end()) { - s_lock.release(); + s_lock.unlock(); return DEBUG_SYMBOL_TYPE::UNDEFINED; } DEBUG_SYMBOL_TYPE t = itr->second; - s_lock.release(); + s_lock.unlock(); return t; } static void ClearRange(MPTR address, uint32 length) { - s_lock.acquire(); - while (length > 0) + if (length == 0) + return; + s_lock.lock(); + for (;;) { auto itr = s_typeStorage.find(address); if (itr != s_typeStorage.end()) s_typeStorage.erase(itr); + + if (length <= 4) + break; address += 4; length -= 4; } - s_lock.release(); + s_lock.unlock(); } private: static FSpinlock s_lock; static std::unordered_map s_typeStorage; -}; \ No newline at end of file +}; diff --git a/src/Cafe/HW/Espresso/Debugger/Debugger.cpp b/src/Cafe/HW/Espresso/Debugger/Debugger.cpp index fc015285..e84c9fda 100644 --- a/src/Cafe/HW/Espresso/Debugger/Debugger.cpp +++ b/src/Cafe/HW/Espresso/Debugger/Debugger.cpp @@ -1,5 +1,6 @@ #include "gui/guiWrapper.h" #include "Debugger.h" +#include "Cafe/OS/RPL/rpl_structs.h" #include "Cemu/PPCAssembler/ppcAssembler.h" #include "Cafe/HW/Espresso/Recompiler/PPCRecompiler.h" #include "Cemu/ExpressionParser/ExpressionParser.h" @@ -7,6 +8,7 @@ #include "gui/debugger/DebuggerWindow2.h" #include "Cafe/OS/libs/coreinit/coreinit.h" +#include "util/helpers/helpers.h" #if BOOST_OS_WINDOWS #include @@ -74,7 +76,7 @@ uint32 debugger_getAddressOriginalOpcode(uint32 address) auto bpItr = debugger_getFirstBP(address); while (bpItr) { - if (bpItr->bpType == DEBUGGER_BP_T_NORMAL || bpItr->bpType == DEBUGGER_BP_T_ONE_SHOT) + if (bpItr->isExecuteBP()) return bpItr->originalOpcodeValue; bpItr = bpItr->next; } @@ -103,7 +105,7 @@ void debugger_updateExecutionBreakpoint(uint32 address, bool forceRestore) if (bpItr->enabled && forceRestore == false) { // write TW instruction to memory - debugger_updateMemoryU32(address, (31 << 26) | (4 << 1)); + debugger_updateMemoryU32(address, DEBUGGER_BP_T_DEBUGGER_TW); return; } else @@ -121,30 +123,16 @@ void debugger_updateExecutionBreakpoint(uint32 address, bool forceRestore) } } -void debugger_createExecuteBreakpoint(uint32 address) +void debugger_createCodeBreakpoint(uint32 address, uint8 bpType) { // check if breakpoint already exists auto existingBP = debugger_getFirstBP(address); - if (existingBP && debuggerBPChain_hasType(existingBP, DEBUGGER_BP_T_NORMAL)) + if (existingBP && debuggerBPChain_hasType(existingBP, bpType)) return; // breakpoint already exists // get original opcode at address uint32 originalOpcode = debugger_getAddressOriginalOpcode(address); // init 
breakpoint object - DebuggerBreakpoint* bp = new DebuggerBreakpoint(address, originalOpcode, DEBUGGER_BP_T_NORMAL, true); - debuggerBPChain_add(address, bp); - debugger_updateExecutionBreakpoint(address); -} - -void debugger_createSingleShotExecuteBreakpoint(uint32 address) -{ - // check if breakpoint already exists - auto existingBP = debugger_getFirstBP(address); - if (existingBP && debuggerBPChain_hasType(existingBP, DEBUGGER_BP_T_ONE_SHOT)) - return; // breakpoint already exists - // get original opcode at address - uint32 originalOpcode = debugger_getAddressOriginalOpcode(address); - // init breakpoint object - DebuggerBreakpoint* bp = new DebuggerBreakpoint(address, originalOpcode, DEBUGGER_BP_T_ONE_SHOT, true); + DebuggerBreakpoint* bp = new DebuggerBreakpoint(address, originalOpcode, bpType, true); debuggerBPChain_add(address, bp); debugger_updateExecutionBreakpoint(address); } @@ -171,14 +159,25 @@ void debugger_updateMemoryBreakpoint(DebuggerBreakpoint* bp) { ctx.Dr0 = (DWORD64)memory_getPointerFromVirtualOffset(bp->address); ctx.Dr1 = (DWORD64)memory_getPointerFromVirtualOffset(bp->address); - ctx.Dr7 = 1 | (1 << 16) | (3 << 18); // enable dr0, track write, 4 byte length - ctx.Dr7 |= (4 | (3 << 20) | (3 << 22)); // enable dr1, track read+write, 4 byte length + // breakpoint 0 + SetBits(ctx.Dr7, 0, 1, 1); // breakpoint #0 enabled: true + SetBits(ctx.Dr7, 16, 2, 1); // breakpoint #0 condition: 1 (write) + SetBits(ctx.Dr7, 18, 2, 3); // breakpoint #0 length: 3 (4 bytes) + // breakpoint 1 + SetBits(ctx.Dr7, 2, 1, 1); // breakpoint #1 enabled: true + SetBits(ctx.Dr7, 20, 2, 3); // breakpoint #1 condition: 3 (read & write) + SetBits(ctx.Dr7, 22, 2, 3); // breakpoint #1 length: 3 (4 bytes) } else { - ctx.Dr0 = (DWORD64)0; - ctx.Dr1 = (DWORD64)0; - ctx.Dr7 = 0; // disable dr0 + // breakpoint 0 + SetBits(ctx.Dr7, 0, 1, 0); // breakpoint #0 enabled: false + SetBits(ctx.Dr7, 16, 2, 0); // breakpoint #0 condition: 1 (write) + SetBits(ctx.Dr7, 18, 2, 0); // breakpoint #0 length: 3 (4 bytes) + // breakpoint 1 + SetBits(ctx.Dr7, 2, 1, 0); // breakpoint #1 enabled: false + SetBits(ctx.Dr7, 20, 2, 0); // breakpoint #1 condition: 3 (read & write) + SetBits(ctx.Dr7, 22, 2, 0); // breakpoint #1 length: 3 (4 bytes) } SetThreadContext(hThread, &ctx); ResumeThread(hThread); @@ -188,10 +187,10 @@ void debugger_updateMemoryBreakpoint(DebuggerBreakpoint* bp) #endif } -void debugger_handleSingleStepException(uint32 drMask) +void debugger_handleSingleStepException(uint64 dr6) { - bool triggeredDR0 = (drMask & (1 << 0)) != 0; // write - bool triggeredDR1 = (drMask & (1 << 1)) != 0; // read + bool triggeredDR0 = GetBits(dr6, 0, 1); // write + bool triggeredDR1 = GetBits(dr6, 1, 1); // read and write bool catchBP = false; if (triggeredDR0 && triggeredDR1) { @@ -207,7 +206,8 @@ void debugger_handleSingleStepException(uint32 drMask) } if (catchBP) { - debugger_createSingleShotExecuteBreakpoint(ppcInterpreterCurrentInstance->instructionPointer + 4); + PPCInterpreter_t* hCPU = PPCInterpreter_getCurrentInstance(); + debugger_createCodeBreakpoint(hCPU->instructionPointer + 4, DEBUGGER_BP_T_ONE_SHOT); } } @@ -239,7 +239,7 @@ void debugger_handleEntryBreakpoint(uint32 address) if (!debuggerState.breakOnEntry) return; - debugger_createExecuteBreakpoint(address); + debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_NORMAL); } void debugger_deleteBreakpoint(DebuggerBreakpoint* bp) @@ -287,10 +287,27 @@ void debugger_toggleExecuteBreakpoint(uint32 address) { // delete existing breakpoint 
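// This hunk reworks debugger_toggleExecuteBreakpoint() around the shared
// debugger_createCodeBreakpoint(address, bpType) helper and adds a logging variant.
// Rough usage sketch of the resulting API (the addresses are hypothetical, not part of this patch):
//
//     debugger_toggleExecuteBreakpoint(0x02000000);                      // pause emulation when hit
//     debugger_toggleLoggingBreakpoint(0x02000000);                      // log and keep running when hit
//     debugger_createCodeBreakpoint(0x02000004, DEBUGGER_BP_T_ONE_SHOT); // one-shot, used for stepping
//
// Toggling the same address a second time deletes the breakpoint again.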
debugger_deleteBreakpoint(existingBP); - return; } - // create new - debugger_createExecuteBreakpoint(address); + else + { + // create new execution breakpoint + debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_NORMAL); + } +} + +void debugger_toggleLoggingBreakpoint(uint32 address) +{ + auto existingBP = debugger_getFirstBP(address, DEBUGGER_BP_T_LOGGING); + if (existingBP) + { + // delete existing breakpoint + debugger_deleteBreakpoint(existingBP); + } + else + { + // create new logging breakpoint + debugger_createCodeBreakpoint(address, DEBUGGER_BP_T_LOGGING); + } } void debugger_forceBreak() @@ -316,7 +333,7 @@ void debugger_toggleBreakpoint(uint32 address, bool state, DebuggerBreakpoint* b { if (bpItr == bp) { - if (bpItr->bpType == DEBUGGER_BP_T_NORMAL) + if (bpItr->bpType == DEBUGGER_BP_T_NORMAL || bpItr->bpType == DEBUGGER_BP_T_LOGGING) { bp->enabled = state; debugger_updateExecutionBreakpoint(address); @@ -441,6 +458,34 @@ bool debugger_hasPatch(uint32 address) return false; } +void debugger_removePatch(uint32 address) +{ + for (sint32 i = 0; i < debuggerState.patches.size(); i++) + { + auto& patch = debuggerState.patches[i]; + if (address < patch->address || address >= (patch->address + patch->length)) + continue; + MPTR startAddress = patch->address; + MPTR endAddress = patch->address + patch->length; + // remove any breakpoints overlapping with the patch + for (auto& bp : debuggerState.breakpoints) + { + if (bp->address + 4 > startAddress && bp->address < endAddress) + { + bp->enabled = false; + debugger_updateExecutionBreakpoint(bp->address); + } + } + // restore original data + memcpy(MEMPTR(startAddress).GetPtr(), patch->origData.data(), patch->length); + PPCRecompiler_invalidateRange(startAddress, endAddress); + // remove patch + delete patch; + debuggerState.patches.erase(debuggerState.patches.begin() + i); + return; + } +} + void debugger_stepInto(PPCInterpreter_t* hCPU, bool updateDebuggerWindow = true) { bool isRecEnabled = ppcRecompilerEnabled; @@ -475,7 +520,7 @@ bool debugger_stepOver(PPCInterpreter_t* hCPU) return false; } // create one-shot breakpoint at next instruction - debugger_createSingleShotExecuteBreakpoint(initialIP +4); + debugger_createCodeBreakpoint(initialIP + 4, DEBUGGER_BP_T_ONE_SHOT); // step over current instruction (to avoid breakpoint) debugger_stepInto(hCPU); debuggerWindow_moveIP(); @@ -497,8 +542,78 @@ void debugger_createPPCStateSnapshot(PPCInterpreter_t* hCPU) void debugger_enterTW(PPCInterpreter_t* hCPU) { + // handle logging points + DebuggerBreakpoint* bp = debugger_getFirstBP(hCPU->instructionPointer); + bool shouldBreak = debuggerBPChain_hasType(bp, DEBUGGER_BP_T_NORMAL) || debuggerBPChain_hasType(bp, DEBUGGER_BP_T_ONE_SHOT); + while (bp) + { + if (bp->bpType == DEBUGGER_BP_T_LOGGING && bp->enabled) + { + std::string comment = !bp->comment.empty() ? 
boost::nowide::narrow(bp->comment) : fmt::format("Breakpoint at 0x{:08X} (no comment)", bp->address); + + auto replacePlaceholders = [&](const std::string& prefix, const auto& formatFunc) + { + size_t pos = 0; + while ((pos = comment.find(prefix, pos)) != std::string::npos) + { + size_t endPos = comment.find('}', pos); + if (endPos == std::string::npos) + break; + + try + { + if (int regNum = ConvertString(comment.substr(pos + prefix.length(), endPos - pos - prefix.length())); regNum >= 0 && regNum < 32) + { + std::string replacement = formatFunc(regNum); + comment.replace(pos, endPos - pos + 1, replacement); + pos += replacement.length(); + } + else + { + pos = endPos + 1; + } + } + catch (...) + { + pos = endPos + 1; + } + } + }; + + // Replace integer register placeholders {rX} + replacePlaceholders("{r", [&](int regNum) { + return fmt::format("0x{:08X}", hCPU->gpr[regNum]); + }); + + // Replace floating point register placeholders {fX} + replacePlaceholders("{f", [&](int regNum) { + return fmt::format("{}", hCPU->fpr[regNum].fpr); + }); + + std::string logName = "Breakpoint '" + comment + "'"; + std::string logContext = fmt::format("Thread: {:08x} LR: 0x{:08x}", MEMPTR(coreinit::OSGetCurrentThread()).GetMPTR(), hCPU->spr.LR, cemuLog_advancedPPCLoggingEnabled() ? " Stack Trace:" : ""); + cemuLog_log(LogType::Force, "[Debugger] {} was executed! {}", logName, logContext); + if (cemuLog_advancedPPCLoggingEnabled()) + DebugLogStackTrace(coreinit::OSGetCurrentThread(), hCPU->gpr[1]); + break; + } + bp = bp->next; + } + + // return early if it's only a non-pausing logging breakpoint to prevent a modified debugger state and GUI updates + if (!shouldBreak) + { + uint32 backupIP = debuggerState.debugSession.instructionPointer; + debuggerState.debugSession.instructionPointer = hCPU->instructionPointer; + debugger_stepInto(hCPU, false); + PPCInterpreterSlim_executeInstruction(hCPU); + debuggerState.debugSession.instructionPointer = backupIP; + return; + } + + // handle breakpoints debuggerState.debugSession.isTrapped = true; - debuggerState.debugSession.debuggedThreadMPTR = coreinitThread_getCurrentThreadMPTRDepr(hCPU); + debuggerState.debugSession.debuggedThreadMPTR = MEMPTR(coreinit::OSGetCurrentThread()).GetMPTR(); debuggerState.debugSession.instructionPointer = hCPU->instructionPointer; debuggerState.debugSession.hCPU = hCPU; debugger_createPPCStateSnapshot(hCPU); @@ -512,7 +627,7 @@ void debugger_enterTW(PPCInterpreter_t* hCPU) debuggerState.debugSession.stepInto = false; debuggerState.debugSession.stepOver = false; debuggerState.debugSession.run = false; - while (true) + while (debuggerState.debugSession.isTrapped) { std::this_thread::sleep_for(std::chrono::milliseconds(1)); // check for step commands @@ -568,6 +683,20 @@ void debugger_shouldBreak(PPCInterpreter_t* hCPU) void debugger_addParserSymbols(class ExpressionParser& ep) { + const auto module_count = RPLLoader_GetModuleCount(); + const auto module_list = RPLLoader_GetModuleList(); + + std::vector module_tmp(module_count); + for (int i = 0; i < module_count; i++) + { + const auto module = module_list[i]; + if (module) + { + module_tmp[i] = (double)module->regionMappingBase_text.GetMPTR(); + ep.AddConstant(module->moduleName2, module_tmp[i]); + } + } + for (sint32 i = 0; i < 32; i++) ep.AddConstant(fmt::format("r{}", i), debuggerState.debugSession.ppcSnapshot.gpr[i]); } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Debugger/Debugger.h b/src/Cafe/HW/Espresso/Debugger/Debugger.h index 3bb90bea..c220eb8a 100644 --- 
a/src/Cafe/HW/Espresso/Debugger/Debugger.h +++ b/src/Cafe/HW/Espresso/Debugger/Debugger.h @@ -3,19 +3,18 @@ #include #include "Cafe/HW/Espresso/PPCState.h" -//#define DEBUGGER_BP_TYPE_NORMAL (1<<0) // normal breakpoint -//#define DEBUGGER_BP_TYPE_ONE_SHOT (1<<1) // normal breakpoint -//#define DEBUGGER_BP_TYPE_MEMORY_READ (1<<2) // memory breakpoint -//#define DEBUGGER_BP_TYPE_MEMORY_WRITE (1<<3) // memory breakpoint - #define DEBUGGER_BP_T_NORMAL 0 // normal breakpoint #define DEBUGGER_BP_T_ONE_SHOT 1 // normal breakpoint, deletes itself after trigger (used for stepping) #define DEBUGGER_BP_T_MEMORY_READ 2 // memory breakpoint #define DEBUGGER_BP_T_MEMORY_WRITE 3 // memory breakpoint +#define DEBUGGER_BP_T_LOGGING 4 // logging breakpoint, prints the breakpoint comment and stack trace whenever hit #define DEBUGGER_BP_T_GDBSTUB 1 // breakpoint created by GDBStub #define DEBUGGER_BP_T_DEBUGGER 2 // breakpoint created by Cemu's debugger +#define DEBUGGER_BP_T_GDBSTUB_TW 0x7C010008 +#define DEBUGGER_BP_T_DEBUGGER_TW 0x7C020008 + struct DebuggerBreakpoint { @@ -44,7 +43,7 @@ struct DebuggerBreakpoint bool isExecuteBP() const { - return bpType == DEBUGGER_BP_T_NORMAL || bpType == DEBUGGER_BP_T_ONE_SHOT; + return bpType == DEBUGGER_BP_T_NORMAL || bpType == DEBUGGER_BP_T_LOGGING || bpType == DEBUGGER_BP_T_ONE_SHOT; } bool isMemBP() const @@ -100,8 +99,9 @@ extern debuggerState_t debuggerState; // new API DebuggerBreakpoint* debugger_getFirstBP(uint32 address); +void debugger_createCodeBreakpoint(uint32 address, uint8 bpType); void debugger_toggleExecuteBreakpoint(uint32 address); // create/remove execute breakpoint -void debugger_createExecuteBreakpoint(uint32 address); +void debugger_toggleLoggingBreakpoint(uint32 address); // create/remove logging breakpoint void debugger_toggleBreakpoint(uint32 address, bool state, DebuggerBreakpoint* bp); void debugger_createMemoryBreakpoint(uint32 address, bool onRead, bool onWrite); @@ -114,6 +114,7 @@ void debugger_updateExecutionBreakpoint(uint32 address, bool forceRestore = fals void debugger_createPatch(uint32 address, std::span patchData); bool debugger_hasPatch(uint32 address); +void debugger_removePatch(uint32 address); void debugger_forceBreak(); // force breakpoint at the next possible instruction bool debugger_isTrapped(); diff --git a/src/Cafe/HW/Espresso/Debugger/GDBBreakpoints.cpp b/src/Cafe/HW/Espresso/Debugger/GDBBreakpoints.cpp new file mode 100644 index 00000000..675050d3 --- /dev/null +++ b/src/Cafe/HW/Espresso/Debugger/GDBBreakpoints.cpp @@ -0,0 +1,304 @@ +#include "GDBBreakpoints.h" +#include "Debugger.h" +#include "Cafe/HW/Espresso/Recompiler/PPCRecompiler.h" + +#if defined(ARCH_X86_64) && BOOST_OS_LINUX +#include +#include +#include + +DRType _GetDR(pid_t tid, int drIndex) +{ + size_t drOffset = offsetof(struct user, u_debugreg) + drIndex * sizeof(user::u_debugreg[0]); + + long v; + v = ptrace(PTRACE_PEEKUSER, tid, drOffset, nullptr); + if (v == -1) + perror("ptrace(PTRACE_PEEKUSER)"); + + return (DRType)v; +} + +void _SetDR(pid_t tid, int drIndex, DRType newValue) +{ + size_t drOffset = offsetof(struct user, u_debugreg) + drIndex * sizeof(user::u_debugreg[0]); + + long rc = ptrace(PTRACE_POKEUSER, tid, drOffset, newValue); + if (rc == -1) + perror("ptrace(PTRACE_POKEUSER)"); +} + +DRType _ReadDR6() +{ + pid_t tid = gettid(); + + // linux doesn't let us attach to the current thread / threads in the current thread group + // we have to create a child process which then modifies the debug registers and quits + pid_t child = fork(); + if 
(child == -1) + { + perror("fork"); + return 0; + } + + if (child == 0) + { + if (ptrace(PTRACE_ATTACH, tid, nullptr, nullptr)) + { + perror("attach"); + _exit(0); + } + + waitpid(tid, NULL, 0); + + uint64_t dr6 = _GetDR(tid, 6); + + if (ptrace(PTRACE_DETACH, tid, nullptr, nullptr)) + perror("detach"); + + // since the status code only uses the lower 8 bits, we have to discard the rest of DR6 + // this should be fine though, since the lower 4 bits of DR6 contain all the bp conditions + _exit(dr6 & 0xff); + } + + // wait for child process + int wstatus; + waitpid(child, &wstatus, 0); + + return (DRType)WEXITSTATUS(wstatus); +} +#endif + +GDBServer::ExecutionBreakpoint::ExecutionBreakpoint(MPTR address, BreakpointType type, bool visible, std::string reason) + : m_address(address), m_removedAfterInterrupt(false), m_reason(std::move(reason)) +{ + if (type == BreakpointType::BP_SINGLE) + { + this->m_pauseThreads = true; + this->m_restoreAfterInterrupt = false; + this->m_deleteAfterAnyInterrupt = false; + this->m_pauseOnNextInterrupt = false; + this->m_visible = visible; + } + else if (type == BreakpointType::BP_PERSISTENT) + { + this->m_pauseThreads = true; + this->m_restoreAfterInterrupt = true; + this->m_deleteAfterAnyInterrupt = false; + this->m_pauseOnNextInterrupt = false; + this->m_visible = visible; + } + else if (type == BreakpointType::BP_RESTORE_POINT) + { + this->m_pauseThreads = false; + this->m_restoreAfterInterrupt = false; + this->m_deleteAfterAnyInterrupt = false; + this->m_pauseOnNextInterrupt = false; + this->m_visible = false; + } + else if (type == BreakpointType::BP_STEP_POINT) + { + this->m_pauseThreads = false; + this->m_restoreAfterInterrupt = false; + this->m_deleteAfterAnyInterrupt = true; + this->m_pauseOnNextInterrupt = true; + this->m_visible = false; + } + + this->m_origOpCode = memory_readU32(address); + memory_writeU32(address, DEBUGGER_BP_T_GDBSTUB_TW); + PPCRecompiler_invalidateRange(address, address + 4); +} + +GDBServer::ExecutionBreakpoint::~ExecutionBreakpoint() +{ + memory_writeU32(this->m_address, this->m_origOpCode); + PPCRecompiler_invalidateRange(this->m_address, this->m_address + 4); +} + +uint32 GDBServer::ExecutionBreakpoint::GetVisibleOpCode() const +{ + if (this->m_visible) + return memory_readU32(this->m_address); + else + return this->m_origOpCode; +} + +void GDBServer::ExecutionBreakpoint::RemoveTemporarily() +{ + memory_writeU32(this->m_address, this->m_origOpCode); + PPCRecompiler_invalidateRange(this->m_address, this->m_address + 4); + this->m_restoreAfterInterrupt = true; +} + +void GDBServer::ExecutionBreakpoint::Restore() +{ + memory_writeU32(this->m_address, DEBUGGER_BP_T_GDBSTUB_TW); + PPCRecompiler_invalidateRange(this->m_address, this->m_address + 4); + this->m_restoreAfterInterrupt = false; +} + +namespace coreinit +{ +#if BOOST_OS_LINUX + std::vector& OSGetSchedulerThreadIds(); +#endif + + std::vector& OSGetSchedulerThreads(); +} + +GDBServer::AccessBreakpoint::AccessBreakpoint(MPTR address, AccessPointType type) + : m_address(address), m_type(type) +{ +#if defined(ARCH_X86_64) && BOOST_OS_WINDOWS + for (auto& hThreadNH : coreinit::OSGetSchedulerThreads()) + { + HANDLE hThread = (HANDLE)hThreadNH; + CONTEXT ctx{}; + ctx.ContextFlags = CONTEXT_DEBUG_REGISTERS; + SuspendThread(hThread); + GetThreadContext(hThread, &ctx); + + // use BP 2/3 for gdb stub since cemu's internal debugger uses BP 0/1 already + ctx.Dr2 = (DWORD64)memory_getPointerFromVirtualOffset(address); + ctx.Dr3 = (DWORD64)memory_getPointerFromVirtualOffset(address); + 
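	// For reference: the SetBits() calls below program the x86-64 debug control register DR7.
	// For hardware breakpoint #n (n = 0..3), bit (2*n) is its local-enable flag, bits 16+4*n..17+4*n
	// select the trigger condition (0b01 = write, 0b11 = read or write) and bits 18+4*n..19+4*n the
	// length (0b11 = 4 bytes). SetBits/GetBits come from util/helpers; their exact signature is assumed
	// here, but based on the call sites they behave roughly like this sketch (illustration only):
	auto setBitsSketch = [](uint64& target, uint32 bitIndex, uint32 bitCount, uint64 value)
	{
		const uint64 mask = ((1ull << bitCount) - 1ull) << bitIndex; // bits to overwrite
		target = (target & ~mask) | ((value << bitIndex) & mask);    // clear them, then splice in value
	};
	// e.g. applying the sketch as setBitsSketch(dr7Value, 26, 2, 3) would mark hardware
	// breakpoint #2 as 4 bytes wide (dr7Value being a hypothetical copy of DR7)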
// breakpoint 2 + SetBits(ctx.Dr7, 4, 1, 1); // breakpoint #3 enabled: true + SetBits(ctx.Dr7, 24, 2, 1); // breakpoint #3 condition: 1 (write) + SetBits(ctx.Dr7, 26, 2, 3); // breakpoint #3 length: 3 (4 bytes) + // breakpoint 3 + SetBits(ctx.Dr7, 6, 1, 1); // breakpoint #4 enabled: true + SetBits(ctx.Dr7, 28, 2, 3); // breakpoint #4 condition: 3 (read & write) + SetBits(ctx.Dr7, 30, 2, 3); // breakpoint #4 length: 3 (4 bytes) + + SetThreadContext(hThread, &ctx); + ResumeThread(hThread); + } +#elif defined(ARCH_X86_64) && BOOST_OS_LINUX + // linux doesn't let us attach to threads which are in the same thread group as our current thread + // we have to create a child process which then modifies the debug registers and quits + pid_t child = fork(); + if (child == -1) + { + perror("fork"); + return; + } + + if (child == 0) + { + for (pid_t tid : coreinit::OSGetSchedulerThreadIds()) + { + long rc = ptrace(PTRACE_ATTACH, tid, nullptr, nullptr); + if (rc == -1) + perror("ptrace(PTRACE_ATTACH)"); + + waitpid(tid, nullptr, 0); + + DRType dr7 = _GetDR(tid, 7); + // use BP 2/3 for gdb stub since cemu's internal debugger uses BP 0/1 already + DRType dr2 = (uint64)memory_getPointerFromVirtualOffset(address); + DRType dr3 = (uint64)memory_getPointerFromVirtualOffset(address); + // breakpoint 2 + SetBits(dr7, 4, 1, 1); // breakpoint #3 enabled: true + SetBits(dr7, 24, 2, 1); // breakpoint #3 condition: 1 (write) + SetBits(dr7, 26, 2, 3); // breakpoint #3 length: 3 (4 bytes) + // breakpoint 3 + SetBits(dr7, 6, 1, 1); // breakpoint #4 enabled: true + SetBits(dr7, 28, 2, 3); // breakpoint #4 condition: 3 (read & write) + SetBits(dr7, 30, 2, 3); // breakpoint #4 length: 3 (4 bytes) + + _SetDR(tid, 2, dr2); + _SetDR(tid, 3, dr3); + _SetDR(tid, 7, dr7); + + rc = ptrace(PTRACE_DETACH, tid, nullptr, nullptr); + if (rc == -1) + perror("ptrace(PTRACE_DETACH)"); + } + + // exit child process + _exit(0); + } + + // wait for child process + waitpid(child, nullptr, 0); +#else + cemuLog_log(LogType::Force, "Debugger read/write breakpoints are not supported on non-x86 CPUs yet."); +#endif +} + +GDBServer::AccessBreakpoint::~AccessBreakpoint() +{ +#if defined(ARCH_X86_64) && BOOST_OS_WINDOWS + for (auto& hThreadNH : coreinit::OSGetSchedulerThreads()) + { + HANDLE hThread = (HANDLE)hThreadNH; + CONTEXT ctx{}; + ctx.ContextFlags = CONTEXT_DEBUG_REGISTERS; + SuspendThread(hThread); + GetThreadContext(hThread, &ctx); + + // reset BP 2/3 to zero + ctx.Dr2 = (DWORD64)0; + ctx.Dr3 = (DWORD64)0; + // breakpoint 2 + SetBits(ctx.Dr7, 4, 1, 0); + SetBits(ctx.Dr7, 24, 2, 0); + SetBits(ctx.Dr7, 26, 2, 0); + // breakpoint 3 + SetBits(ctx.Dr7, 6, 1, 0); + SetBits(ctx.Dr7, 28, 2, 0); + SetBits(ctx.Dr7, 30, 2, 0); + SetThreadContext(hThread, &ctx); + ResumeThread(hThread); + } +#elif defined(ARCH_X86_64) && BOOST_OS_LINUX + // linux doesn't let us attach to threads which are in the same thread group as our current thread + // we have to create a child process which then modifies the debug registers and quits + pid_t child = fork(); + if (child == -1) + { + perror("fork"); + return; + } + + if (child == 0) + { + for (pid_t tid : coreinit::OSGetSchedulerThreadIds()) + { + long rc = ptrace(PTRACE_ATTACH, tid, nullptr, nullptr); + if (rc == -1) + perror("ptrace(PTRACE_ATTACH)"); + + waitpid(tid, nullptr, 0); + + DRType dr7 = _GetDR(tid, 7); + // reset BP 2/3 to zero + DRType dr2 = 0; + DRType dr3 = 0; + // breakpoint 2 + SetBits(dr7, 4, 1, 0); + SetBits(dr7, 24, 2, 0); + SetBits(dr7, 26, 2, 0); + // breakpoint 3 + SetBits(dr7, 6, 1, 0); + 
SetBits(dr7, 28, 2, 0); + SetBits(dr7, 30, 2, 0); + + _SetDR(tid, 2, dr2); + _SetDR(tid, 3, dr3); + _SetDR(tid, 7, dr7); + + rc = ptrace(PTRACE_DETACH, tid, nullptr, nullptr); + if (rc == -1) + perror("ptrace(PTRACE_DETACH)"); + } + + // exit child process + _exit(0); + } + + // wait for child process + waitpid(child, nullptr, 0); +#endif +} diff --git a/src/Cafe/HW/Espresso/Debugger/GDBBreakpoints.h b/src/Cafe/HW/Espresso/Debugger/GDBBreakpoints.h new file mode 100644 index 00000000..f94365c2 --- /dev/null +++ b/src/Cafe/HW/Espresso/Debugger/GDBBreakpoints.h @@ -0,0 +1,106 @@ +#pragma once +#include "GDBStub.h" +#include + +#if defined(ARCH_X86_64) && BOOST_OS_LINUX +#include + +// helpers for accessing debug register +typedef unsigned long DRType; + +DRType _GetDR(pid_t tid, int drIndex); +void _SetDR(pid_t tid, int drIndex, DRType newValue); +DRType _ReadDR6(); +#endif + +enum class BreakpointType +{ + BP_SINGLE, + BP_PERSISTENT, + BP_RESTORE_POINT, + BP_STEP_POINT +}; + +class GDBServer::ExecutionBreakpoint { +public: + ExecutionBreakpoint(MPTR address, BreakpointType type, bool visible, std::string reason); + ~ExecutionBreakpoint(); + + [[nodiscard]] uint32 GetVisibleOpCode() const; + [[nodiscard]] bool ShouldBreakThreads() const + { + return this->m_pauseThreads; + }; + [[nodiscard]] bool ShouldBreakThreadsOnNextInterrupt() + { + bool shouldPause = this->m_pauseOnNextInterrupt; + this->m_pauseOnNextInterrupt = false; + return shouldPause; + }; + [[nodiscard]] bool IsPersistent() const + { + return this->m_restoreAfterInterrupt; + }; + [[nodiscard]] bool IsSkipBreakpoint() const + { + return this->m_deleteAfterAnyInterrupt; + }; + [[nodiscard]] bool IsRemoved() const + { + return this->m_removedAfterInterrupt; + }; + [[nodiscard]] std::string GetReason() const + { + return m_reason; + }; + + void RemoveTemporarily(); + void Restore(); + void PauseOnNextInterrupt() + { + this->m_pauseOnNextInterrupt = true; + }; + + void WriteNewOpCode(uint32 newOpCode) + { + this->m_origOpCode = newOpCode; + }; + +private: + const MPTR m_address; + std::string m_reason; + uint32 m_origOpCode; + bool m_visible; + bool m_pauseThreads; + // type + bool m_pauseOnNextInterrupt; + bool m_restoreAfterInterrupt; + bool m_deleteAfterAnyInterrupt; + bool m_removedAfterInterrupt; +}; + +enum class AccessPointType +{ + BP_WRITE = 2, + BP_READ = 3, + BP_BOTH = 4 +}; + +class GDBServer::AccessBreakpoint { +public: + AccessBreakpoint(MPTR address, AccessPointType type); + ~AccessBreakpoint(); + + MPTR GetAddress() const + { + return m_address; + }; + AccessPointType GetType() const + { + return m_type; + }; + +private: + const MPTR m_address; + const AccessPointType m_type; +}; diff --git a/src/Cafe/HW/Espresso/Debugger/GDBStub.cpp b/src/Cafe/HW/Espresso/Debugger/GDBStub.cpp new file mode 100644 index 00000000..e54fae1b --- /dev/null +++ b/src/Cafe/HW/Espresso/Debugger/GDBStub.cpp @@ -0,0 +1,988 @@ +#include "GDBStub.h" +#include "Debugger.h" +#include "Cafe/HW/Espresso/Recompiler/PPCRecompiler.h" +#include "GDBBreakpoints.h" +#include "util/helpers/helpers.h" +#include "util/ThreadPool/ThreadPool.h" +#include "Cafe/OS/RPL/rpl.h" +#include "Cafe/OS/RPL/rpl_structs.h" +#include "Cafe/OS/libs/coreinit/coreinit_Scheduler.h" +#include "Cafe/OS/libs/coreinit/coreinit_Thread.h" +#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" +#include "Cafe/HW/Espresso/EspressoISA.h" +#include "Common/socket.h" + +#define GET_THREAD_ID(threadPtr) memory_getVirtualOffsetFromPointer(threadPtr) +#define 
GET_THREAD_BY_ID(threadId) (OSThread_t*)memory_getPointerFromPhysicalOffset(threadId) + +static std::vector findNextInstruction(MPTR currAddress, uint32 lr, uint32 ctr) +{ + using namespace Espresso; + + uint32 nextInstr = memory_readU32(currAddress); + if (GetPrimaryOpcode(nextInstr) == PrimaryOpcode::B) + { + uint32 LI; + bool AA, LK; + decodeOp_B(nextInstr, LI, AA, LK); + if (!AA) + LI += currAddress; + return {LI}; + } + if (GetPrimaryOpcode(nextInstr) == PrimaryOpcode::BC) + { + uint32 BD, BI; + BOField BO{}; + bool AA, LK; + decodeOp_BC(nextInstr, BD, BO, BI, AA, LK); + if (!LK) + BD += currAddress; + return {currAddress + 4, BD}; + } + if (GetPrimaryOpcode(nextInstr) == PrimaryOpcode::GROUP_19 && GetGroup19Opcode(nextInstr) == Opcode19::BCLR) + { + return {currAddress + 4, lr}; + } + if (GetPrimaryOpcode(nextInstr) == PrimaryOpcode::GROUP_19 && GetGroup19Opcode(nextInstr) == Opcode19::BCCTR) + { + return {currAddress + 4, ctr}; + } + return {currAddress + 4}; +} + +template +static void selectThread(sint64 selectorId, F&& action_for_thread) +{ + __OSLockScheduler(); + cemu_assert_debug(activeThreadCount != 0); + + if (selectorId == -1) + { + for (sint32 i = 0; i < activeThreadCount; i++) + { + action_for_thread(GET_THREAD_BY_ID(activeThread[i])); + } + } + else if (selectorId == 0) + { + // Use first thread if attempted to be stopped + // todo: would this work better if it used main? + action_for_thread(coreinit::OSGetDefaultThread(1)); + } + else if (selectorId > 0) + { + for (sint32 i = 0; i < activeThreadCount; i++) + { + auto* thread = GET_THREAD_BY_ID(activeThread[i]); + if (GET_THREAD_ID(thread) == selectorId) + { + action_for_thread(thread); + break; + } + } + } + __OSUnlockScheduler(); +} + +template +static void selectAndBreakThread(sint64 selectorId, F&& action_for_thread) +{ + __OSLockScheduler(); + cemu_assert_debug(activeThreadCount != 0); + + std::vector pausedThreads; + if (selectorId == -1) + { + for (sint32 i = 0; i < activeThreadCount; i++) + { + coreinit::__OSSuspendThreadNolock(GET_THREAD_BY_ID(activeThread[i])); + pausedThreads.emplace_back(GET_THREAD_BY_ID(activeThread[i])); + } + } + else if (selectorId == 0) + { + // Use first thread if attempted to be stopped + OSThread_t* thread = GET_THREAD_BY_ID(activeThread[0]); + for (sint32 i = 0; i < activeThreadCount; i++) + { + if (GET_THREAD_ID(GET_THREAD_BY_ID(activeThread[i])) < GET_THREAD_ID(thread)) + { + thread = GET_THREAD_BY_ID(activeThread[i]); + } + } + coreinit::__OSSuspendThreadNolock(thread); + pausedThreads.emplace_back(thread); + } + else if (selectorId > 0) + { + for (sint32 i = 0; i < activeThreadCount; i++) + { + auto* thread = GET_THREAD_BY_ID(activeThread[i]); + if (GET_THREAD_ID(thread) == selectorId) + { + coreinit::__OSSuspendThreadNolock(thread); + pausedThreads.emplace_back(thread); + break; + } + } + } + __OSUnlockScheduler(); + + for (OSThread_t* thread : pausedThreads) + { + while (coreinit::OSIsThreadRunning(thread)) + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + action_for_thread(thread); + } +} + +static void selectAndResumeThread(sint64 selectorId) +{ + __OSLockScheduler(); + cemu_assert_debug(activeThreadCount != 0); + + if (selectorId == -1) + { + for (sint32 i = 0; i < activeThreadCount; i++) + { + coreinit::__OSResumeThreadInternal(GET_THREAD_BY_ID(activeThread[i]), 4); + } + } + else if (selectorId == 0) + { + // Use first thread if attempted to be stopped + coreinit::__OSResumeThreadInternal(coreinit::OSGetDefaultThread(1), 1); + } + else if (selectorId > 0) + { 
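// Thread selectors follow the GDB remote-protocol convention: -1 addresses every active
// thread, 0 means "no particular thread" (a default thread is picked), and any positive
// value names a specific thread id, here the guest MPTR of the OSThread_t structure, since
// GET_THREAD_ID() is just memory_getVirtualOffsetFromPointer(). Illustrative mapping
// (the concrete MPTR is hypothetical):
//
//     "vCont;c"              -> selector -1         (resume all threads)
//     "Hc0" followed by "c"  -> selector 0          (default thread)
//     "Hc1000a570" then "c"  -> selector 0x1000A570 (that one OSThread_t)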
+ for (sint32 i = 0; i < activeThreadCount; i++) + { + auto* thread = GET_THREAD_BY_ID(activeThread[i]); + if (GET_THREAD_ID(thread) == selectorId) + { + coreinit::__OSResumeThreadInternal(thread, 1); + break; + } + } + } + __OSUnlockScheduler(); +} + +static void waitForBrokenThreads(std::unique_ptr context, std::string_view reason) +{ + // This should pause all threads except trapped thread + // It should however wait for the trapped thread + // The trapped thread should be paused by the trap word instruction handler (aka the running thread) + std::vector threadsList; + __OSLockScheduler(); + for (sint32 i = 0; i < activeThreadCount; i++) + { + threadsList.emplace_back(GET_THREAD_BY_ID(activeThread[i])); + } + __OSUnlockScheduler(); + + for (OSThread_t* thread : threadsList) + { + while (coreinit::OSIsThreadRunning(thread)) + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } + + context->QueueResponse(reason); +} + +static void breakThreads(sint64 trappedThread) +{ + __OSLockScheduler(); + cemu_assert_debug(activeThreadCount != 0); + + // First, break other threads + OSThread_t* mainThread = nullptr; + for (sint32 i = 0; i < activeThreadCount; i++) + { + if (GET_THREAD_ID(GET_THREAD_BY_ID(activeThread[i])) == trappedThread) + { + mainThread = GET_THREAD_BY_ID(activeThread[i]); + } + else + { + coreinit::__OSSuspendThreadNolock(GET_THREAD_BY_ID(activeThread[i])); + } + } + + // Second, break trapped thread itself which should also pause execution of this handler + // This will temporarily lift the scheduler lock until it's resumed from its suspension + coreinit::__OSSuspendThreadNolock(mainThread); + + __OSUnlockScheduler(); +} + +std::unique_ptr g_gdbstub; + +GDBServer::GDBServer(uint16 port) + : m_port(port) +{ +#if BOOST_OS_WINDOWS + WSADATA wsa; + WSAStartup(MAKEWORD(2, 2), &wsa); +#endif +} + +GDBServer::~GDBServer() +{ + if (m_client_socket != INVALID_SOCKET) + { + // close socket from other thread to forcefully stop accept() call + closesocket(m_client_socket); + m_client_socket = INVALID_SOCKET; + } + + if (m_server_socket != INVALID_SOCKET) + { + closesocket(m_server_socket); + } +#if BOOST_OS_WINDOWS + WSACleanup(); +#endif + + m_stopRequested = false; + m_thread.join(); +} + +bool GDBServer::Initialize() +{ + cemuLog_createLogFile(false); + + if (m_server_socket = socket(PF_INET, SOCK_STREAM, 0); m_server_socket == SOCKET_ERROR) + return false; + + int reuseEnabled = TRUE; + if (setsockopt(m_server_socket, SOL_SOCKET, SO_REUSEADDR, (char*)&reuseEnabled, sizeof(reuseEnabled)) == SOCKET_ERROR) + { + closesocket(m_server_socket); + m_server_socket = INVALID_SOCKET; + return false; + } + + int nodelayEnabled = TRUE; + if (setsockopt(m_server_socket, IPPROTO_TCP, TCP_NODELAY, (char*)&nodelayEnabled, sizeof(nodelayEnabled)) == SOCKET_ERROR) + { + closesocket(m_server_socket); + m_server_socket = INVALID_SOCKET; + return false; + } + + memset(&m_server_addr, 0, sizeof(m_server_addr)); + m_server_addr.sin_family = AF_INET; + m_server_addr.sin_addr.s_addr = htonl(INADDR_ANY); + m_server_addr.sin_port = htons(m_port); + + if (bind(m_server_socket, (sockaddr*)&m_server_addr, sizeof(m_server_addr)) == SOCKET_ERROR) + { + closesocket(m_server_socket); + m_server_socket = INVALID_SOCKET; + return false; + } + + if (listen(m_server_socket, s_maxGDBClients) == SOCKET_ERROR) + { + closesocket(m_server_socket); + m_server_socket = INVALID_SOCKET; + return false; + } + + m_thread = std::thread(std::bind(&GDBServer::ThreadFunc, this)); + + return true; +} + +void 
GDBServer::ThreadFunc() +{ + SetThreadName("GDBServer"); + + while (!m_stopRequested) + { + if (!m_client_connected) + { + cemuLog_logDebug(LogType::Force, "[GDBStub] Waiting for client to connect on port {}...", m_port); + socklen_t client_addr_size = sizeof(m_client_addr); + m_client_socket = accept(m_server_socket, (struct sockaddr*)&m_client_addr, &client_addr_size); + m_client_connected = m_client_socket != SOCKET_ERROR; + } + else + { + auto receiveMessage = [&](char* buffer, const int32_t length) -> bool { + if (recv(m_client_socket, buffer, length, 0) != SOCKET_ERROR) + return false; + return true; + }; + + auto readChar = [&]() -> char { + char ret = 0; + recv(m_client_socket, &ret, 1, 0); + return ret; + }; + + char packetPrefix = readChar(); + + switch (packetPrefix) + { + case '+': + case '-': + break; + case '\x03': + { + cemuLog_logDebug(LogType::Force, "[GDBStub] Received interrupt (pressed CTRL+C?) from client!"); + selectAndBreakThread(-1, [](OSThread_t* thread) { + }); + auto thread_status = fmt::format("T05thread:{:08X};", GET_THREAD_ID(coreinit::OSGetDefaultThread(1))); + if (this->m_resumed_context) + { + this->m_resumed_context->QueueResponse(thread_status); + this->m_resumed_context.reset(); + } + else + { + auto response_full = fmt::format("+${}#{:02x}", thread_status, CommandContext::CalculateChecksum(thread_status)); + send(m_client_socket, response_full.c_str(), (int)response_full.size(), 0); + } + break; + } + case '$': + { + std::string message; + uint8 checkedSum = 0; + for (uint32_t i = 1;; i++) + { + char c = readChar(); + if (c == '#') + break; + checkedSum += static_cast(c); + message.push_back(c); + + if (i >= s_maxPacketSize) + cemuLog_logDebug(LogType::Force, "[GDBStub] Received too big of a buffer: {}", message); + } + char checkSumStr[2]; + receiveMessage(checkSumStr, 2); + uint32_t checkSum = std::stoi(std::string(checkSumStr, sizeof(checkSumStr)), nullptr, 16); + assert((checkedSum & 0xFF) == checkSum); + + HandleCommand(message); + break; + } + default: + // cemuLog_logDebug(LogType::Force, "[GDBStub] Unknown packet start: {}", packetPrefix); + break; + } + } + } + + if (m_client_socket != INVALID_SOCKET) + closesocket(m_client_socket); +} + +void GDBServer::HandleCommand(const std::string& command_str) +{ + auto context = std::make_unique(this, command_str); + + if (context->IsValid()) + { + // cemuLog_logDebug(LogType::Force, "[GDBStub] Extracted Command {}", fmt::join(context->GetArgs(), ",")); + } + + switch (context->GetType()) + { + // Extended commands + case CMDType::QUERY_GET: + case CMDType::QUERY_SET: + return HandleQuery(context); + case CMDType::VCONT: + return HandleVCont(context); + // Regular commands + case CMDType::IS_THREAD_RUNNING: + return CMDIsThreadActive(context); + case CMDType::SET_ACTIVE_THREAD: + return CMDSetActiveThread(context); + case CMDType::ACTIVE_THREAD_STATUS: + return CMDGetThreadStatus(context); + case CMDType::CONTINUE: + return CMDContinue(context); + case CMDType::ACTIVE_THREAD_STEP: + break; + case CMDType::REGISTER_READ: + return CMDReadRegister(context); + case CMDType::REGISTER_SET: + return CMDWriteRegister(context); + case CMDType::REGISTERS_READ: + return CMDReadRegisters(context); + case CMDType::REGISTERS_WRITE: + return CMDWriteRegisters(context); + case CMDType::MEMORY_READ: + return CMDReadMemory(context); + case CMDType::MEMORY_WRITE: + return CMDWriteMemory(context); + case CMDType::BREAKPOINT_SET: + return CMDInsertBreakpoint(context); + case CMDType::BREAKPOINT_REMOVE: + return 
CMDDeleteBreakpoint(context); + case CMDType::INVALID: + default: + return CMDNotFound(context); + } + + CMDNotFound(context); +} + +void GDBServer::HandleQuery(std::unique_ptr& context) const +{ + if (!context->IsValid()) + return context->QueueResponse(RESPONSE_EMPTY); + + const auto& query_cmd = context->GetArgs()[0]; + const auto& query_args = context->GetArgs().begin() + 1; + + if (query_cmd == "qSupported") + { + context->QueueResponse(s_supportedFeatures); + } + else if (query_cmd == "qAttached") + { + context->QueueResponse("1"); + } + else if (query_cmd == "qRcmd") + { + } + else if (query_cmd == "qC") + { + context->QueueResponse("QC"); + context->QueueResponse(std::to_string(m_activeThreadContinueSelector)); + } + else if (query_cmd == "qOffsets") + { + const auto module_count = RPLLoader_GetModuleCount(); + const auto module_list = RPLLoader_GetModuleList(); + for (sint32 i = 0; i < module_count; i++) + { + const RPLModule* rpl = module_list[i]; + if (rpl->entrypoint == m_entry_point) + { + context->QueueResponse(fmt::format("TextSeg={:08X};DataSeg={:08X}", rpl->regionMappingBase_text.GetMPTR(), rpl->regionMappingBase_data)); + } + } + } + else if (query_cmd == "qfThreadInfo") + { + std::vector threadIds; + selectThread(-1, [&threadIds](OSThread_t* thread) { + threadIds.emplace_back(fmt::format("{:08X}", memory_getVirtualOffsetFromPointer(thread))); + }); + context->QueueResponse(fmt::format("m{}", fmt::join(threadIds, ","))); + } + else if (query_cmd == "qsThreadInfo") + { + context->QueueResponse("l"); + } + else if (query_cmd == "qXfer") + { + auto& type = query_args[0]; + + if (type == "features") + { + auto& annex = query_args[1]; + sint64 read_offset = std::stoul(query_args[2], nullptr, 16); + sint64 read_length = std::stoul(query_args[3], nullptr, 16); + if (annex == "target.xml") + { + if (read_offset >= GDBTargetXML.size()) + context->QueueResponse("l"); + else + { + auto paginated_str = GDBTargetXML.substr(read_offset, read_length); + context->QueueResponse((paginated_str.size() == read_length) ? "m" : "l"); + context->QueueResponse(paginated_str); + } + } + else + cemuLog_logDebug(LogType::Force, "[GDBStub] qXfer:features:read:{} isn't a known feature document", annex); + } + else if (type == "threads") + { + sint64 read_offset = std::stoul(query_args[1], nullptr, 16); + sint64 read_length = std::stoul(query_args[2], nullptr, 16); + + std::string threads_res; + threads_res += R"()"; + threads_res += ""; + // note: clion seems to default to the first thread + std::map threads_list; + selectThread(-1, [&threads_list](OSThread_t* thread) { + std::string entry; + entry += fmt::format(R"(context.upir.value()); + if (!thread->threadName.IsNull()) + entry += fmt::format(R"( name="{}")", CommandContext::EscapeXMLString(thread->threadName.GetPtr())); + // todo: could add a human-readable description of the thread here + entry += fmt::format(">"); + threads_list.emplace(GET_THREAD_ID(thread), entry); + }); + for (auto& entry : threads_list) + { + threads_res += entry.second; + } + threads_res += ""; + + if (read_offset >= threads_res.size()) + context->QueueResponse("l"); + else + { + auto paginated_str = threads_res.substr(read_offset, read_length); + context->QueueResponse((paginated_str.size() == read_length) ? 
"m" : "l"); + context->QueueResponse(paginated_str); + } + } + else if (type == "libraries") + { + sint64 read_offset = std::stoul(query_args[1], nullptr, 16); + sint64 read_length = std::stoul(query_args[2], nullptr, 16); + + std::string library_list; + library_list += R"()"; + library_list += ""; + + const auto module_count = RPLLoader_GetModuleCount(); + const auto module_list = RPLLoader_GetModuleList(); + for (sint32 i = 0; i < module_count; i++) + { + library_list += fmt::format(R"()", CommandContext::EscapeXMLString(module_list[i]->moduleName2), module_list[i]->regionMappingBase_text.GetMPTR()); + } + library_list += ""; + + if (read_offset >= library_list.size()) + context->QueueResponse("l"); + else + { + auto paginated_str = library_list.substr(read_offset, read_length); + context->QueueResponse((paginated_str.size() == read_length) ? "m" : "l"); + context->QueueResponse(paginated_str); + } + } + else + { + context->QueueResponse(RESPONSE_EMPTY); + } + } + else + { + context->QueueResponse(RESPONSE_EMPTY); + } +} + +void GDBServer::HandleVCont(std::unique_ptr& context) +{ + if (!context->IsValid()) + { + cemuLog_logDebug(LogType::Force, "[GDBStub] Received unsupported vCont command: {}", context->GetCommand()); + // cemu_assert_unimplemented(); + return context->QueueResponse(RESPONSE_EMPTY); + } + + const std::string& vcont_cmd = context->GetArgs()[0]; + if (vcont_cmd == "vCont?") + return context->QueueResponse("vCont;c;C;s;S"); + + else if (vcont_cmd != "vCont;") + return context->QueueResponse(RESPONSE_EMPTY); + + m_resumed_context = std::move(context); + + bool resumedNoThreads = true; + for (const auto operation : TokenizeView(m_resumed_context->GetArgs()[1], ';')) + { + // todo: this might have issues with the signal versions (C/S) + // todo: test whether this works with multiple vCont;c:123123;c:123123 + std::string_view operationType = operation.substr(0, operation.find(':')); + sint64 threadSelector = operationType.size() == operation.size() ? 
-1 : std::stoll(std::string(operation.substr(operationType.size() + 1)), nullptr, 16); + + if (operationType == "c" || operationType.starts_with("C")) + { + selectAndResumeThread(threadSelector); + resumedNoThreads = false; + } + else if (operationType == "s" || operationType.starts_with("S")) + { + selectThread(threadSelector, [this](OSThread_t* thread) { + auto nextInstructions = findNextInstruction(thread->context.srr0, thread->context.lr, thread->context.ctr); + for (MPTR nextInstr : nextInstructions) + { + auto bpIt = m_patchedInstructions.find(nextInstr); + if (bpIt == m_patchedInstructions.end()) + this->m_patchedInstructions.try_emplace(nextInstr, nextInstr, BreakpointType::BP_STEP_POINT, false, "swbreak:;"); + else + bpIt->second.PauseOnNextInterrupt(); + } + }); + } + } + + if (resumedNoThreads) + { + selectAndResumeThread(-1); + cemuLog_logDebug(LogType::Force, "[GDBStub] Resumed all threads after skip instructions"); + } +} + +void GDBServer::CMDContinue(std::unique_ptr& context) +{ + m_resumed_context = std::move(context); + selectAndResumeThread(m_activeThreadContinueSelector); +} + +void GDBServer::CMDNotFound(std::unique_ptr& context) +{ + return context->QueueResponse(RESPONSE_EMPTY); +} + +void GDBServer::CMDIsThreadActive(std::unique_ptr& context) +{ + sint64 threadSelector = std::stoll(context->GetArgs()[1], nullptr, 16); + bool foundThread = false; + selectThread(threadSelector, [&foundThread](OSThread_t* thread) { + foundThread = true; + }); + + if (foundThread) + return context->QueueResponse(RESPONSE_OK); + else + return context->QueueResponse(RESPONSE_ERROR); +} + +void GDBServer::CMDSetActiveThread(std::unique_ptr& context) +{ + sint64 threadSelector = std::stoll(context->GetArgs()[2], nullptr, 16); + if (threadSelector >= 0) + { + bool foundThread = false; + selectThread(threadSelector, [&foundThread](OSThread_t* thread) { + foundThread = true; + }); + if (!foundThread) + return context->QueueResponse(RESPONSE_ERROR); + } + if (context->GetArgs()[1] == "c") + m_activeThreadContinueSelector = threadSelector; + else + m_activeThreadSelector = threadSelector; + return context->QueueResponse(RESPONSE_OK); +} + +void GDBServer::CMDGetThreadStatus(std::unique_ptr& context) +{ + selectThread(0, [&context](OSThread_t* thread) { + context->QueueResponse(fmt::format("T05thread:{:08X};", memory_getVirtualOffsetFromPointer(thread))); + }); +} + +void GDBServer::CMDReadRegister(std::unique_ptr& context) const +{ + sint32 reg = std::stoi(context->GetArgs()[1], nullptr, 16); + selectThread(m_activeThreadSelector, [reg, &context](OSThread_t* thread) { + auto& cpu = thread->context; + if (reg >= RegisterID::R0_START && reg <= RegisterID::R31_END) + { + return context->QueueResponse(fmt::format("{:08X}", CPU_swapEndianU32(cpu.gpr[reg]))); + } + else if (reg >= RegisterID::F0_START && reg <= RegisterID::F31_END) + { + return context->QueueResponse(fmt::format("{:016X}", cpu.fp_ps0[reg - RegisterID::F0_START].value())); + } + else if (reg == RegisterID::FPSCR) + { + return context->QueueResponse(fmt::format("{:08X}", cpu.fpscr.fpscr.value())); + } + else + { + switch (reg) + { + case RegisterID::PC: return context->QueueResponse(fmt::format("{:08X}", cpu.srr0)); + case RegisterID::MSR: return context->QueueResponse("xxxxxxxx"); + case RegisterID::CR: return context->QueueResponse(fmt::format("{:08X}", cpu.cr)); + case RegisterID::LR: return context->QueueResponse(fmt::format("{:08X}", CPU_swapEndianU32(cpu.lr))); + case RegisterID::CTR: return 
context->QueueResponse(fmt::format("{:08X}", cpu.ctr)); + case RegisterID::XER: return context->QueueResponse(fmt::format("{:08X}", cpu.xer)); + default: break; + } + } + }); +} + +void GDBServer::CMDWriteRegister(std::unique_ptr& context) const +{ + sint32 reg = std::stoi(context->GetArgs()[1], nullptr, 16); + uint64 value = std::stoll(context->GetArgs()[2], nullptr, 16); + selectThread(m_activeThreadSelector, [reg, value, &context](OSThread_t* thread) { + auto& cpu = thread->context; + if (reg >= RegisterID::R0_START && reg <= RegisterID::R31_END) + { + cpu.gpr[reg] = CPU_swapEndianU32(value); + return context->QueueResponse(RESPONSE_OK); + } + else if (reg >= RegisterID::F0_START && reg <= RegisterID::F31_END) + { + // todo: figure out how to properly write to paired single registers + cpu.fp_ps0[reg - RegisterID::F0_START] = uint64be{value}; + return context->QueueResponse(RESPONSE_OK); + } + else if (reg == RegisterID::FPSCR) + { + cpu.fpscr.fpscr = uint32be{(uint32)value}; + return context->QueueResponse(RESPONSE_OK); + } + else + { + switch (reg) + { + case RegisterID::PC: + cpu.srr0 = value; + return context->QueueResponse(RESPONSE_OK); + case RegisterID::MSR: + return context->QueueResponse(RESPONSE_ERROR); + case RegisterID::CR: + cpu.cr = value; + return context->QueueResponse(RESPONSE_OK); + case RegisterID::LR: + cpu.lr = CPU_swapEndianU32(value); + return context->QueueResponse(RESPONSE_OK); + case RegisterID::CTR: + cpu.ctr = value; + return context->QueueResponse(RESPONSE_OK); + case RegisterID::XER: + cpu.xer = value; + return context->QueueResponse(RESPONSE_OK); + default: + return context->QueueResponse(RESPONSE_ERROR); + } + } + }); +} + +void GDBServer::CMDReadRegisters(std::unique_ptr& context) const +{ + selectThread(m_activeThreadSelector, [&context](OSThread_t* thread) { + for (uint32& reg : thread->context.gpr) + { + context->QueueResponse(fmt::format("{:08X}", CPU_swapEndianU32(reg))); + } + }); +} + +void GDBServer::CMDWriteRegisters(std::unique_ptr& context) const +{ + selectThread(m_activeThreadSelector, [&context](OSThread_t* thread) { + auto& registers = context->GetArgs()[1]; + for (uint32 i = 0; i < 32; i++) + { + thread->context.gpr[i] = CPU_swapEndianU32(std::stoi(registers.substr(i * 2, 2), nullptr, 16)); + } + }); +} + +void GDBServer::CMDReadMemory(std::unique_ptr& context) +{ + sint64 addr = std::stoul(context->GetArgs()[1], nullptr, 16); + sint64 length = std::stoul(context->GetArgs()[2], nullptr, 16); + + // todo: handle cross-mmu-range memory requests + if (!memory_isAddressRangeAccessible(addr, length)) + return context->QueueResponse(RESPONSE_ERROR); + + std::string memoryRepr; + uint8* values = memory_getPointerFromVirtualOffset(addr); + for (sint64 i = 0; i < length; i++) + { + memoryRepr += fmt::format("{:02X}", values[i]); + } + + auto patchesRange = m_patchedInstructions.lower_bound(addr); + while (patchesRange != m_patchedInstructions.end() && patchesRange->first < (addr + length)) + { + auto replStr = fmt::format("{:02X}", patchesRange->second.GetVisibleOpCode()); + memoryRepr[(patchesRange->first - addr) * 2] = replStr[0]; + memoryRepr[(patchesRange->first - addr) * 2 + 1] = replStr[1]; + patchesRange++; + } + return context->QueueResponse(memoryRepr); +} + +void GDBServer::CMDWriteMemory(std::unique_ptr& context) +{ + sint64 addr = std::stoul(context->GetArgs()[1], nullptr, 16); + sint64 length = std::stoul(context->GetArgs()[2], nullptr, 16); + auto source = context->GetArgs()[3]; + + // todo: handle cross-mmu-range memory requests + 
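// A GDB 'M addr,length:XX...' packet encodes its payload as two hex digits per byte. For an
// address that currently holds a patched trap opcode, the loop below does not write guest
// memory directly; it splices the incoming byte into the opcode saved by the breakpoint, so
// the edit takes effect once the trap word is removed and the saved opcode is written back.
// Isolated sketch of that splice on the guest's big-endian layout (values are hypothetical):
//
//     uint32 opcode  = 0x7C020008;                 // opcode currently saved by the breakpoint
//     uint32 byteIdx = 3 - (1 % 4);                // writing byte 1 of the word; byte 0 is the MSB
//     opcode &= ~(0xFFu << (byteIdx * 8));         // mask out the old byte
//     opcode |= (uint32)0xAB << (byteIdx * 8);     // splice in the new byte -> 0x7CAB0008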
if (!memory_isAddressRangeAccessible(addr, length)) + return context->QueueResponse(RESPONSE_ERROR); + + uint8* values = memory_getPointerFromVirtualOffset(addr); + for (sint64 i = 0; i < length; i++) + { + uint8 hexValue; + const std::from_chars_result result = std::from_chars(source.data() + (i * 2), (source.data() + (i * 2) + 2), hexValue, 16); + if (result.ec == std::errc::invalid_argument || result.ec == std::errc::result_out_of_range) + return context->QueueResponse(RESPONSE_ERROR); + + if (auto it = m_patchedInstructions.find(addr + i); it != m_patchedInstructions.end()) + { + uint32 newOpCode = it->second.GetVisibleOpCode(); + uint32 byteIndex = 3 - ((addr + i) % 4); // inverted because of big endian, so address 0 is the highest byte + newOpCode &= ~(0xFF << (byteIndex * 8)); // mask out the byte + newOpCode |= ((uint32)hexValue << (byteIndex * 8)); // set new byte with OR + it->second.WriteNewOpCode(newOpCode); + } + else + { + values[i] = hexValue; + } + } + return context->QueueResponse(RESPONSE_OK); +} + +void GDBServer::CMDInsertBreakpoint(std::unique_ptr& context) +{ + auto type = std::stoul(context->GetArgs()[1], nullptr, 16); + MPTR addr = static_cast(std::stoul(context->GetArgs()[2], nullptr, 16)); + + if (type == 0 || type == 1) + { + auto bp = this->m_patchedInstructions.find(addr); + if (bp != this->m_patchedInstructions.end()) + this->m_patchedInstructions.erase(bp); + this->m_patchedInstructions.try_emplace(addr, addr, BreakpointType::BP_PERSISTENT, type == 0, type == 0 ? "swbreak:;" : "hwbreak:;"); + } + else if (type == 2 || type == 3 || type == 4) + { + if (this->m_watch_point) + return context->QueueResponse(RESPONSE_ERROR); + + this->m_watch_point = std::make_unique(addr, (AccessPointType)type); + } + + return context->QueueResponse(RESPONSE_OK); +} + +void GDBServer::CMDDeleteBreakpoint(std::unique_ptr& context) +{ + auto type = std::stoul(context->GetArgs()[1], nullptr, 16); + MPTR addr = static_cast(std::stoul(context->GetArgs()[2], nullptr, 16)); + + if (type == 0 || type == 1) + { + auto bp = this->m_patchedInstructions.find(addr); + if (bp == this->m_patchedInstructions.end() || !bp->second.ShouldBreakThreads()) + return context->QueueResponse(RESPONSE_ERROR); + else + this->m_patchedInstructions.erase(bp); + } + else if (type == 2 || type == 3 || type == 4) + { + if (!this->m_watch_point || this->m_watch_point->GetAddress() != addr) + return context->QueueResponse(RESPONSE_ERROR); + + this->m_watch_point.reset(); + } + + return context->QueueResponse(RESPONSE_OK); +} + +// Internal functions for control +void GDBServer::HandleTrapInstruction(PPCInterpreter_t* hCPU) +{ + // First, restore any removed breakpoints + for (auto& bp : m_patchedInstructions) + { + if (bp.second.IsRemoved()) + bp.second.Restore(); + } + + auto patchedBP = m_patchedInstructions.find(hCPU->instructionPointer); + if (patchedBP == m_patchedInstructions.end()) + return cemu_assert_suspicious(); + + // Secondly, delete one-shot breakpoints but also temporarily delete patched instruction to run original instruction + OSThread_t* currThread = coreinit::OSGetCurrentThread(); + std::string pauseReason = fmt::format("T05thread:{:08X};core:{:02X};{}", GET_THREAD_ID(currThread), PPCInterpreter_getCoreIndex(hCPU), patchedBP->second.GetReason()); + bool pauseThreads = patchedBP->second.ShouldBreakThreads() || patchedBP->second.ShouldBreakThreadsOnNextInterrupt(); + if (patchedBP->second.IsPersistent()) + { + // Insert new restore breakpoints at next possible instructions which restores 
breakpoints but won't pause the CPU + std::vector nextInstructions = findNextInstruction(hCPU->instructionPointer, hCPU->spr.LR, hCPU->spr.CTR); + for (MPTR nextInstr : nextInstructions) + { + if (!m_patchedInstructions.contains(nextInstr)) + this->m_patchedInstructions.try_emplace(nextInstr, nextInstr, BreakpointType::BP_STEP_POINT, false, ""); + } + patchedBP->second.RemoveTemporarily(); + } + else + { + m_patchedInstructions.erase(patchedBP); + } + + // Thirdly, delete any instructions that were generated by a skip instruction + for (auto it = m_patchedInstructions.cbegin(), next_it = it; it != m_patchedInstructions.cend(); it = next_it) + { + ++next_it; + if (it->second.IsSkipBreakpoint()) + { + m_patchedInstructions.erase(it); + } + } + + // Fourthly, the stub can insert breakpoints that are just meant to restore patched instructions, in which case we just want to continue + if (pauseThreads) + { + cemuLog_logDebug(LogType::Force, "[GDBStub] Got trapped by a breakpoint!"); + if (m_resumed_context) + { + // Spin up thread to signal when another GDB stub trap is found + ThreadPool::FireAndForget(&waitForBrokenThreads, std::move(m_resumed_context), pauseReason); + } + + breakThreads(GET_THREAD_ID(coreinit::OSGetCurrentThread())); + cemuLog_logDebug(LogType::Force, "[GDBStub] Resumed from a breakpoint!"); + } +} + +void GDBServer::HandleAccessException(uint64 dr6) +{ + bool triggeredWrite = GetBits(dr6, 2, 1); + bool triggeredReadWrite = GetBits(dr6, 3, 1); + + std::string response; + if (m_watch_point->GetType() == AccessPointType::BP_WRITE && triggeredWrite) + response = fmt::format("watch:{:08X};", m_watch_point->GetAddress()); + else if (m_watch_point->GetType() == AccessPointType::BP_READ && triggeredReadWrite && !triggeredWrite) + response = fmt::format("rwatch:{:08X};", m_watch_point->GetAddress()); + else if (m_watch_point->GetType() == AccessPointType::BP_BOTH && triggeredReadWrite) + response = fmt::format("awatch:{:08X};", m_watch_point->GetAddress()); + + if (!response.empty()) + { + PPCInterpreter_t* hCPU = PPCInterpreter_getCurrentInstance(); + cemuLog_logDebug(LogType::Force, "Received matching breakpoint exception: {}", response); + auto nextInstructions = findNextInstruction(hCPU->instructionPointer, hCPU->spr.LR, hCPU->spr.CTR); + for (MPTR nextInstr : nextInstructions) + { + auto bpIt = m_patchedInstructions.find(nextInstr); + if (bpIt == m_patchedInstructions.end()) + this->m_patchedInstructions.try_emplace(nextInstr, nextInstr, BreakpointType::BP_STEP_POINT, false, response); + else + bpIt->second.PauseOnNextInterrupt(); + } + } +} + +void GDBServer::HandleEntryStop(uint32 entryAddress) +{ + this->m_patchedInstructions.try_emplace(entryAddress, entryAddress, BreakpointType::BP_SINGLE, false, ""); + m_entry_point = entryAddress; +} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Debugger/GDBStub.h b/src/Cafe/HW/Espresso/Debugger/GDBStub.h new file mode 100644 index 00000000..e198604d --- /dev/null +++ b/src/Cafe/HW/Espresso/Debugger/GDBStub.h @@ -0,0 +1,316 @@ +#pragma once + +#include "Common/precompiled.h" +#include "Common/socket.h" +#include "Cafe/OS/libs/coreinit/coreinit_Thread.h" + +#include + +class GDBServer { +public: + explicit GDBServer(uint16 port); + ~GDBServer(); + + bool Initialize(); + bool IsConnected() + { + return m_client_connected; + } + + void HandleEntryStop(uint32 entryAddress); + void HandleTrapInstruction(PPCInterpreter_t* hCPU); + void HandleAccessException(uint64 dr6); + + enum class CMDType : char + { + INVALID = '\0', + // 
Extended commands + QUERY_GET = 'q', + QUERY_SET = 'Q', + VCONT = 'v', + // Normal commands + CONTINUE = 'c', + IS_THREAD_RUNNING = 'T', + SET_ACTIVE_THREAD = 'H', + ACTIVE_THREAD_STATUS = '?', + ACTIVE_THREAD_STEP = 's', + REGISTER_READ = 'p', + REGISTER_SET = 'P', + REGISTERS_READ = 'g', + REGISTERS_WRITE = 'G', + MEMORY_READ = 'm', + MEMORY_WRITE = 'M', + BREAKPOINT_SET = 'Z', + BREAKPOINT_REMOVE = 'z', + }; + + class CommandContext { + public: + CommandContext(const GDBServer* server, const std::string& command) + : m_server(server), m_command(command) + { + std::smatch matches; + std::regex_match(command, matches, m_regex); + for (size_t i = 1; i < matches.size(); i++) + { + auto matchStr = matches[i].str(); + if (!matchStr.empty()) + m_args.emplace_back(std::move(matchStr)); + } + // send acknowledgement ahead of response + send(m_server->m_client_socket, RESPONSE_ACK.data(), (int)RESPONSE_ACK.size(), 0); + }; + ~CommandContext() + { + // cemuLog_logDebug(LogType::Force, "[GDBStub] Received: {}", m_command); + // cemuLog_logDebug(LogType::Force, "[GDBStub] Responded: +{}", m_response); + auto response_data = EscapeMessage(m_response); + auto response_full = fmt::format("${}#{:02x}", response_data, CalculateChecksum(response_data)); + send(m_server->m_client_socket, response_full.c_str(), (int)response_full.size(), 0); + } + CommandContext(const CommandContext&) = delete; + + [[nodiscard]] const std::string& GetCommand() const + { + return m_command; + }; + [[nodiscard]] const std::vector& GetArgs() const + { + return m_args; + }; + [[nodiscard]] bool IsValid() const + { + return !m_args.empty(); + }; + [[nodiscard]] CMDType GetType() const + { + return static_cast(m_command[0]); + }; + + // Respond Utils + static uint8 CalculateChecksum(std::string_view message_data) + { + return std::accumulate(message_data.begin(), message_data.end(), (uint8)0, std::plus<>()); + } + static std::string EscapeXMLString(std::string_view xml_data) + { + std::string escaped; + escaped.reserve(xml_data.size()); + for (char c : xml_data) + { + switch (c) + { + case '<': escaped += "<"; break; + case '>': escaped += ">"; break; + case '&': escaped += "&"; break; + case '"': escaped += """; break; + case '\'': escaped += "'"; break; + default: escaped += c; break; + } + } + return escaped; + } + static std::string EscapeMessage(std::string_view message) + { + std::string escaped; + escaped.reserve(message.size()); + for (char c : message) + { + if (c == '#' || c == '$' || c == '}' || c == '*') + { + escaped.push_back('}'); + escaped.push_back((char)(c ^ 0x20)); + } + else + escaped.push_back(c); + } + return escaped; + } + void QueueResponse(std::string_view data) + { + m_response += data; + } + + private: + const std::regex m_regex{ + R"((?:)" + R"((\?))" + R"(|(vCont\?))" + R"(|(vCont;)([a-zA-Z0-9-+=,\+:;]+))" + R"(|(qAttached))" + R"(|(qSupported):([a-zA-Z0-9-+=,\+;]+))" + R"(|(qTStatus))" + R"(|(qC))" + R"(|(qXfer):((?:features)|(?:threads)|(?:libraries)):read:([\w\.]*):([0-9a-zA-Z]+),([0-9a-zA-Z]+))" + R"(|(qfThreadInfo))" + R"(|(qsThreadInfo))" + R"(|(T)((?:-1)|(?:[0-9A-Fa-f]+)))" + R"(|(D))" // Detach + R"(|(H)(c|g)((?:-1)|(?:[0-9A-Fa-f]+)))" // Set active thread for other operations (not c) + R"(|(c)([0-9A-Fa-f]+)?)" // (Legacy, supported by vCont) Continue all for active thread + R"(|([Zz])([0-4]),([0-9A-Fa-f]+),([0-9]))" // Insert/delete breakpoints + R"(|(g))" // Read registers for active thread + R"(|(G)([0-9A-Fa-f]+))" // Write registers for active thread + R"(|(p)([0-9A-Fa-f]+))" // Read 
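CalculateChecksum and EscapeMessage above implement the GDB remote serial protocol framing: reserved bytes are escaped as '}' followed by the byte XOR 0x20, and replies are sent as `$<payload>#` plus a two-digit mod-256 checksum computed over the escaped payload. A compact sketch of that framing; framePacket/escapePayload are hypothetical names, and fmt is used only because it is the formatting library this codebase already depends on:

```cpp
#include <cassert>
#include <cstdint>
#include <numeric>
#include <string>
#include <string_view>
#include <fmt/format.h>

// Escape the reserved bytes '$', '#', '}' and '*' as '}' followed by (byte ^ 0x20).
static std::string escapePayload(std::string_view payload)
{
	std::string out;
	out.reserve(payload.size());
	for (char c : payload)
	{
		if (c == '$' || c == '#' || c == '}' || c == '*')
		{
			out.push_back('}');
			out.push_back(static_cast<char>(c ^ 0x20));
		}
		else
			out.push_back(c);
	}
	return out;
}

// Escape the payload, then wrap it as "$<escaped>#<two hex digit checksum of the escaped bytes>".
static std::string framePacket(std::string_view payload)
{
	std::string escaped = escapePayload(payload);
	uint8_t checksum = std::accumulate(escaped.begin(), escaped.end(), static_cast<uint8_t>(0),
		[](uint8_t acc, char c) { return static_cast<uint8_t>(acc + static_cast<uint8_t>(c)); });
	return fmt::format("${}#{:02x}", escaped, static_cast<unsigned>(checksum));
}

int main()
{
	// 'O' (0x4F) + 'K' (0x4B) = 0x9A, so the canonical OK reply is "$OK#9a"
	assert(framePacket("OK") == "$OK#9a");
}
```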
register for active thread + R"(|(P)([0-9A-Fa-f]+)=([0-9A-Fa-f]+))" // Write register for active thread + R"(|(m)([0-9A-Fa-f]+),([0-9A-Fa-f]+))" // Read memory + R"(|(M)([0-9A-Fa-f]+),([0-9A-Fa-f]+):([0-9A-Fa-f]+))" // Write memory + // R"(|(X)([0-9A-Fa-f]+),([0-9A-Fa-f]+):([0-9A-Fa-f]+))" // Write memory + R"())"}; + const GDBServer* m_server; + const std::string m_command; + std::vector m_args; + std::string m_response; + }; + + class ExecutionBreakpoint; + std::map m_patchedInstructions; + + class AccessBreakpoint; + std::unique_ptr m_watch_point; + +private: + static constexpr int s_maxGDBClients = 1; + static constexpr std::string_view s_supportedFeatures = "PacketSize=4096;qXfer:features:read+;qXfer:threads:read+;qXfer:libraries:read+;swbreak+;hwbreak+;vContSupported+"; + static constexpr size_t s_maxPacketSize = 1024 * 4; + const uint16 m_port; + + enum RegisterID + { + R0_START = 0, + R31_END = R0_START + 31, + PC = 64, + MSR = 65, + CR = 66, + LR = 67, + CTR = 68, + XER = 69, + F0_START = 71, + F31_END = F0_START + 31, + FPSCR = 103 + }; + + static constexpr std::string_view RESPONSE_EMPTY = ""; + static constexpr std::string_view RESPONSE_ACK = "+"; + static constexpr std::string_view RESPONSE_NACK = "-"; + static constexpr std::string_view RESPONSE_OK = "OK"; + static constexpr std::string_view RESPONSE_ERROR = "E01"; + + void ThreadFunc(); + std::atomic_bool m_stopRequested; + void HandleCommand(const std::string& command_str); + void HandleQuery(std::unique_ptr& context) const; + void HandleVCont(std::unique_ptr& context); + + // Commands + sint64 m_activeThreadSelector = 0; + sint64 m_activeThreadContinueSelector = 0; + void CMDContinue(std::unique_ptr& context); + void CMDNotFound(std::unique_ptr& context); + void CMDIsThreadActive(std::unique_ptr& context); + void CMDSetActiveThread(std::unique_ptr& context); + void CMDGetThreadStatus(std::unique_ptr& context); + + void CMDReadRegister(std::unique_ptr& context) const; + void CMDWriteRegister(std::unique_ptr& context) const; + void CMDReadRegisters(std::unique_ptr& context) const; + void CMDWriteRegisters(std::unique_ptr& context) const; + void CMDReadMemory(std::unique_ptr& context); + void CMDWriteMemory(std::unique_ptr& context); + void CMDInsertBreakpoint(std::unique_ptr& context); + void CMDDeleteBreakpoint(std::unique_ptr& context); + + std::thread m_thread; + std::atomic_bool m_resume_startup = false; + MPTR m_entry_point{}; + std::unique_ptr m_resumed_context; + + std::atomic_bool m_client_connected; + SOCKET m_server_socket = INVALID_SOCKET; + sockaddr_in m_server_addr{}; + SOCKET m_client_socket = INVALID_SOCKET; + sockaddr_in m_client_addr{}; +}; + +static constexpr std::string_view GDBTargetXML = R"( + + + powerpc:common + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +)"; + +extern std::unique_ptr g_gdbstub; diff --git a/src/Cafe/HW/Espresso/EspressoISA.h b/src/Cafe/HW/Espresso/EspressoISA.h index 4c630e4c..5e09763b 100644 --- a/src/Cafe/HW/Espresso/EspressoISA.h +++ b/src/Cafe/HW/Espresso/EspressoISA.h @@ -10,6 +10,18 @@ namespace Espresso CR_BIT_INDEX_SO = 3, }; + enum class PSQ_LOAD_TYPE + { + TYPE_F32 = 0, + TYPE_UNUSED1 = 1, + TYPE_UNUSED2 = 2, + TYPE_UNUSED3 = 3, + TYPE_U8 = 4, + TYPE_U16 = 5, + TYPE_S8 = 6, + TYPE_S16 = 7, + }; + enum class PrimaryOpcode { // underscore at the end of the name means that this instruction always updates CR0 (as if RC bit is set) @@ -91,17 +103,19 @@ namespace Espresso BCCTR 
= 528 }; - enum class OPCODE_31 + enum class Opcode31 { - + TW = 4, + MFTB = 371, }; inline PrimaryOpcode GetPrimaryOpcode(uint32 opcode) { return (PrimaryOpcode)(opcode >> 26); }; inline Opcode19 GetGroup19Opcode(uint32 opcode) { return (Opcode19)((opcode >> 1) & 0x3FF); }; + inline Opcode31 GetGroup31Opcode(uint32 opcode) { return (Opcode31)((opcode >> 1) & 0x3FF); }; struct BOField { - BOField() {}; + BOField() = default; BOField(uint8 bo) : bo(bo) {}; bool conditionInverted() const @@ -132,6 +146,12 @@ namespace Espresso uint8 bo; }; + // returns true if LK bit is set, only valid for branch instructions + inline bool DecodeLK(uint32 opcode) + { + return (opcode & 1) != 0; + } + inline void _decodeForm_I(uint32 opcode, uint32& LI, bool& AA, bool& LK) { LI = opcode & 0x3fffffc; @@ -183,13 +203,7 @@ namespace Espresso _decodeForm_D_branch(opcode, BD, BO, BI, AA, LK); } - inline void decodeOp_BCLR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) - { - // form XL (with BD field expected to be zero) - _decodeForm_XL(opcode, BO, BI, LK); - } - - inline void decodeOp_BCCTR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) + inline void decodeOp_BCSPR(uint32 opcode, BOField& BO, uint32& BI, bool& LK) // BCLR and BCSPR { // form XL (with BD field expected to be zero) _decodeForm_XL(opcode, BO, BI, LK); diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp index a3e45679..2fe07509 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterALU.hpp @@ -3,18 +3,17 @@ static void PPCInterpreter_setXerOV(PPCInterpreter_t* hCPU, bool hasOverflow) { if (hasOverflow) { - hCPU->spr.XER |= XER_SO; - hCPU->spr.XER |= XER_OV; + hCPU->xer_so = 1; + hCPU->xer_ov = 1; } else { - hCPU->spr.XER &= ~XER_OV; + hCPU->xer_ov = 0; } } static bool checkAdditionOverflow(uint32 x, uint32 y, uint32 r) { - // todo - update remaining *O instructions to use this function /* x y r result (has overflow) @@ -42,19 +41,11 @@ static void PPCInterpreter_ADD(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_ADDO(PPCInterpreter_t* hCPU, uint32 opcode) { - // untested (Don't Starve Giant Edition uses this instruction + BSO) + // Don't Starve Giant Edition uses this instruction + BSO PPC_OPC_TEMPL3_XO(); - uint64 result = (uint64)hCPU->gpr[rA] + (uint64)hCPU->gpr[rB]; + uint32 result = hCPU->gpr[rA] + hCPU->gpr[rB]; + PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(hCPU->gpr[rA], hCPU->gpr[rB], result)); hCPU->gpr[rD] = (uint32)result; - if (result >= 0x100000000ULL) - { - hCPU->spr.XER |= XER_SO; - hCPU->spr.XER |= XER_OV; - } - else - { - hCPU->spr.XER &= ~XER_OV; - } if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -85,13 +76,7 @@ static void PPCInterpreter_ADDCO(PPCInterpreter_t* hCPU, uint32 opcode) else hCPU->xer_ca = 0; // set SO/OV - if (hCPU->gpr[rD] < a) - { - hCPU->spr.XER |= XER_OV; - hCPU->spr.XER |= XER_SO; - } - else - hCPU->spr.XER &= ~XER_OV; + PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, b, hCPU->gpr[rD])); if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -128,7 +113,6 @@ static void PPCInterpreter_ADDEO(PPCInterpreter_t* hCPU, uint32 opcode) else hCPU->xer_ca = 0; PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, b, hCPU->gpr[rD])); - // update CR if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -145,7 +129,7 @@ static 
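checkAdditionOverflow above only needs one fact about two's complement addition: x + y overflows exactly when both operands share a sign and the truncated result does not. That condition can be tested branch-free with bit 31 of ((x ^ r) & (y ^ r)); a small self-contained check of the identity, with names that are mine rather than the diff's:

```cpp
#include <cassert>
#include <cstdint>

// Signed 32-bit addition x + y overflows exactly when both operands share a sign
// and the wrapped result has the opposite sign. Bit 31 of ((x ^ r) & (y ^ r))
// is set in precisely that case, so no branches or 64-bit math are needed.
static bool additionOverflows(uint32_t x, uint32_t y)
{
	uint32_t r = x + y; // wraps modulo 2^32, matching what the interpreter stores in gpr[rD]
	return (((x ^ r) & (y ^ r)) & 0x80000000u) != 0;
}

int main()
{
	assert(additionOverflows(0x7FFFFFFFu, 1));            // INT_MAX + 1
	assert(additionOverflows(0x80000000u, 0xFFFFFFFFu));  // INT_MIN + (-1)
	assert(!additionOverflows(0x7FFFFFFFu, 0xFFFFFFFFu)); // INT_MAX + (-1)
	assert(!additionOverflows(1, 2));
}
```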
void PPCInterpreter_ADDI(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_ADDIC(PPCInterpreter_t* hCPU, uint32 opcode) { - int rD, rA; + sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); uint32 a = hCPU->gpr[rA]; @@ -160,7 +144,7 @@ static void PPCInterpreter_ADDIC(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_ADDIC_(PPCInterpreter_t* hCPU, uint32 opcode) { - int rD, rA; + sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); uint32 a = hCPU->gpr[rA]; @@ -170,14 +154,13 @@ static void PPCInterpreter_ADDIC_(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->xer_ca = 1; else hCPU->xer_ca = 0; - // update cr0 flags ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); } static void PPCInterpreter_ADDIS(PPCInterpreter_t* hCPU, uint32 opcode) { - int rD, rA; + sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rD, rA, imm); hCPU->gpr[rD] = (rA ? hCPU->gpr[rA] : 0) + imm; @@ -200,6 +183,23 @@ static void PPCInterpreter_ADDZE(PPCInterpreter_t* hCPU, uint32 opcode) PPCInterpreter_nextInstruction(hCPU); } +static void PPCInterpreter_ADDZEO(PPCInterpreter_t* hCPU, uint32 opcode) +{ + PPC_OPC_TEMPL3_XO(); + PPC_ASSERT(rB == 0); + uint32 a = hCPU->gpr[rA]; + uint32 ca = hCPU->xer_ca; + hCPU->gpr[rD] = a + ca; + PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, 0, hCPU->gpr[rD])); + if ((a == 0xffffffff) && ca) + hCPU->xer_ca = 1; + else + hCPU->xer_ca = 0; + if (opHasRC()) + ppc_update_cr0(hCPU, hCPU->gpr[rD]); + PPCInterpreter_nextInstruction(hCPU); +} + static void PPCInterpreter_ADDME(PPCInterpreter_t* hCPU, uint32 opcode) { PPC_OPC_TEMPL3_XO(); @@ -216,6 +216,23 @@ static void PPCInterpreter_ADDME(PPCInterpreter_t* hCPU, uint32 opcode) PPCInterpreter_nextInstruction(hCPU); } +static void PPCInterpreter_ADDMEO(PPCInterpreter_t* hCPU, uint32 opcode) +{ + PPC_OPC_TEMPL3_XO(); + PPC_ASSERT(rB == 0); + uint32 a = hCPU->gpr[rA]; + uint32 ca = hCPU->xer_ca; + hCPU->gpr[rD] = a + ca + 0xffffffff; + PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(a, 0xffffffff, hCPU->gpr[rD])); + if (a || ca) + hCPU->xer_ca = 1; + else + hCPU->xer_ca = 0; + if (opHasRC()) + ppc_update_cr0(hCPU, hCPU->gpr[rD]); + PPCInterpreter_nextInstruction(hCPU); +} + static void PPCInterpreter_SUBF(PPCInterpreter_t* hCPU, uint32 opcode) { PPC_OPC_TEMPL3_XO(); @@ -227,11 +244,12 @@ static void PPCInterpreter_SUBF(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_SUBFO(PPCInterpreter_t* hCPU, uint32 opcode) { - // untested (Don't Starve Giant Edition uses this) + // Seen in Don't Starve Giant Edition and Teslagrad // also used by DS Virtual Console (Super Mario 64 DS) PPC_OPC_TEMPL3_XO(); - hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1; - PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~hCPU->gpr[rA], hCPU->gpr[rB], hCPU->gpr[rD])); + uint32 result = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1; + PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~hCPU->gpr[rA], hCPU->gpr[rB], result)); + hCPU->gpr[rD] = result; if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -260,21 +278,13 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode) uint32 a = hCPU->gpr[rA]; uint32 b = hCPU->gpr[rB]; hCPU->gpr[rD] = ~a + b + 1; - // update xer + // update carry if (ppc_carry_3(~a, b, 1)) hCPU->xer_ca = 1; else hCPU->xer_ca = 0; // update xer SO/OV - if (checkAdditionOverflow(~a, b, hCPU->gpr[rD])) - { - hCPU->spr.XER |= XER_SO; - hCPU->spr.XER |= XER_OV; - } - else - { - 
hCPU->spr.XER &= ~XER_OV; - } + PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, b, hCPU->gpr[rD])); if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -282,7 +292,7 @@ static void PPCInterpreter_SUBFCO(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_SUBFIC(PPCInterpreter_t* hCPU, uint32 opcode) { - int rD, rA; + sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); uint32 a = hCPU->gpr[rA]; @@ -306,7 +316,6 @@ static void PPCInterpreter_SUBFE(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->xer_ca = 1; else hCPU->xer_ca = 0; - // update cr0 if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -325,16 +334,7 @@ static void PPCInterpreter_SUBFEO(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->xer_ca = 1; else hCPU->xer_ca = 0; - if (checkAdditionOverflow(~a, b, result)) - { - hCPU->spr.XER |= XER_SO; - hCPU->spr.XER |= XER_OV; - } - else - { - hCPU->spr.XER &= ~XER_OV; - } - // update cr0 + PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, b, result)); if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -356,9 +356,25 @@ static void PPCInterpreter_SUBFZE(PPCInterpreter_t* hCPU, uint32 opcode) PPCInterpreter_nextInstruction(hCPU); } +static void PPCInterpreter_SUBFZEO(PPCInterpreter_t* hCPU, uint32 opcode) +{ + PPC_OPC_TEMPL3_XO(); + PPC_ASSERT(rB == 0); + uint32 a = hCPU->gpr[rA]; + uint32 ca = hCPU->xer_ca; + hCPU->gpr[rD] = ~a + ca; + PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, 0, hCPU->gpr[rD])); + if (a == 0 && ca) + hCPU->xer_ca = 1; + else + hCPU->xer_ca = 0; + if (opHasRC()) + ppc_update_cr0(hCPU, hCPU->gpr[rD]); + PPCInterpreter_nextInstruction(hCPU); +} + static void PPCInterpreter_SUBFME(PPCInterpreter_t* hCPU, uint32 opcode) { - // untested PPC_OPC_TEMPL3_XO(); PPC_ASSERT(rB == 0); uint32 a = hCPU->gpr[rA]; @@ -369,7 +385,24 @@ static void PPCInterpreter_SUBFME(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->xer_ca = 1; else hCPU->xer_ca = 0; - // update cr0 + if (opcode & PPC_OPC_RC) + ppc_update_cr0(hCPU, hCPU->gpr[rD]); + PPCInterpreter_nextInstruction(hCPU); +} + +static void PPCInterpreter_SUBFMEO(PPCInterpreter_t* hCPU, uint32 opcode) +{ + PPC_OPC_TEMPL3_XO(); + PPC_ASSERT(rB == 0); + uint32 a = hCPU->gpr[rA]; + uint32 ca = hCPU->xer_ca; + hCPU->gpr[rD] = ~a + 0xFFFFFFFF + ca; + PPCInterpreter_setXerOV(hCPU, checkAdditionOverflow(~a, 0xFFFFFFFF, hCPU->gpr[rD])); + // update xer carry + if (ppc_carry_3(~a, 0xFFFFFFFF, ca)) + hCPU->xer_ca = 1; + else + hCPU->xer_ca = 0; if (opcode & PPC_OPC_RC) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -382,13 +415,8 @@ static void PPCInterpreter_MULHW_(PPCInterpreter_t* hCPU, uint32 opcode) sint64 b = (sint32)hCPU->gpr[rB]; sint64 c = a * b; hCPU->gpr[rD] = ((uint64)c) >> 32; - if (opcode & PPC_OPC_RC) { - // update cr0 flags -#ifndef PUBLIC_RELEASE - assert_dbg(); -#endif + if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); - } PPCInterpreter_nextInstruction(hCPU); } @@ -419,17 +447,9 @@ static void PPCInterpreter_MULLWO(PPCInterpreter_t* hCPU, uint32 opcode) // Don't Starve Giant Edition uses this instruction + BSO // also used by FullBlast when a save file exists + it uses mfxer to access overflow result PPC_OPC_TEMPL3_XO(); - sint64 result = (sint64)hCPU->gpr[rA] * (sint64)hCPU->gpr[rB]; + sint64 result = (sint64)(sint32)hCPU->gpr[rA] * (sint64)(sint32)hCPU->gpr[rB]; hCPU->gpr[rD] = (uint32)result; - if (result < -0x80000000ll && 
result > 0x7FFFFFFFLL) - { - hCPU->spr.XER |= XER_SO; - hCPU->spr.XER |= XER_OV; - } - else - { - hCPU->spr.XER &= ~XER_OV; - } + PPCInterpreter_setXerOV(hCPU, result < -0x80000000ll || result > 0x7FFFFFFFLL); if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -447,14 +467,14 @@ static void PPCInterpreter_MULLI(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_DIVW(PPCInterpreter_t* hCPU, uint32 opcode) { PPC_OPC_TEMPL3_XO(); - sint32 a = hCPU->gpr[rA]; - sint32 b = hCPU->gpr[rB]; + sint32 a = (sint32)hCPU->gpr[rA]; + sint32 b = (sint32)hCPU->gpr[rB]; if (b == 0) - { - forceLogDebug_printf("Error: Division by zero! [%08X]\n", (uint32)hCPU->instructionPointer); - b++; - } - hCPU->gpr[rD] = a / b; + hCPU->gpr[rD] = a < 0 ? 0xFFFFFFFF : 0; + else if (a == 0x80000000 && b == 0xFFFFFFFF) + hCPU->gpr[rD] = 0xFFFFFFFF; + else + hCPU->gpr[rD] = a / b; if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -463,19 +483,23 @@ static void PPCInterpreter_DIVW(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_DIVWO(PPCInterpreter_t* hCPU, uint32 opcode) { PPC_OPC_TEMPL3_XO(); - sint32 a = hCPU->gpr[rA]; - sint32 b = hCPU->gpr[rB]; + sint32 a = (sint32)hCPU->gpr[rA]; + sint32 b = (sint32)hCPU->gpr[rB]; if (b == 0) { - if (opcode & PPC_OPC_OE) - hCPU->spr.XER |= XER_OV; - PPCInterpreter_nextInstruction(hCPU); - return; + PPCInterpreter_setXerOV(hCPU, true); + hCPU->gpr[rD] = a < 0 ? 0xFFFFFFFF : 0; + } + else if(a == 0x80000000 && b == 0xFFFFFFFF) + { + PPCInterpreter_setXerOV(hCPU, true); + hCPU->gpr[rD] = 0xFFFFFFFF; + } + else + { + hCPU->gpr[rD] = a / b; + PPCInterpreter_setXerOV(hCPU, false); } - hCPU->gpr[rD] = a / b; - if (opcode & PPC_OPC_OE) - hCPU->spr.XER &= ~XER_OV; - // todo: Handle SO if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -484,12 +508,14 @@ static void PPCInterpreter_DIVWO(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_DIVWU(PPCInterpreter_t* hCPU, uint32 opcode) { PPC_OPC_TEMPL3_XO(); - if (hCPU->gpr[rB] == 0) - { - PPCInterpreter_nextInstruction(hCPU); - return; - } - hCPU->gpr[rD] = hCPU->gpr[rA] / hCPU->gpr[rB]; + uint32 a = hCPU->gpr[rA]; + uint32 b = hCPU->gpr[rB]; + if (b == 0) + hCPU->gpr[rD] = 0; + else if (a == 0x80000000 && b == 0xFFFFFFFF) + hCPU->gpr[rD] = 0; + else + hCPU->gpr[rD] = a / b; if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -498,17 +524,23 @@ static void PPCInterpreter_DIVWU(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_DIVWUO(PPCInterpreter_t* hCPU, uint32 opcode) { PPC_OPC_TEMPL3_XO(); - if (hCPU->gpr[rB] == 0) + uint32 a = hCPU->gpr[rA]; + uint32 b = hCPU->gpr[rB]; + if (b == 0) { - if (opcode & PPC_OPC_OE) - hCPU->spr.XER |= XER_OV; - PPCInterpreter_nextInstruction(hCPU); - return; + PPCInterpreter_setXerOV(hCPU, true); + hCPU->gpr[rD] = 0; + } + else if(a == 0x80000000 && b == 0xFFFFFFFF) + { + PPCInterpreter_setXerOV(hCPU, false); + hCPU->gpr[rD] = 0; + } + else + { + hCPU->gpr[rD] = a / b; + PPCInterpreter_setXerOV(hCPU, false); } - hCPU->gpr[rD] = hCPU->gpr[rA] / hCPU->gpr[rB]; - if (opcode & PPC_OPC_OE) - hCPU->spr.XER &= ~XER_OV; - // todo: Handle SO if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); PPCInterpreter_nextInstruction(hCPU); @@ -535,6 +567,13 @@ static void PPCInterpreter_CRANDC(PPCInterpreter_t* hCPU, uint32 opcode) PPCInterpreter_nextInstruction(hCPU); } +static void 
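The DIVW/DIVWO/DIVWU rewrites above pin down the two cases the PowerPC ISA leaves undefined, division by zero and 0x80000000 / -1, so the interpreter never performs a host division that would trap. A sketch of the signed variant; the concrete fallback values mirror the diff's choice, not a requirement of the specification:

```cpp
#include <cassert>
#include <cstdint>

// Signed divide following the conventions the interpreter adopts for the two
// results the PowerPC ISA leaves undefined. The fallback values (all-ones for a
// negative dividend or the INT_MIN / -1 case, zero otherwise) follow the diff.
static uint32_t divwSafe(uint32_t ra, uint32_t rb)
{
	int32_t a = static_cast<int32_t>(ra);
	int32_t b = static_cast<int32_t>(rb);
	if (b == 0)
		return a < 0 ? 0xFFFFFFFFu : 0u;   // would raise SIGFPE on the host otherwise
	if (ra == 0x80000000u && rb == 0xFFFFFFFFu)
		return 0xFFFFFFFFu;                // INT_MIN / -1 overflows the host division
	return static_cast<uint32_t>(a / b);   // C++ truncates toward zero, like divw
}

int main()
{
	assert(divwSafe(10, 3) == 3);
	assert(divwSafe(static_cast<uint32_t>(-10), 3) == static_cast<uint32_t>(-3));
	assert(divwSafe(5, 0) == 0);
	assert(divwSafe(static_cast<uint32_t>(-5), 0) == 0xFFFFFFFFu);
	assert(divwSafe(0x80000000u, 0xFFFFFFFFu) == 0xFFFFFFFFu);
}
```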
PPCInterpreter_CRNAND(PPCInterpreter_t* hCPU, uint32 opcode) +{ + PPC_OPC_TEMPL_X_CR(); + ppc_setCRBit(hCPU, crD, (ppc_getCRBit(hCPU, crA)&ppc_getCRBit(hCPU, crB)) ^ 1); + PPCInterpreter_nextInstruction(hCPU); +} + static void PPCInterpreter_CROR(PPCInterpreter_t* hCPU, uint32 opcode) { PPC_OPC_TEMPL_X_CR(); @@ -577,15 +616,7 @@ static void PPCInterpreter_NEGO(PPCInterpreter_t* hCPU, uint32 opcode) { PPC_OPC_TEMPL3_XO(); PPC_ASSERT(rB == 0); - if (hCPU->gpr[rA] == 0x80000000) - { - hCPU->spr.XER |= XER_SO; - hCPU->spr.XER |= XER_OV; - } - else - { - hCPU->spr.XER &= ~XER_OV; - } + PPCInterpreter_setXerOV(hCPU, hCPU->gpr[rA] == 0x80000000); hCPU->gpr[rD] = (uint32)-((sint32)hCPU->gpr[rA]); if (opHasRC()) ppc_update_cr0(hCPU, hCPU->gpr[rD]); @@ -900,8 +931,7 @@ static void PPCInterpreter_CMP(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if ((hCPU->spr.XER & XER_SO) != 0) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } @@ -923,8 +953,7 @@ static void PPCInterpreter_CMPL(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if ((hCPU->spr.XER & XER_SO) != 0) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } @@ -947,8 +976,7 @@ static void PPCInterpreter_CMPI(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if (hCPU->spr.XER & XER_SO) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } @@ -971,8 +999,7 @@ static void PPCInterpreter_CMPLI(PPCInterpreter_t* hCPU, uint32 opcode) hCPU->cr[cr * 4 + CR_BIT_GT] = 1; else hCPU->cr[cr * 4 + CR_BIT_EQ] = 1; - if (hCPU->spr.XER & XER_SO) - hCPU->cr[cr * 4 + CR_BIT_SO] = 1; + hCPU->cr[cr * 4 + CR_BIT_SO] = hCPU->xer_so; PPCInterpreter_nextInstruction(hCPU); } diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp index a33a1be6..2c99b84c 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterFPU.cpp @@ -32,7 +32,7 @@ espresso_frsqrte_entry_t frsqrteLookupTable[32] = {0x20c1000, 0x35e},{0x1f12000, 0x332},{0x1d79000, 0x30a},{0x1bf4000, 0x2e6}, }; -double frsqrte_espresso(double input) +ATTR_MS_ABI double frsqrte_espresso(double input) { unsigned long long x = *(unsigned long long*)&input; @@ -111,7 +111,7 @@ espresso_fres_entry_t fresLookupTable[32] = {0x88400, 0x11a}, {0x65000, 0x11a}, {0x41c00, 0x108}, {0x20c00, 0x106} }; -double fres_espresso(double input) +ATTR_MS_ABI double fres_espresso(double input) { // based on testing we know that fres uses only the first 15 bits of the mantissa // seee eeee eeee mmmm mmmm mmmm mmmx xxxx .... 
(s = sign, e = exponent, m = mantissa, x = not used) @@ -515,7 +515,7 @@ void PPCInterpreter_MTFSF(PPCInterpreter_t* hCPU, uint32 Opcode) static bool logFPSCRWriteOnce = false; if( logFPSCRWriteOnce == false ) { - forceLog_printf("Unsupported write to FPSCR\n"); + cemuLog_log(LogType::Force, "Unsupported write to FPSCR"); logFPSCRWriteOnce = true; } PPCInterpreter_nextInstruction(hCPU); @@ -697,4 +697,4 @@ void PPCInterpreter_FCMPU(PPCInterpreter_t* hCPU, uint32 Opcode) fcmpu_espresso(hCPU, crfD, hCPU->fpr[frA].fp0, hCPU->fpr[frB].fp0); PPCInterpreter_nextInstruction(hCPU); -} \ No newline at end of file +} diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp index 6aa1fcfa..cf7ba195 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterHLE.cpp @@ -2,62 +2,70 @@ #include "PPCInterpreterInternal.h" #include "PPCInterpreterHelper.h" -std::unordered_set sUnsupportedHLECalls; +std::unordered_set s_unsupportedHLECalls; void PPCInterpreter_handleUnsupportedHLECall(PPCInterpreter_t* hCPU) { const char* libFuncName = (char*)memory_getPointerFromVirtualOffset(hCPU->instructionPointer + 8); std::string tempString = fmt::format("Unsupported lib call: {}", libFuncName); - if (sUnsupportedHLECalls.find(tempString) == sUnsupportedHLECalls.end()) + if (s_unsupportedHLECalls.find(tempString) == s_unsupportedHLECalls.end()) { cemuLog_log(LogType::UnsupportedAPI, "{}", tempString); - sUnsupportedHLECalls.emplace(tempString); + s_unsupportedHLECalls.emplace(tempString); } hCPU->gpr[3] = 0; PPCInterpreter_nextInstruction(hCPU); } -std::vector* sPPCHLETable{}; +static constexpr size_t HLE_TABLE_CAPACITY = 0x4000; +HLECALL s_ppcHleTable[HLE_TABLE_CAPACITY]{}; +sint32 s_ppcHleTableWriteIndex = 0; +std::mutex s_ppcHleTableMutex; -HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall) +HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall, std::string hleName) { - if (!sPPCHLETable) - sPPCHLETable = new std::vector(); - for (sint32 i = 0; i < sPPCHLETable->size(); i++) + std::unique_lock _l(s_ppcHleTableMutex); + if (s_ppcHleTableWriteIndex >= HLE_TABLE_CAPACITY) { - if ((*sPPCHLETable)[i] == hleCall) - return i; + cemuLog_log(LogType::Force, "HLE table is full"); + cemu_assert(false); } - HLEIDX newFuncIndex = (sint32)sPPCHLETable->size(); - sPPCHLETable->resize(sPPCHLETable->size() + 1); - (*sPPCHLETable)[newFuncIndex] = hleCall; - return newFuncIndex; + for (sint32 i = 0; i < s_ppcHleTableWriteIndex; i++) + { + if (s_ppcHleTable[i] == hleCall) + { + return i; + } + } + cemu_assert(s_ppcHleTableWriteIndex < HLE_TABLE_CAPACITY); + s_ppcHleTable[s_ppcHleTableWriteIndex] = hleCall; + HLEIDX funcIndex = s_ppcHleTableWriteIndex; + s_ppcHleTableWriteIndex++; + return funcIndex; } HLECALL PPCInterpreter_getHLECall(HLEIDX funcIndex) { - if (funcIndex < 0 || funcIndex >= sPPCHLETable->size()) + if (funcIndex < 0 || funcIndex >= HLE_TABLE_CAPACITY) return nullptr; - return sPPCHLETable->data()[funcIndex]; + return s_ppcHleTable[funcIndex]; } -std::mutex g_hleLogMutex; +std::mutex s_hleLogMutex; void PPCInterpreter_virtualHLE(PPCInterpreter_t* hCPU, unsigned int opcode) { uint32 hleFuncId = opcode & 0xFFFF; - if (hleFuncId == 0xFFD0) + if (hleFuncId == 0xFFD0) [[unlikely]] { - g_hleLogMutex.lock(); + s_hleLogMutex.lock(); PPCInterpreter_handleUnsupportedHLECall(hCPU); - g_hleLogMutex.unlock(); - return; + s_hleLogMutex.unlock(); } else { // os lib function - cemu_assert(hleFuncId < 
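PPCInterpreter_registerHLECall above switches from a lazily allocated vector to a fixed-capacity table guarded by a mutex, returning the existing index when the same callback is registered twice. A generic sketch of that register-or-reuse pattern; RegistrationTable is a made-up name, and the real table is a plain array of HLECALL pointers rather than a template:

```cpp
#include <array>
#include <cassert>
#include <cstdint>
#include <mutex>
#include <optional>

// Fixed-capacity registration table: registering the same value twice yields the
// original index. Get() is deliberately unsynchronized, like the diff's lookup,
// which is only safe if all registrations happen up front (e.g. at module init).
template<typename T, size_t Capacity>
class RegistrationTable
{
public:
	std::optional<int32_t> Register(T value)
	{
		std::unique_lock lock(m_mutex);
		for (int32_t i = 0; i < m_count; i++)
		{
			if (m_entries[i] == value)
				return i; // already registered, reuse the existing index
		}
		if (m_count >= static_cast<int32_t>(Capacity))
			return std::nullopt; // table full, caller decides how to fail
		m_entries[m_count] = value;
		return m_count++;
	}

	T Get(int32_t index) const
	{
		if (index < 0 || index >= static_cast<int32_t>(Capacity))
			return T{};
		return m_entries[index];
	}

private:
	std::array<T, Capacity> m_entries{};
	int32_t m_count = 0;
	std::mutex m_mutex;
};

int main()
{
	RegistrationTable<int, 4> table;
	auto a = table.Register(42);
	auto b = table.Register(42);
	assert(a && b && *a == *b); // duplicate registration is deduplicated
	assert(table.Get(*a) == 42);
}
```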
sPPCHLETable->size()); - auto hleCall = (*sPPCHLETable)[hleFuncId]; + auto hleCall = PPCInterpreter_getHLECall(hleFuncId); cemu_assert(hleCall); hleCall(hCPU); } diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp index 6e3de535..547472ab 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterImpl.cpp @@ -1,6 +1,7 @@ #include "PPCInterpreterInternal.h" #include "PPCInterpreterHelper.h" #include "Cafe/HW/Espresso/Debugger/Debugger.h" +#include "Cafe/HW/Espresso/Debugger/GDBStub.h" class PPCItpCafeOSUsermode { @@ -139,7 +140,7 @@ public: return vAddr; } -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT if (hCPU->memoryException) assert_dbg(); // should not be set anymore #endif @@ -236,7 +237,7 @@ public: lookupHash = ~lookupHash; } - forceLogDebug_printf("DSI exception at 0x%08x LR 0x%08x DataAddress %08x", hCPU->instructionPointer, hCPU->spr.LR, vAddr); + cemuLog_logDebug(LogType::Force, "DSI exception at 0x{:08x} DataAddress {:08x}", hCPU->instructionPointer, vAddr); generateDSIException(hCPU, vAddr); @@ -377,12 +378,12 @@ public: if (pAddr >= 0x01FFF000 && pAddr < 0x02000000) { debug_printf("Access u32 boot param block 0x%08x IP %08x LR %08x\n", pAddr, hCPU->instructionPointer, hCPU->spr.LR); - forceLogDebug_printf("Access u32 boot param block 0x%08x (org %08x) IP %08x LR %08x\n", pAddr, address, hCPU->instructionPointer, hCPU->spr.LR); + cemuLog_logDebug(LogType::Force, "Access u32 boot param block 0x{:08x} (org {:08x}) IP {:08x}", pAddr, address, hCPU->instructionPointer); } if (pAddr >= 0xFFEB73B0 && pAddr < (0xFFEB73B0+0x40C)) { debug_printf("Access cached u32 boot param block 0x%08x IP %08x LR %08x\n", pAddr, hCPU->instructionPointer, hCPU->spr.LR); - forceLogDebug_printf("Access cached u32 boot param block 0x%08x (org %08x) IP %08x LR %08x\n", pAddr, address, hCPU->instructionPointer, hCPU->spr.LR); + cemuLog_logDebug(LogType::Force, "Access cached u32 boot param block 0x{:08x} (org {:08x}) IP {:08x}", pAddr, address, hCPU->instructionPointer); } if (pAddr >= 0x0c000000 && pAddr < 0x0d100000) @@ -427,9 +428,6 @@ public: } }; -uint32 testIP[100]; -uint32 testIPC = 0; - template class PPCInterpreterContainer { @@ -456,7 +454,7 @@ public: { case 0: debug_printf("ZERO[NOP] | 0x%08X\n", (unsigned int)hCPU->instructionPointer); - #ifndef PUBLIC_RELEASE + #ifdef CEMU_DEBUG_ASSERT assert_dbg(); while (true) std::this_thread::sleep_for(std::chrono::seconds(1)); #endif @@ -465,6 +463,10 @@ public: case 1: // virtual HLE PPCInterpreter_virtualHLE(hCPU, opcode); break; + case 3: + cemuLog_logDebug(LogType::Force, "Unsupported TWI instruction executed at {:08x}", hCPU->instructionPointer); + PPCInterpreter_nextInstruction(hCPU); + break; case 4: switch (PPC_getBits(opcode, 30, 5)) { @@ -481,8 +483,9 @@ public: PPCInterpreter_PS_CMPU1(hCPU, opcode); break; default: - debug_printf("Unknown execute %04X as [4->0] at %08X\n", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer); + cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4->0] at {:08x}", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer); cemu_assert_unimplemented(); + hCPU->instructionPointer += 4; break; } break; @@ -508,8 +511,9 @@ public: PPCInterpreter_PS_ABS(hCPU, opcode); break; default: - debug_printf("Unknown execute %04X as [4->8] at %08X\n", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer); + cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4->8] at 
{:08x}", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer); cemu_assert_unimplemented(); + hCPU->instructionPointer += 4; break; } break; @@ -547,8 +551,9 @@ public: PPCInterpreter_PS_MERGE11(hCPU, opcode); break; default: - debug_printf("Unknown execute %04X as [4->16] at %08X\n", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer); - debugBreakpoint(); + cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4->16] at {:08x}", PPC_getBits(opcode, 25, 5), hCPU->instructionPointer); + cemu_assert_unimplemented(); + hCPU->instructionPointer += 4; break; } break; @@ -589,8 +594,9 @@ public: PPCInterpreter_PS_NMADD(hCPU, opcode); break; default: - debug_printf("Unknown execute %04X as [4] at %08X\n", PPC_getBits(opcode, 30, 5), hCPU->instructionPointer); + cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [4] at {:08x}", PPC_getBits(opcode, 30, 5), hCPU->instructionPointer); cemu_assert_unimplemented(); + hCPU->instructionPointer += 4; break; } break; @@ -622,12 +628,15 @@ public: PPCInterpreter_BCX(hCPU, opcode); break; case 17: - if (PPC_getBits(opcode, 30, 1) == 1) { + if (PPC_getBits(opcode, 30, 1) == 1) + { PPCInterpreter_SC(hCPU, opcode); } - else { - debug_printf("Unsupported Opcode [0x17 --> 0x0]\n"); + else + { + cemuLog_logDebug(LogType::Force, "Unsupported Opcode [0x17 --> 0x0]"); cemu_assert_unimplemented(); + hCPU->instructionPointer += 4; } break; case 18: @@ -657,6 +666,9 @@ public: case 193: PPCInterpreter_CRXOR(hCPU, opcode); break; + case 225: + PPCInterpreter_CRNAND(hCPU, opcode); + break; case 257: PPCInterpreter_CRAND(hCPU, opcode); break; @@ -673,8 +685,9 @@ public: PPCInterpreter_BCCTR(hCPU, opcode); break; default: - debug_printf("Unknown execute %04X as [19] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer); + cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [19] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer); cemu_assert_unimplemented(); + hCPU->instructionPointer += 4; break; } break; @@ -712,9 +725,6 @@ public: PPCInterpreter_CMP(hCPU, opcode); break; case 4: - #ifndef PUBLIC_RELEASE - debug_printf("TW instruction executed at %08x\n", hCPU->instructionPointer); - #endif PPCInterpreter_TW(hCPU, opcode); break; case 8: @@ -894,6 +904,12 @@ public: case 522: PPCInterpreter_ADDCO(hCPU, opcode); break; + case 523: // 11 | OE + PPCInterpreter_MULHWU_(hCPU, opcode); // OE is ignored + break; + case 533: + PPCInterpreter_LSWX(hCPU, opcode); + break; case 534: PPCInterpreter_LWBRX(hCPU, opcode); break; @@ -912,6 +928,9 @@ public: case 567: PPCInterpreter_LFSUX(hCPU, opcode); break; + case 587: // 75 | OE + PPCInterpreter_MULHW_(hCPU, opcode); // OE is ignored for MULHW + break; case 595: PPCInterpreter_MFSR(hCPU, opcode); break; @@ -942,15 +961,30 @@ public: case 663: PPCInterpreter_STFSX(hCPU, opcode); break; + case 661: + PPCInterpreter_STSWX(hCPU, opcode); + break; case 695: PPCInterpreter_STFSUX(hCPU, opcode); break; + case 712: // 200 | OE + PPCInterpreter_SUBFZEO(hCPU, opcode); + break; + case 714: // 202 | OE + PPCInterpreter_ADDZEO(hCPU, opcode); + break; case 725: PPCInterpreter_STSWI(hCPU, opcode); break; case 727: PPCInterpreter_STFDX(hCPU, opcode); break; + case 744: // 232 | OE + PPCInterpreter_SUBFMEO(hCPU, opcode); + break; + case 746: // 234 | OE + PPCInterpreter_ADDMEO(hCPU, opcode); + break; case 747: PPCInterpreter_MULLWO(hCPU, opcode); break; @@ -997,10 +1031,8 @@ public: PPCInterpreter_DCBZ(hCPU, opcode); break; default: - debug_printf("Unknown execute %04X as [31] at %08X\n", 
PPC_getBits(opcode, 30, 10), hCPU->instructionPointer); - #ifndef PUBLIC_RELEASE - assert_dbg(); - #endif + cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [31] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer); + cemu_assert_unimplemented(); hCPU->instructionPointer += 4; break; } @@ -1083,7 +1115,7 @@ public: case 57: PPCInterpreter_PSQ_LU(hCPU, opcode); break; - case 59: //Opcode category + case 59: // opcode category switch (PPC_getBits(opcode, 30, 5)) { case 18: @@ -1114,8 +1146,9 @@ public: PPCInterpreter_FNMADDS(hCPU, opcode); break; default: - debug_printf("Unknown execute %04X as [59] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer); + cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [59] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer); cemu_assert_unimplemented(); + hCPU->instructionPointer += 4; break; } break; @@ -1194,18 +1227,19 @@ public: case 583: PPCInterpreter_MFFS(hCPU, opcode); break; - case 711: // IBM documentation has this wrong as 771? + case 711: PPCInterpreter_MTFSF(hCPU, opcode); break; default: - debug_printf("Unknown execute %04X as [63] at %08X\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer); + cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} as [63] at {:08x}\n", PPC_getBits(opcode, 30, 10), hCPU->instructionPointer); cemu_assert_unimplemented(); + PPCInterpreter_nextInstruction(hCPU); break; } } break; default: - debug_printf("Unknown execute %04X at %08X\n", PPC_getBits(opcode, 5, 6), (unsigned int)hCPU->instructionPointer); + cemuLog_logDebug(LogType::Force, "Unknown execute {:04x} at {:08x}\n", PPC_getBits(opcode, 5, 6), (unsigned int)hCPU->instructionPointer); cemu_assert_unimplemented(); } } diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h index bc8458d9..896fd21c 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h @@ -50,9 +50,9 @@ #define CR_BIT_EQ 2 #define CR_BIT_SO 3 -#define XER_SO (1<<31) // summary overflow bit -#define XER_OV (1<<30) // overflow bit #define XER_BIT_CA (29) // carry bit index. To accelerate frequent access, this bit is stored as a separate uint8 +#define XER_BIT_SO (31) // summary overflow, counterpart to CR SO +#define XER_BIT_OV (30) // FPSCR #define FPSCR_VXSNAN (1<<24) @@ -118,7 +118,8 @@ static inline void ppc_update_cr0(PPCInterpreter_t* hCPU, uint32 r) { - hCPU->cr[CR_BIT_SO] = (hCPU->spr.XER&XER_SO) ? 1 : 0; + cemu_assert_debug(hCPU->xer_so <= 1); + hCPU->cr[CR_BIT_SO] = hCPU->xer_so; hCPU->cr[CR_BIT_LT] = ((r != 0) ? 1 : 0) & ((r & 0x80000000) ? 1 : 0); hCPU->cr[CR_BIT_EQ] = (r == 0); hCPU->cr[CR_BIT_GT] = hCPU->cr[CR_BIT_EQ] ^ hCPU->cr[CR_BIT_LT] ^ 1; // this works because EQ and LT can never be set at the same time. 
So the only case where GT becomes 1 is when LT=0 and EQ=0 @@ -190,8 +191,8 @@ inline double roundTo25BitAccuracy(double d) return *(double*)&v; } -double fres_espresso(double input); -double frsqrte_espresso(double input); +ATTR_MS_ABI double fres_espresso(double input); +ATTR_MS_ABI double frsqrte_espresso(double input); void fcmpu_espresso(PPCInterpreter_t* hCPU, int crfD, double a, double b); diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp index 694e05e6..ea7bb038 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterLoadStore.hpp @@ -31,7 +31,7 @@ static void PPCInterpreter_STW(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STWU(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS; + sint32 rA, rS; uint32 imm; PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm); ppcItpCtrl::ppcMem_writeDataU32(hCPU, hCPU->gpr[rA] + imm, hCPU->gpr[rS]); @@ -42,7 +42,7 @@ static void PPCInterpreter_STWU(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STWX(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS, rB; + sint32 rA, rS, rB; PPC_OPC_TEMPL_X(Opcode, rS, rA, rB); ppcItpCtrl::ppcMem_writeDataU32(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], hCPU->gpr[rS]); PPCInterpreter_nextInstruction(hCPU); @@ -85,7 +85,8 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode) ppc_setCRBit(hCPU, CR_BIT_GT, 0); ppc_setCRBit(hCPU, CR_BIT_EQ, 1); } - ppc_setCRBit(hCPU, CR_BIT_SO, (hCPU->spr.XER&XER_SO) != 0 ? 1 : 0); + cemu_assert_debug(hCPU->xer_so <= 1); + ppc_setCRBit(hCPU, CR_BIT_SO, hCPU->xer_so); // remove reservation hCPU->reservedMemAddr = 0; hCPU->reservedMemValue = 0; @@ -102,7 +103,7 @@ static void PPCInterpreter_STWCX(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STWUX(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS, rB; + sint32 rA, rS, rB; PPC_OPC_TEMPL_X(Opcode, rS, rA, rB); ppcItpCtrl::ppcMem_writeDataU32(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], hCPU->gpr[rS]); if (rA) @@ -112,7 +113,7 @@ static void PPCInterpreter_STWUX(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STWBRX(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS, rB; + sint32 rA, rS, rB; PPC_OPC_TEMPL_X(Opcode, rS, rA, rB); ppcItpCtrl::ppcMem_writeDataU32(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], _swapEndianU32(hCPU->gpr[rS])); PPCInterpreter_nextInstruction(hCPU); @@ -120,7 +121,7 @@ static void PPCInterpreter_STWBRX(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STMW(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rS, rA; + sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm); uint32 ea = (rA ? hCPU->gpr[rA] : 0) + imm; @@ -135,7 +136,7 @@ static void PPCInterpreter_STMW(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STH(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS; + sint32 rA, rS; uint32 imm; PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm); ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + imm, (uint16)hCPU->gpr[rS]); @@ -144,7 +145,7 @@ static void PPCInterpreter_STH(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STHU(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS; + sint32 rA, rS; uint32 imm; PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm); ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? 
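The ppc_update_cr0 helper above derives GT as EQ ^ LT ^ 1 instead of performing a third comparison, which works because LT and EQ can never both be set for the same result. A tiny sketch that exercises the identity over representative values; the struct and helper names are mine:

```cpp
#include <cassert>
#include <cstdint>

// CR0 flags for a 32-bit result r: LT is the sign bit, EQ is (r == 0), SO is
// copied from XER, and GT is derived as EQ ^ LT ^ 1.
struct Cr0Bits { uint8_t lt, gt, eq, so; };

static Cr0Bits updateCr0(uint32_t r, uint8_t xerSo)
{
	Cr0Bits cr{};
	cr.lt = (r & 0x80000000u) ? 1 : 0;
	cr.eq = (r == 0) ? 1 : 0;
	cr.gt = cr.eq ^ cr.lt ^ 1; // valid because LT and EQ are mutually exclusive
	cr.so = xerSo;
	return cr;
}

int main()
{
	for (uint32_t r : {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu})
	{
		Cr0Bits cr = updateCr0(r, 0);
		int32_t s = static_cast<int32_t>(r);
		assert(cr.lt == (s < 0 ? 1 : 0));
		assert(cr.eq == (s == 0 ? 1 : 0));
		assert(cr.gt == (s > 0 ? 1 : 0));
	}
}
```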
hCPU->gpr[rA] : 0) + imm, (uint16)hCPU->gpr[rS]); @@ -155,7 +156,7 @@ static void PPCInterpreter_STHU(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STHX(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS, rB; + sint32 rA, rS, rB; PPC_OPC_TEMPL_X(Opcode, rS, rA, rB); ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint16)hCPU->gpr[rS]); PPCInterpreter_nextInstruction(hCPU); @@ -163,7 +164,7 @@ static void PPCInterpreter_STHX(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STHUX(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS, rB; + sint32 rA, rS, rB; PPC_OPC_TEMPL_X(Opcode, rS, rA, rB); ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint16)hCPU->gpr[rS]); if (rA) @@ -173,7 +174,7 @@ static void PPCInterpreter_STHUX(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STHBRX(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS, rB; + sint32 rA, rS, rB; PPC_OPC_TEMPL_X(Opcode, rS, rA, rB); ppcItpCtrl::ppcMem_writeDataU16(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], _swapEndianU16((uint16)hCPU->gpr[rS])); PPCInterpreter_nextInstruction(hCPU); @@ -181,7 +182,7 @@ static void PPCInterpreter_STHBRX(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STB(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS; + sint32 rA, rS; uint32 imm; PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm); ppcItpCtrl::ppcMem_writeDataU8(hCPU, (rA ? hCPU->gpr[rA] : 0) + imm, (uint8)hCPU->gpr[rS]); @@ -190,7 +191,7 @@ static void PPCInterpreter_STB(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STBU(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS; + sint32 rA, rS; uint32 imm; PPC_OPC_TEMPL_D_SImm(Opcode, rS, rA, imm); ppcItpCtrl::ppcMem_writeDataU8(hCPU, hCPU->gpr[rA] + imm, (uint8)hCPU->gpr[rS]); @@ -200,7 +201,7 @@ static void PPCInterpreter_STBU(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STBX(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS, rB; + sint32 rA, rS, rB; PPC_OPC_TEMPL_X(Opcode, rS, rA, rB); ppcItpCtrl::ppcMem_writeDataU8(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint8)hCPU->gpr[rS]); PPCInterpreter_nextInstruction(hCPU); @@ -208,7 +209,7 @@ static void PPCInterpreter_STBX(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STBUX(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS, rB; + sint32 rA, rS, rB; PPC_OPC_TEMPL_X(Opcode, rS, rA, rB); ppcItpCtrl::ppcMem_writeDataU8(hCPU, (rA ? hCPU->gpr[rA] : 0) + hCPU->gpr[rB], (uint8)hCPU->gpr[rS]); if (rA) @@ -218,7 +219,7 @@ static void PPCInterpreter_STBUX(PPCInterpreter_t* hCPU, uint32 Opcode) static void PPCInterpreter_STSWI(PPCInterpreter_t* hCPU, uint32 Opcode) { - int rA, rS, nb; + sint32 rA, rS, nb; PPC_OPC_TEMPL_X(Opcode, rS, rA, nb); if (nb == 0) nb = 32; uint32 ea = rA ? hCPU->gpr[rA] : 0; @@ -228,7 +229,39 @@ static void PPCInterpreter_STSWI(PPCInterpreter_t* hCPU, uint32 Opcode) { if (i == 0) { - r = hCPU->gpr[rS]; + r = rS < 32 ? hCPU->gpr[rS] : 0; // what happens if rS is out of bounds? + rS++; + rS %= 32; + i = 4; + } + ppcItpCtrl::ppcMem_writeDataU8(hCPU, ea, (r >> 24)); + r <<= 8; + ea++; + i--; + nb--; + } + PPCInterpreter_nextInstruction(hCPU); +} + +static void PPCInterpreter_STSWX(PPCInterpreter_t* hCPU, uint32 Opcode) +{ + sint32 rA, rS, rB; + PPC_OPC_TEMPL_X(Opcode, rS, rA, rB); + sint32 nb = hCPU->spr.XER&0x7F; + if (nb == 0) + { + PPCInterpreter_nextInstruction(hCPU); + return; + } + uint32 ea = rA ? 
hCPU->gpr[rA] : 0; + ea += hCPU->gpr[rB]; + uint32 r = 0; + int i = 0; + while (nb > 0) + { + if (i == 0) + { + r = rS < 32 ? hCPU->gpr[rS] : 0; // what happens if rS is out of bounds? rS++; rS %= 32; i = 4; @@ -459,7 +492,6 @@ static void PPCInterpreter_LSWI(PPCInterpreter_t* hCPU, uint32 Opcode) PPC_OPC_TEMPL_X(Opcode, rD, rA, nb); if (nb == 0) nb = 32; - uint32 ea = rA ? hCPU->gpr[rA] : 0; uint32 r = 0; int i = 4; @@ -469,7 +501,8 @@ static void PPCInterpreter_LSWI(PPCInterpreter_t* hCPU, uint32 Opcode) if (i == 0) { i = 4; - hCPU->gpr[rD] = r; + if(rD < 32) + hCPU->gpr[rD] = r; rD++; rD %= 32; r = 0; @@ -486,7 +519,52 @@ static void PPCInterpreter_LSWI(PPCInterpreter_t* hCPU, uint32 Opcode) r <<= 8; i--; } - hCPU->gpr[rD] = r; + if(rD < 32) + hCPU->gpr[rD] = r; + PPCInterpreter_nextInstruction(hCPU); +} + +static void PPCInterpreter_LSWX(PPCInterpreter_t* hCPU, uint32 Opcode) +{ + sint32 rA, rD, rB; + PPC_OPC_TEMPL_X(Opcode, rD, rA, rB); + // byte count comes from XER + uint32 nb = (hCPU->spr.XER>>0)&0x7F; + if (nb == 0) + { + PPCInterpreter_nextInstruction(hCPU); + return; // no-op + } + uint32 ea = rA ? hCPU->gpr[rA] : 0; + ea += hCPU->gpr[rB]; + uint32 r = 0; + int i = 4; + uint8 v; + while (nb>0) + { + if (i == 0) + { + i = 4; + if(rD < 32) + hCPU->gpr[rD] = r; + rD++; + rD %= 32; + r = 0; + } + v = ppcItpCtrl::ppcMem_readDataU8(hCPU, ea); + r <<= 8; + r |= v; + ea++; + i--; + nb--; + } + while (i) + { + r <<= 8; + i--; + } + if(rD < 32) + hCPU->gpr[rD] = r; PPCInterpreter_nextInstruction(hCPU); } diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp index 2d808fef..4449f135 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterMain.cpp @@ -6,7 +6,6 @@ thread_local PPCInterpreter_t* ppcInterpreterCurrentInstance; // main thread instruction counter and timing -volatile uint64 ppcMainThreadCycleCounter = 0; uint64 ppcMainThreadDECCycleValue = 0; // value that was set to dec register uint64 ppcMainThreadDECCycleStart = 0; // at which cycle the dec register was set, if == 0 -> dec is 0 uint64 ppcCyclesSince2000 = 0; @@ -29,11 +28,16 @@ PPCInterpreter_t* PPCInterpreter_createInstance(unsigned int Entrypoint) return pData; } -PPCInterpreter_t* PPCInterpreter_getCurrentInstance() +TLS_WORKAROUND_NOINLINE PPCInterpreter_t* PPCInterpreter_getCurrentInstance() { return ppcInterpreterCurrentInstance; } +TLS_WORKAROUND_NOINLINE void PPCInterpreter_setCurrentInstance(PPCInterpreter_t* hCPU) +{ + ppcInterpreterCurrentInstance = hCPU; +} + uint64 PPCInterpreter_getMainCoreCycleCounter() { return PPCTimer_getFromRDTSC(); @@ -59,16 +63,25 @@ void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue) uint32 PPCInterpreter_getXER(PPCInterpreter_t* hCPU) { uint32 xerValue = hCPU->spr.XER; - xerValue &= ~(1<xer_ca ) - xerValue |= (1<xer_ca) + xerValue |= (1 << XER_BIT_CA); + if (hCPU->xer_so) + xerValue |= (1 << XER_BIT_SO); + if (hCPU->xer_ov) + xerValue |= (1 << XER_BIT_OV); return xerValue; } void PPCInterpreter_setXER(PPCInterpreter_t* hCPU, uint32 v) { - hCPU->spr.XER = v; - hCPU->xer_ca = (v>>XER_BIT_CA)&1; + const uint32 XER_MASK = 0xE0FFFFFF; // some bits are masked out. 
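The getXER/setXER pair around this point keeps CA, SO and OV in separate byte-sized fields so the ALU paths can test and set them without masking, and only reassembles the architectural 32-bit XER when software reads it. A sketch of that split representation; the field and helper names are placeholders, and the reserved-bit masking the diff applies is only noted in a comment:

```cpp
#include <cassert>
#include <cstdint>

// XER bit positions, matching XER_BIT_CA / XER_BIT_SO / XER_BIT_OV in the diff.
constexpr uint32_t XER_BIT_CA = 29;
constexpr uint32_t XER_BIT_SO = 31;
constexpr uint32_t XER_BIT_OV = 30;

// CA/SO/OV live in dedicated byte fields for fast access; the remaining XER
// bits (e.g. the string byte count in bits 0..6) stay in a 32-bit value.
struct XerState
{
	uint32_t sprXer = 0;
	uint8_t ca = 0, so = 0, ov = 0;
};

static uint32_t getXer(const XerState& s)
{
	uint32_t v = s.sprXer;
	if (s.ca) v |= 1u << XER_BIT_CA;
	if (s.so) v |= 1u << XER_BIT_SO;
	if (s.ov) v |= 1u << XER_BIT_OV;
	return v;
}

static void setXer(XerState& s, uint32_t v)
{
	constexpr uint32_t mirrored = (1u << XER_BIT_CA) | (1u << XER_BIT_SO) | (1u << XER_BIT_OV);
	s.sprXer = v & ~mirrored; // the diff additionally masks reserved bits here
	s.ca = (v >> XER_BIT_CA) & 1;
	s.so = (v >> XER_BIT_SO) & 1;
	s.ov = (v >> XER_BIT_OV) & 1;
}

int main()
{
	XerState s;
	setXer(s, (1u << XER_BIT_SO) | (1u << XER_BIT_CA) | 0x20); // SO + CA + byte count 32
	assert(s.so == 1 && s.ca == 1 && s.ov == 0);
	assert(getXer(s) == ((1u << XER_BIT_SO) | (1u << XER_BIT_CA) | 0x20));
}
```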
Figure out which ones exactly + hCPU->spr.XER = v & XER_MASK; + hCPU->xer_ca = (v >> XER_BIT_CA) & 1; + hCPU->xer_so = (v >> XER_BIT_SO) & 1; + hCPU->xer_ov = (v >> XER_BIT_OV) & 1; } uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU) @@ -78,24 +91,25 @@ uint32 PPCInterpreter_getCoreIndex(PPCInterpreter_t* hCPU) uint32 PPCInterpreter_getCurrentCoreIndex() { - return ppcInterpreterCurrentInstance->spr.UPIR; + return PPCInterpreter_getCurrentInstance()->spr.UPIR; }; uint8* PPCInterpreterGetStackPointer() { - return memory_getPointerFromVirtualOffset(ppcInterpreterCurrentInstance->gpr[1]); + return memory_getPointerFromVirtualOffset(PPCInterpreter_getCurrentInstance()->gpr[1]); } -uint8* PPCInterpreterGetAndModifyStackPointer(sint32 offset) +uint8* PPCInterpreter_PushAndReturnStackPointer(sint32 offset) { - uint8* result = memory_getPointerFromVirtualOffset(ppcInterpreterCurrentInstance->gpr[1] - offset); - ppcInterpreterCurrentInstance->gpr[1] -= offset; + PPCInterpreter_t* hCPU = PPCInterpreter_getCurrentInstance(); + uint8* result = memory_getPointerFromVirtualOffset(hCPU->gpr[1] - offset); + hCPU->gpr[1] -= offset; return result; } void PPCInterpreterModifyStackPointer(sint32 offset) { - ppcInterpreterCurrentInstance->gpr[1] -= offset; + PPCInterpreter_getCurrentInstance()->gpr[1] -= offset; } uint32 RPLLoader_MakePPCCallable(void(*ppcCallableExport)(PPCInterpreter_t* hCPU)); diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp index 7178eaeb..7809a01d 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.cpp @@ -5,14 +5,13 @@ #include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h" #include "../Recompiler/PPCRecompiler.h" -#include "../Recompiler/PPCRecompilerX64.h" #include #include "Cafe/HW/Latte/Core/LatteBufferCache.h" void PPCInterpreter_MFMSR(PPCInterpreter_t* hCPU, uint32 Opcode) { - forceLogDebug_printf("Rare instruction: MFMSR"); + cemuLog_logDebug(LogType::Force, "Rare instruction: MFMSR"); if (hCPU->sprExtended.msr & MSR_PR) { PPC_ASSERT(true); @@ -28,7 +27,7 @@ void PPCInterpreter_MFMSR(PPCInterpreter_t* hCPU, uint32 Opcode) void PPCInterpreter_MTMSR(PPCInterpreter_t* hCPU, uint32 Opcode) { - forceLogDebug_printf("Rare instruction: MTMSR"); + cemuLog_logDebug(LogType::Force, "Rare instruction: MTMSR"); if (hCPU->sprExtended.msr & MSR_PR) { PPC_ASSERT(true); @@ -43,7 +42,7 @@ void PPCInterpreter_MTMSR(PPCInterpreter_t* hCPU, uint32 Opcode) void PPCInterpreter_MTFSB1X(PPCInterpreter_t* hCPU, uint32 Opcode) { - forceLogDebug_printf("Rare instruction: MTFSB1X"); + cemuLog_logDebug(LogType::Force, "Rare instruction: MTFSB1X"); int crbD, n1, n2; PPC_OPC_TEMPL_X(Opcode, crbD, n1, n2); if (crbD != 1 && crbD != 2) @@ -94,7 +93,6 @@ void PPCInterpreter_MTCRF(PPCInterpreter_t* hCPU, uint32 Opcode) { // frequently used by GCC compiled code (e.g. 
SM64 port) // tested - uint32 rS; uint32 crfMask; PPC_OPC_TEMPL_XFX(Opcode, rS, crfMask); @@ -212,7 +210,7 @@ void PPCInterpreter_BCLRX(PPCInterpreter_t* hCPU, uint32 Opcode) if (hCPU->spr.CTR == 0) { PPC_ASSERT(true); - forceLogDebug_printf("Decrementer underflow!\n"); + cemuLog_logDebug(LogType::Force, "Decrementer underflow!"); } hCPU->spr.CTR--; } @@ -331,7 +329,7 @@ void PPCInterpreter_EIEIO(PPCInterpreter_t* hCPU, uint32 Opcode) void PPCInterpreter_SC(PPCInterpreter_t* hCPU, uint32 Opcode) { - forceLogDebug_printf("SC executed at 0x%08x", hCPU->instructionPointer); + cemuLog_logDebug(LogType::Force, "SC executed at 0x{:08x}", hCPU->instructionPointer); // next instruction PPCInterpreter_nextInstruction(hCPU); } @@ -352,7 +350,7 @@ void PPCInterpreter_ISYNC(PPCInterpreter_t* hCPU, uint32 Opcode) void PPCInterpreter_RFI(PPCInterpreter_t* hCPU, uint32 Opcode) { - forceLogDebug_printf("RFI"); + cemuLog_logDebug(LogType::Force, "RFI"); hCPU->sprExtended.msr &= ~(0x87C0FF73 | 0x00040000); hCPU->sprExtended.msr |= hCPU->sprExtended.srr1 & 0x87c0ff73; hCPU->sprExtended.msr |= MSR_RI; diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp index 4da41590..9bfcd53d 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterOPC.hpp @@ -65,9 +65,14 @@ static void PPCInterpreter_MFTB(PPCInterpreter_t* hCPU, uint32 opcode) static void PPCInterpreter_TW(PPCInterpreter_t* hCPU, uint32 opcode) { sint32 to, rA, rB; - PPC_OPC_TEMPL_X(opcode, to, rB, rA); + PPC_OPC_TEMPL_X(opcode, to, rA, rB); cemu_assert_debug(to == 0); + if(to != 0) + PPCInterpreter_nextInstruction(hCPU); - debugger_enterTW(hCPU); + if (rA == DEBUGGER_BP_T_DEBUGGER) + debugger_enterTW(hCPU); + else if (rA == DEBUGGER_BP_T_GDBSTUB) + g_gdbstub->HandleTrapInstruction(hCPU); } diff --git a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterSPR.hpp b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterSPR.hpp index 819f317a..2d38e728 100644 --- a/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterSPR.hpp +++ b/src/Cafe/HW/Espresso/Interpreter/PPCInterpreterSPR.hpp @@ -559,7 +559,7 @@ static void PPCSprSupervisor_set(PPCInterpreter_t* hCPU, uint32 spr, uint32 newV break; default: debug_printf("[C%d] Set unhandled SPR 0x%x to %08x (supervisor mode)\n", hCPU->spr.UPIR, spr, newValue); -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT assert_dbg(); #endif break; @@ -598,7 +598,7 @@ static void PPCSpr_set(PPCInterpreter_t* hCPU, uint32 spr, uint32 newValue) break; default: debug_printf("[C%d] Set unhandled SPR %d to %08x\n", hCPU->spr.UPIR, spr, newValue); -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT assert_dbg(); #endif break; @@ -782,7 +782,7 @@ static uint32 PPCSprSupervisor_get(PPCInterpreter_t* hCPU, uint32 spr) break; default: debug_printf("[C%d] Get unhandled SPR %d\n", hCPU->spr.UPIR, spr); -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT assert_dbg(); #endif break; @@ -840,7 +840,7 @@ static uint32 PPCSpr_get(PPCInterpreter_t* hCPU, uint32 spr) break; default: debug_printf("[C%d] Get unhandled SPR %d\n", hCPU->spr.UPIR, spr); -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT assert_dbg(); #endif break; diff --git a/src/Cafe/HW/Espresso/PPCCallback.h b/src/Cafe/HW/Espresso/PPCCallback.h index 0771020d..3d5393b1 100644 --- a/src/Cafe/HW/Espresso/PPCCallback.h +++ b/src/Cafe/HW/Espresso/PPCCallback.h @@ -5,8 +5,28 @@ struct PPCCoreCallbackData_t { sint32 gprCount = 0; sint32 floatCount = 0; + sint32 stackCount = 0; 
}; +inline void _PPCCoreCallback_writeGPRArg(PPCCoreCallbackData_t& data, PPCInterpreter_t* hCPU, uint32 value) +{ + if (data.gprCount < 8) + { + hCPU->gpr[3 + data.gprCount] = value; + data.gprCount++; + } + else + { + uint32 stackOffset = 8 + data.stackCount * 4; + + // PPCCore_executeCallbackInternal does -16*4 to save the current stack area + stackOffset -= 16 * 4; + + memory_writeU32(hCPU->gpr[1] + stackOffset, value); + data.stackCount++; + } +} + // callback functions inline uint32 PPCCoreCallback(MPTR function, const PPCCoreCallbackData_t& data) { @@ -16,44 +36,42 @@ inline uint32 PPCCoreCallback(MPTR function, const PPCCoreCallbackData_t& data) template uint32 PPCCoreCallback(MPTR function, PPCCoreCallbackData_t& data, T currentArg, TArgs... args) { - cemu_assert_debug(data.gprCount <= 8); - cemu_assert_debug(data.floatCount <= 8); + // TODO float arguments on stack + cemu_assert_debug(data.floatCount < 8); + + PPCInterpreter_t* hCPU = PPCInterpreter_getCurrentInstance(); if constexpr (std::is_pointer_v) { - ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = MEMPTR(currentArg).GetMPTR(); - data.gprCount++; + _PPCCoreCallback_writeGPRArg(data, hCPU, MEMPTR(currentArg).GetMPTR()); } else if constexpr (std::is_base_of_v>) { - ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = currentArg.GetMPTR(); - data.gprCount++; + _PPCCoreCallback_writeGPRArg(data, hCPU, currentArg.GetMPTR()); } else if constexpr (std::is_reference_v) { - ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = MEMPTR(¤tArg).GetMPTR(); - data.gprCount++; + _PPCCoreCallback_writeGPRArg(data, hCPU, MEMPTR(¤tArg).GetMPTR()); } else if constexpr(std::is_enum_v) { using TEnum = typename std::underlying_type::type; - return PPCCoreCallback(function, data, (TEnum)currentArg, std::forward(args)...); + return PPCCoreCallback(function, data, (TEnum)currentArg, std::forward(args)...); } else if constexpr (std::is_floating_point_v) { - ppcInterpreterCurrentInstance->fpr[1 + data.floatCount].fpr = (double)currentArg; + hCPU->fpr[1 + data.floatCount].fpr = (double)currentArg; data.floatCount++; } else if constexpr (std::is_integral_v && sizeof(T) == sizeof(uint64)) { - ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = (uint32)(currentArg >> 32); // high - ppcInterpreterCurrentInstance->gpr[3 + data.gprCount + 1] = (uint32)currentArg; // low + hCPU->gpr[3 + data.gprCount] = (uint32)(currentArg >> 32); // high + hCPU->gpr[3 + data.gprCount + 1] = (uint32)currentArg; // low data.gprCount += 2; } else { - ppcInterpreterCurrentInstance->gpr[3 + data.gprCount] = (uint32)currentArg; - data.gprCount++; + _PPCCoreCallback_writeGPRArg(data, hCPU, (uint32)currentArg); } return PPCCoreCallback(function, data, args...); diff --git a/src/Cafe/HW/Espresso/PPCScheduler.cpp b/src/Cafe/HW/Espresso/PPCScheduler.cpp index ab662150..a4c04aaa 100644 --- a/src/Cafe/HW/Espresso/PPCScheduler.cpp +++ b/src/Cafe/HW/Espresso/PPCScheduler.cpp @@ -11,21 +11,24 @@ uint32 ppcThreadQuantum = 45000; // execute 45000 instructions before thread res void PPCInterpreter_relinquishTimeslice() { - if( ppcInterpreterCurrentInstance->remainingCycles >= 0 ) + PPCInterpreter_t* hCPU = PPCInterpreter_getCurrentInstance(); + if( hCPU->remainingCycles >= 0 ) { - ppcInterpreterCurrentInstance->skippedCycles = ppcInterpreterCurrentInstance->remainingCycles + 1; - ppcInterpreterCurrentInstance->remainingCycles = -1; + hCPU->skippedCycles = hCPU->remainingCycles + 1; + hCPU->remainingCycles = -1; } } void PPCCore_boostQuantum(sint32 numCycles) { - 
ppcInterpreterCurrentInstance->remainingCycles += numCycles; + PPCInterpreter_t* hCPU = PPCInterpreter_getCurrentInstance(); + hCPU->remainingCycles += numCycles; } void PPCCore_deboostQuantum(sint32 numCycles) { - ppcInterpreterCurrentInstance->remainingCycles -= numCycles; + PPCInterpreter_t* hCPU = PPCInterpreter_getCurrentInstance(); + hCPU->remainingCycles -= numCycles; } namespace coreinit @@ -36,7 +39,7 @@ namespace coreinit void PPCCore_switchToScheduler() { cemu_assert_debug(__OSHasSchedulerLock() == false); // scheduler lock must not be hold past thread time slice - cemu_assert_debug(ppcInterpreterCurrentInstance->coreInterruptMask != 0 || CafeSystem::GetForegroundTitleId() == 0x000500001019e600); + cemu_assert_debug(PPCInterpreter_getCurrentInstance()->coreInterruptMask != 0 || CafeSystem::GetForegroundTitleId() == 0x000500001019e600); __OSLockScheduler(); coreinit::__OSThreadSwitchToNext(); __OSUnlockScheduler(); @@ -45,7 +48,7 @@ void PPCCore_switchToScheduler() void PPCCore_switchToSchedulerWithLock() { cemu_assert_debug(__OSHasSchedulerLock() == true); // scheduler lock must be hold - cemu_assert_debug(ppcInterpreterCurrentInstance->coreInterruptMask != 0 || CafeSystem::GetForegroundTitleId() == 0x000500001019e600); + cemu_assert_debug(PPCInterpreter_getCurrentInstance()->coreInterruptMask != 0 || CafeSystem::GetForegroundTitleId() == 0x000500001019e600); coreinit::__OSThreadSwitchToNext(); } @@ -58,7 +61,7 @@ void _PPCCore_callbackExit(PPCInterpreter_t* hCPU) PPCInterpreter_t* PPCCore_executeCallbackInternal(uint32 functionMPTR) { cemu_assert_debug(functionMPTR != 0); - PPCInterpreter_t* hCPU = ppcInterpreterCurrentInstance; + PPCInterpreter_t* hCPU = PPCInterpreter_getCurrentInstance(); // remember LR and instruction pointer uint32 lr = hCPU->spr.LR; uint32 ip = hCPU->instructionPointer; @@ -100,11 +103,6 @@ PPCInterpreter_t* PPCCore_executeCallbackInternal(uint32 functionMPTR) return hCPU; } -void PPCCore_deleteAllThreads() -{ - assert_dbg(); -} - void PPCCore_init() { } diff --git a/src/Cafe/HW/Espresso/PPCSchedulerLLE.cpp b/src/Cafe/HW/Espresso/PPCSchedulerLLE.cpp index 4732586a..bb4bf9ff 100644 --- a/src/Cafe/HW/Espresso/PPCSchedulerLLE.cpp +++ b/src/Cafe/HW/Espresso/PPCSchedulerLLE.cpp @@ -163,7 +163,7 @@ void smdpArea_processCommand(smdpArea_t* smdpArea, smdpCommand_t* cmd) { cmd->ukn08 = 1; // cmd->ukn2C ? - forceLogDebug_printf("SMDP command received - todo"); + cemuLog_logDebug(LogType::Force, "SMDP command received - todo"); smdpArea_pushResult(smdpArea, memory_getVirtualOffsetFromPointer(cmd)); } else @@ -220,7 +220,7 @@ void PPCCoreLLE_startSingleCoreScheduler(uint32 entrypoint) for (uint32 coreIndex = 0; coreIndex < 3; coreIndex++) { PPCInterpreter_t* hCPU = cpuContext->cores+coreIndex; - ppcInterpreterCurrentInstance = hCPU; + PPCInterpreter_setCurrentInstance(hCPU); if (coreIndex == 1) { // check SCR core 1 enable bit @@ -242,4 +242,4 @@ void PPCCoreLLE_startSingleCoreScheduler(uint32 entrypoint) } } assert_dbg(); -} \ No newline at end of file +} diff --git a/src/Cafe/HW/Espresso/PPCState.h b/src/Cafe/HW/Espresso/PPCState.h index 540bbd4e..fd943d39 100644 --- a/src/Cafe/HW/Espresso/PPCState.h +++ b/src/Cafe/HW/Espresso/PPCState.h @@ -49,12 +49,12 @@ struct PPCInterpreter_t uint32 fpscr; uint8 cr[32]; // 0 -> bit not set, 1 -> bit set (upper 7 bits of each byte must always be zero) (cr0 starts at index 0, cr1 at index 4 ..) 
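	// each CR bit occupies its own byte, which is why the recompiler backends further down
	// access CR fields with single-byte loads/stores (e.g. ldrb/strb in the AArch64 backend)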
uint8 xer_ca; // carry from xer - uint8 LSQE; - uint8 PSE; + uint8 xer_so; + uint8 xer_ov; // thread remaining cycles sint32 remainingCycles; // if this value goes below zero, the next thread is scheduled sint32 skippedCycles; // number of skipped cycles - struct + struct { uint32 LR; uint32 CTR; @@ -65,12 +65,12 @@ struct PPCInterpreter_t // LWARX and STWCX uint32 reservedMemAddr; uint32 reservedMemValue; - /* Note: Everything above is potentially hardcoded into Cemuhook. Do not touch anything or it will risk breaking compatibility */ // temporary storage for recompiler FPR_t temporaryFPR[8]; - uint32 temporaryGPR[4]; + uint32 temporaryGPR[4]; // deprecated, refactor backend dependency on this away + uint32 temporaryGPR_reg[4]; // values below this are not used by Cafe OS usermode - struct + struct { uint32 fpecr; // is this the same register as fpscr ? uint32 DEC; @@ -85,7 +85,7 @@ struct PPCInterpreter_t // DMA uint32 dmaU; uint32 dmaL; - // MMU + // MMU uint32 dbatU[8]; uint32 dbatL[8]; uint32 ibatU[8]; @@ -93,6 +93,8 @@ struct PPCInterpreter_t uint32 sr[16]; uint32 sdr1; }sprExtended; + uint8 LSQE; + uint8 PSE; // global CPU values PPCInterpreterGlobal_t* global; // interpreter control @@ -150,6 +152,7 @@ static uint64 PPCInterpreter_getCallParamU64(PPCInterpreter_t* hCPU, uint32 inde PPCInterpreter_t* PPCInterpreter_createInstance(unsigned int Entrypoint); PPCInterpreter_t* PPCInterpreter_getCurrentInstance(); +void PPCInterpreter_setCurrentInstance(PPCInterpreter_t* hCPU); uint64 PPCInterpreter_getMainCoreCycleCounter(); @@ -193,7 +196,6 @@ uint32 PPCInterpreter_getCurrentCoreIndex(); void PPCInterpreter_setDEC(PPCInterpreter_t* hCPU, uint32 newValue); // timing for main processor -extern volatile uint64 ppcMainThreadCycleCounter; extern uint64 ppcCyclesSince2000; // on init this is set to the cycles that passed since 1.1.2000 extern uint64 ppcCyclesSince2000TimerClock; // on init this is set to the cycles that passed since 1.1.2000 / 20 extern uint64 ppcCyclesSince2000_UTC; @@ -204,7 +206,6 @@ extern uint64 ppcMainThreadDECCycleStart; // at which cycle the dec register was void PPCTimer_init(); void PPCTimer_waitForInit(); uint64 PPCTimer_getFromRDTSC(); -bool PPCTimer_hasInvariantRDTSCSupport(); uint64 PPCTimer_microsecondsToTsc(uint64 us); uint64 PPCTimer_tscToMicroseconds(uint64 us); @@ -215,8 +216,7 @@ void PPCTimer_start(); // core info and control extern uint32 ppcThreadQuantum; -extern thread_local PPCInterpreter_t *ppcInterpreterCurrentInstance; -uint8* PPCInterpreterGetAndModifyStackPointer(sint32 offset); +uint8* PPCInterpreter_PushAndReturnStackPointer(sint32 offset); uint8* PPCInterpreterGetStackPointer(); void PPCInterpreterModifyStackPointer(sint32 offset); @@ -230,15 +230,14 @@ static inline float flushDenormalToZero(float f) // HLE interface -typedef void(*HLECALL)(PPCInterpreter_t* hCPU); +using HLECALL = void(*)(PPCInterpreter_t*); +using HLEIDX = sint32; -typedef sint32 HLEIDX; -HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall); +HLEIDX PPCInterpreter_registerHLECall(HLECALL hleCall, std::string hleName); HLECALL PPCInterpreter_getHLECall(HLEIDX funcIndex); // HLE scheduler -void PPCCore_deleteAllThreads(); void PPCInterpreter_relinquishTimeslice(); void PPCCore_boostQuantum(sint32 numCycles); diff --git a/src/Cafe/HW/Espresso/PPCTimer.cpp b/src/Cafe/HW/Espresso/PPCTimer.cpp index 36198dac..257973a6 100644 --- a/src/Cafe/HW/Espresso/PPCTimer.cpp +++ b/src/Cafe/HW/Espresso/PPCTimer.cpp @@ -1,9 +1,13 @@ #include "Cafe/HW/Espresso/Const.h" -#include -#include 
"asm/x64util.h" #include "config/ActiveSettings.h" #include "util/helpers/fspinlock.h" #include "util/highresolutiontimer/HighResolutionTimer.h" +#include "Common/cpu_features.h" + +#if defined(ARCH_X86_64) +#include +#pragma intrinsic(__rdtsc) +#endif uint64 _rdtscLastMeasure = 0; uint64 _rdtscFrequency = 0; @@ -18,8 +22,6 @@ static_assert(sizeof(uint128_t) == 16); uint128_t _rdtscAcc{}; -#pragma intrinsic(__rdtsc) - uint64 muldiv64(uint64 a, uint64 b, uint64 d) { uint64 diva = a / d; @@ -29,17 +31,12 @@ uint64 muldiv64(uint64 a, uint64 b, uint64 d) return diva * b + moda * divb + moda * modb / d; } -bool PPCTimer_hasInvariantRDTSCSupport() -{ - uint32 cpuv[4]; - cpuid((int*)cpuv, 0x80000007); - return ((cpuv[3] >> 8) & 1); -} - uint64 PPCTimer_estimateRDTSCFrequency() { - if (PPCTimer_hasInvariantRDTSCSupport() == false) - forceLog_printf("Invariant TSC not supported"); + #if defined(ARCH_X86_64) + if (!g_CPUFeatures.x86.invariant_tsc) + cemuLog_log(LogType::Force, "Invariant TSC not supported"); + #endif _mm_mfence(); uint64 tscStart = __rdtsc(); @@ -62,12 +59,12 @@ uint64 PPCTimer_estimateRDTSCFrequency() uint64 tsc_freq = muldiv64(tsc_diff, hrtFreq, hrtDiff); // uint64 freqMultiplier = tsc_freq / hrtFreq; - //forceLog_printf("RDTSC measurement test:"); - //forceLog_printf("TSC-diff: 0x%016llx", tsc_diff); - //forceLog_printf("TSC-freq: 0x%016llx", tsc_freq); - //forceLog_printf("HPC-diff: 0x%016llx", qpc_diff); - //forceLog_printf("HPC-freq: 0x%016llx", (uint64)qpc_freq.QuadPart); - //forceLog_printf("Multiplier: 0x%016llx", freqMultiplier); + //cemuLog_log(LogType::Force, "RDTSC measurement test:"); + //cemuLog_log(LogType::Force, "TSC-diff: 0x{:016x}", tsc_diff); + //cemuLog_log(LogType::Force, "TSC-freq: 0x{:016x}", tsc_freq); + //cemuLog_log(LogType::Force, "HPC-diff: 0x{:016x}", qpc_diff); + //cemuLog_log(LogType::Force, "HPC-freq: 0x{:016x}", (uint64)qpc_freq.QuadPart); + //cemuLog_log(LogType::Force, "Multiplier: 0x{:016x}", freqMultiplier); return tsc_freq; } @@ -129,7 +126,7 @@ FSpinlock sTimerSpinlock; // thread safe uint64 PPCTimer_getFromRDTSC() { - sTimerSpinlock.acquire(); + sTimerSpinlock.lock(); _mm_mfence(); uint64 rdtscCurrentMeasure = __rdtsc(); uint64 rdtscDif = rdtscCurrentMeasure - _rdtscLastMeasure; @@ -165,6 +162,6 @@ uint64 PPCTimer_getFromRDTSC() _tickSummary += elapsedTick; - sTimerSpinlock.release(); + sTimerSpinlock.unlock(); return _tickSummary; } diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp new file mode 100644 index 00000000..728460a4 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp @@ -0,0 +1,1695 @@ +#include "BackendAArch64.h" + +#pragma push_macro("CSIZE") +#undef CSIZE +#include +#pragma pop_macro("CSIZE") +#include + +#include + +#include "../PPCRecompiler.h" +#include "Common/precompiled.h" +#include "Common/cpu_features.h" +#include "HW/Espresso/Interpreter/PPCInterpreterInternal.h" +#include "HW/Espresso/Interpreter/PPCInterpreterHelper.h" +#include "HW/Espresso/PPCState.h" + +using namespace Xbyak_aarch64; + +constexpr uint32 TEMP_GPR_1_ID = 25; +constexpr uint32 TEMP_GPR_2_ID = 26; +constexpr uint32 PPC_RECOMPILER_INSTANCE_DATA_REG_ID = 27; +constexpr uint32 MEMORY_BASE_REG_ID = 28; +constexpr uint32 HCPU_REG_ID = 29; + +constexpr uint32 TEMP_FPR_ID = 31; + +struct FPReg +{ + explicit FPReg(size_t index) + : index(index), VReg(index), QReg(index), DReg(index), SReg(index), HReg(index), BReg(index) + { + } 
+ const size_t index; + const VReg VReg; + const QReg QReg; + const DReg DReg; + const SReg SReg; + const HReg HReg; + const BReg BReg; +}; + +struct GPReg +{ + explicit GPReg(size_t index) + : index(index), XReg(index), WReg(index) + { + } + const size_t index; + const XReg XReg; + const WReg WReg; +}; + +static const XReg HCPU_REG{HCPU_REG_ID}, PPC_REC_INSTANCE_REG{PPC_RECOMPILER_INSTANCE_DATA_REG_ID}, MEM_BASE_REG{MEMORY_BASE_REG_ID}; +static const GPReg TEMP_GPR1{TEMP_GPR_1_ID}; +static const GPReg TEMP_GPR2{TEMP_GPR_2_ID}; +static const GPReg LR{TEMP_GPR_2_ID}; + +static const FPReg TEMP_FPR{TEMP_FPR_ID}; + +static const util::Cpu s_cpu; + +class AArch64Allocator : public Allocator +{ + private: +#ifdef XBYAK_USE_MMAP_ALLOCATOR + inline static MmapAllocator s_allocator; +#else + inline static Allocator s_allocator; +#endif + Allocator* m_allocatorImpl; + bool m_freeDisabled = false; + + public: + AArch64Allocator() + : m_allocatorImpl(reinterpret_cast(&s_allocator)) {} + + uint32* alloc(size_t size) override + { + return m_allocatorImpl->alloc(size); + } + + void setFreeDisabled(bool disabled) + { + m_freeDisabled = disabled; + } + + void free(uint32* p) override + { + if (!m_freeDisabled) + m_allocatorImpl->free(p); + } + + [[nodiscard]] bool useProtect() const override + { + return !m_freeDisabled && m_allocatorImpl->useProtect(); + } +}; + +struct UnconditionalJumpInfo +{ + IMLSegment* target; +}; + +struct ConditionalRegJumpInfo +{ + IMLSegment* target; + WReg regBool; + bool mustBeTrue; +}; + +struct NegativeRegValueJumpInfo +{ + IMLSegment* target; + WReg regValue; +}; + +using JumpInfo = std::variant< + UnconditionalJumpInfo, + ConditionalRegJumpInfo, + NegativeRegValueJumpInfo>; + +struct AArch64GenContext_t : CodeGenerator +{ + explicit AArch64GenContext_t(Allocator* allocator = nullptr); + void enterRecompilerCode(); + void leaveRecompilerCode(); + + void r_name(IMLInstruction* imlInstruction); + void name_r(IMLInstruction* imlInstruction); + bool r_s32(IMLInstruction* imlInstruction); + bool r_r(IMLInstruction* imlInstruction); + bool r_r_s32(IMLInstruction* imlInstruction); + bool r_r_s32_carry(IMLInstruction* imlInstruction); + bool r_r_r(IMLInstruction* imlInstruction); + bool r_r_r_carry(IMLInstruction* imlInstruction); + void compare(IMLInstruction* imlInstruction); + void compare_s32(IMLInstruction* imlInstruction); + bool load(IMLInstruction* imlInstruction, bool indexed); + bool store(IMLInstruction* imlInstruction, bool indexed); + void atomic_cmp_store(IMLInstruction* imlInstruction); + bool macro(IMLInstruction* imlInstruction); + void call_imm(IMLInstruction* imlInstruction); + bool fpr_load(IMLInstruction* imlInstruction, bool indexed); + bool fpr_store(IMLInstruction* imlInstruction, bool indexed); + void fpr_r_r(IMLInstruction* imlInstruction); + void fpr_r_r_r(IMLInstruction* imlInstruction); + void fpr_r_r_r_r(IMLInstruction* imlInstruction); + void fpr_r(IMLInstruction* imlInstruction); + void fpr_compare(IMLInstruction* imlInstruction); + void cjump(IMLInstruction* imlInstruction, IMLSegment* imlSegment); + void jump(IMLSegment* imlSegment); + void conditionalJumpCycleCheck(IMLSegment* imlSegment); + + static constexpr size_t MAX_JUMP_INSTR_COUNT = 2; + std::list> jumps; + void prepareJump(JumpInfo&& jumpInfo) + { + jumps.emplace_back(getSize(), jumpInfo); + for (int i = 0; i < MAX_JUMP_INSTR_COUNT; ++i) + nop(); + } + + std::map segmentStarts; + void storeSegmentStart(IMLSegment* imlSegment) + { + segmentStarts[imlSegment] = getSize(); + } + + bool 
processAllJumps() + { + for (auto jump : jumps) + { + auto jumpStart = jump.first; + auto jumpInfo = jump.second; + bool success = std::visit( + [&, this](const auto& jump) { + setSize(jumpStart); + sint64 targetAddress = segmentStarts.at(jump.target); + sint64 addressOffset = targetAddress - jumpStart; + return handleJump(addressOffset, jump); + }, + jumpInfo); + if (!success) + { + return false; + } + } + return true; + } + + bool handleJump(sint64 addressOffset, const UnconditionalJumpInfo& jump) + { + // in +/-128MB + if (-0x8000000 <= addressOffset && addressOffset <= 0x7ffffff) + { + b(addressOffset); + return true; + } + + cemu_assert_suspicious(); + + return false; + } + + bool handleJump(sint64 addressOffset, const ConditionalRegJumpInfo& jump) + { + bool mustBeTrue = jump.mustBeTrue; + + // in +/-32KB + if (-0x8000 <= addressOffset && addressOffset <= 0x7fff) + { + if (mustBeTrue) + tbnz(jump.regBool, 0, addressOffset); + else + tbz(jump.regBool, 0, addressOffset); + return true; + } + + // in +/-1MB + if (-0x100000 <= addressOffset && addressOffset <= 0xfffff) + { + if (mustBeTrue) + cbnz(jump.regBool, addressOffset); + else + cbz(jump.regBool, addressOffset); + return true; + } + + Label skipJump; + if (mustBeTrue) + tbz(jump.regBool, 0, skipJump); + else + tbnz(jump.regBool, 0, skipJump); + addressOffset -= 4; + + // in +/-128MB + if (-0x8000000 <= addressOffset && addressOffset <= 0x7ffffff) + { + b(addressOffset); + L(skipJump); + return true; + } + + cemu_assert_suspicious(); + + return false; + } + + bool handleJump(sint64 addressOffset, const NegativeRegValueJumpInfo& jump) + { + // in +/-32KB + if (-0x8000 <= addressOffset && addressOffset <= 0x7fff) + { + tbnz(jump.regValue, 31, addressOffset); + return true; + } + + // in +/-1MB + if (-0x100000 <= addressOffset && addressOffset <= 0xfffff) + { + tst(jump.regValue, 0x80000000); + addressOffset -= 4; + bne(addressOffset); + return true; + } + + Label skipJump; + tbz(jump.regValue, 31, skipJump); + addressOffset -= 4; + + // in +/-128MB + if (-0x8000000 <= addressOffset && addressOffset <= 0x7ffffff) + { + b(addressOffset); + L(skipJump); + return true; + } + + cemu_assert_suspicious(); + + return false; + } +}; + +template T> +T fpReg(const IMLReg& imlReg) +{ + cemu_assert_debug(imlReg.GetRegFormat() == IMLRegFormat::F64); + auto regId = imlReg.GetRegID(); + cemu_assert_debug(regId >= IMLArchAArch64::PHYSREG_FPR_BASE && regId < IMLArchAArch64::PHYSREG_FPR_BASE + IMLArchAArch64::PHYSREG_FPR_COUNT); + return T(regId - IMLArchAArch64::PHYSREG_FPR_BASE); +} + +template T> +T gpReg(const IMLReg& imlReg) +{ + auto regFormat = imlReg.GetRegFormat(); + if (std::is_same_v) + cemu_assert_debug(regFormat == IMLRegFormat::I32); + else if (std::is_same_v) + cemu_assert_debug(regFormat == IMLRegFormat::I64); + else + cemu_assert_unimplemented(); + + auto regId = imlReg.GetRegID(); + cemu_assert_debug(regId >= IMLArchAArch64::PHYSREG_GPR_BASE && regId < IMLArchAArch64::PHYSREG_GPR_BASE + IMLArchAArch64::PHYSREG_GPR_COUNT); + return T(regId - IMLArchAArch64::PHYSREG_GPR_BASE); +} + +template To, std::derived_from From> +To aliasAs(const From& reg) +{ + return To(reg.getIdx()); +} + +template To, std::derived_from From> +To aliasAs(const From& reg) +{ + return To(reg.getIdx()); +} + +AArch64GenContext_t::AArch64GenContext_t(Allocator* allocator) + : CodeGenerator(DEFAULT_MAX_CODE_SIZE, AutoGrow, allocator) +{ +} + +constexpr uint64 ones(uint32 size) +{ + return (size == 64) ? 
0xffffffffffffffff : ((uint64)1 << size) - 1; +} + +constexpr bool isAdrImmValidFPR(sint32 imm, uint32 bits) +{ + uint32 times = bits / 8; + uint32 sh = std::countr_zero(times); + return (0 <= imm && imm <= 4095 * times) && ((uint64)imm & ones(sh)) == 0; +} + +constexpr bool isAdrImmValidGPR(sint32 imm, uint32 bits = 32) +{ + uint32 size = std::countr_zero(bits / 8u); + sint32 times = 1 << size; + return (0 <= imm && imm <= 4095 * times) && ((uint64)imm & ones(size)) == 0; +} + +constexpr bool isAdrImmRangeValid(sint32 rangeStart, sint32 rangeOffset, sint32 bits, std::invocable auto check) +{ + for (sint32 i = rangeStart; i <= rangeStart + rangeOffset; i += bits / 8) + if (!check(i, bits)) + return false; + return true; +} + +constexpr bool isAdrImmRangeValidGPR(sint32 rangeStart, sint32 rangeOffset, sint32 bits = 32) +{ + return isAdrImmRangeValid(rangeStart, rangeOffset, bits, isAdrImmValidGPR); +} + +constexpr bool isAdrImmRangeValidFpr(sint32 rangeStart, sint32 rangeOffset, sint32 bits) +{ + return isAdrImmRangeValid(rangeStart, rangeOffset, bits, isAdrImmValidFPR); +} + +// Verify that all of the offsets for the PPCInterpreter_t members that we use in r_name/name_r have a valid imm value for AdrUimm +static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, gpr), sizeof(uint32) * 31)); +static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, spr.LR))); +static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, spr.CTR))); +static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, spr.XER))); +static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, spr.UGQR), sizeof(PPCInterpreter_t::spr.UGQR[0]) * (SPR_UGQR7 - SPR_UGQR0))); +static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, temporaryGPR_reg), sizeof(uint32) * 3)); +static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, xer_ca), 8)); +static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, xer_so), 8)); +static_assert(isAdrImmRangeValidGPR(offsetof(PPCInterpreter_t, cr), PPCREC_NAME_CR_LAST - PPCREC_NAME_CR, 8)); +static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, reservedMemAddr))); +static_assert(isAdrImmValidGPR(offsetof(PPCInterpreter_t, reservedMemValue))); +static_assert(isAdrImmRangeValidFpr(offsetof(PPCInterpreter_t, fpr), sizeof(FPR_t) * 63, 64)); +static_assert(isAdrImmRangeValidFpr(offsetof(PPCInterpreter_t, temporaryFPR), sizeof(FPR_t) * 7, 128)); + +void AArch64GenContext_t::r_name(IMLInstruction* imlInstruction) +{ + uint32 name = imlInstruction->op_r_name.name; + + if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64) + { + XReg regRXReg = gpReg(imlInstruction->op_r_name.regR); + WReg regR = aliasAs(regRXReg); + if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32) + { + ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0))); + } + else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999) + { + uint32 sprIndex = (name - PPCREC_NAME_SPR0); + if (sprIndex == SPR_LR) + ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.LR))); + else if (sprIndex == SPR_CTR) + ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.CTR))); + else if (sprIndex == SPR_XER) + ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.XER))); + else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) + ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0))); + else + cemu_assert_suspicious(); + } + else if (name >= PPCREC_NAME_TEMPORARY && 
name < PPCREC_NAME_TEMPORARY + 4) + { + ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY))); + } + else if (name == PPCREC_NAME_XER_CA) + { + ldrb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_ca))); + } + else if (name == PPCREC_NAME_XER_SO) + { + ldrb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_so))); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + ldrb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR))); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) + { + ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemAddr))); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + ldr(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemValue))); + } + else + { + cemu_assert_suspicious(); + } + } + else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64) + { + auto imlRegR = imlInstruction->op_r_name.regR; + + if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64)) + { + uint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2; + uint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2; + uint32 offset = offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + (pairIndex ? sizeof(double) : 0); + ldr(fpReg(imlRegR), AdrUimm(HCPU_REG, offset)); + } + else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) + { + ldr(fpReg(imlRegR), AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0))); + } + else + { + cemu_assert_suspicious(); + } + } + else + { + cemu_assert_suspicious(); + } +} + +void AArch64GenContext_t::name_r(IMLInstruction* imlInstruction) +{ + uint32 name = imlInstruction->op_r_name.name; + + if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64) + { + XReg regRXReg = gpReg(imlInstruction->op_r_name.regR); + WReg regR = aliasAs(regRXReg); + if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32) + { + str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0))); + } + else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999) + { + uint32 sprIndex = (name - PPCREC_NAME_SPR0); + if (sprIndex == SPR_LR) + str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.LR))); + else if (sprIndex == SPR_CTR) + str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.CTR))); + else if (sprIndex == SPR_XER) + str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.XER))); + else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) + str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0))); + else + cemu_assert_suspicious(); + } + else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) + { + str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY))); + } + else if (name == PPCREC_NAME_XER_CA) + { + strb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_ca))); + } + else if (name == PPCREC_NAME_XER_SO) + { + strb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, xer_so))); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + strb(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR))); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) + { + str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemAddr))); + 
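+			// reservedMemAddr/reservedMemValue back the lwarx/stwcx. reservation state kept in PPCInterpreter_t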
} + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + str(regR, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, reservedMemValue))); + } + else + { + cemu_assert_suspicious(); + } + } + else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64) + { + auto imlRegR = imlInstruction->op_r_name.regR; + if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64)) + { + uint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2; + uint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2; + sint32 offset = offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + pairIndex * sizeof(double); + str(fpReg(imlRegR), AdrUimm(HCPU_REG, offset)); + } + else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) + { + str(fpReg(imlRegR), AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0))); + } + else + { + cemu_assert_suspicious(); + } + } + else + { + cemu_assert_suspicious(); + } +} + +bool AArch64GenContext_t::r_r(IMLInstruction* imlInstruction) +{ + WReg regR = gpReg(imlInstruction->op_r_r.regR); + WReg regA = gpReg(imlInstruction->op_r_r.regA); + + if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + { + mov(regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP) + { + rev(regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32) + { + sxtb(regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32) + { + sxth(regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_NOT) + { + mvn(regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_NEG) + { + neg(regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_CNTLZW) + { + clz(regR, regA); + } + else + { + cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_r(): Unsupported operation {:x}", imlInstruction->operation); + return false; + } + return true; +} + +bool AArch64GenContext_t::r_s32(IMLInstruction* imlInstruction) +{ + sint32 imm32 = imlInstruction->op_r_immS32.immS32; + WReg reg = gpReg(imlInstruction->op_r_immS32.regR); + + if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + { + mov(reg, imm32); + } + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE) + { + ror(reg, reg, 32 - (imm32 & 0x1f)); + } + else + { + cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_s32(): Unsupported operation {:x}", imlInstruction->operation); + return false; + } + return true; +} + +bool AArch64GenContext_t::r_r_s32(IMLInstruction* imlInstruction) +{ + WReg regR = gpReg(imlInstruction->op_r_r_s32.regR); + WReg regA = gpReg(imlInstruction->op_r_r_s32.regA); + sint32 immS32 = imlInstruction->op_r_r_s32.immS32; + + if (imlInstruction->operation == PPCREC_IML_OP_ADD) + { + add_imm(regR, regA, immS32, TEMP_GPR1.WReg); + } + else if (imlInstruction->operation == PPCREC_IML_OP_SUB) + { + sub_imm(regR, regA, immS32, TEMP_GPR1.WReg); + } + else if (imlInstruction->operation == PPCREC_IML_OP_AND) + { + mov(TEMP_GPR1.WReg, immS32); + and_(regR, regA, TEMP_GPR1.WReg); + } + else if (imlInstruction->operation == PPCREC_IML_OP_OR) + { + mov(TEMP_GPR1.WReg, immS32); + orr(regR, regA, TEMP_GPR1.WReg); + } + else if (imlInstruction->operation == PPCREC_IML_OP_XOR) + { + mov(TEMP_GPR1.WReg, immS32); + eor(regR, regA, TEMP_GPR1.WReg); + } + else if (imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED) + { + mov(TEMP_GPR1.WReg, immS32); + mul(regR, regA, 
TEMP_GPR1.WReg); + } + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + { + lsl(regR, regA, (uint32)immS32 & 0x1f); + } + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + { + lsr(regR, regA, (uint32)immS32 & 0x1f); + } + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) + { + asr(regR, regA, (uint32)immS32 & 0x1f); + } + else + { + cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_r_s32(): Unsupported operation {:x}", imlInstruction->operation); + cemu_assert_suspicious(); + return false; + } + return true; +} + +bool AArch64GenContext_t::r_r_s32_carry(IMLInstruction* imlInstruction) +{ + WReg regR = gpReg(imlInstruction->op_r_r_s32_carry.regR); + WReg regA = gpReg(imlInstruction->op_r_r_s32_carry.regA); + WReg regCarry = gpReg(imlInstruction->op_r_r_s32_carry.regCarry); + + sint32 immS32 = imlInstruction->op_r_r_s32_carry.immS32; + if (imlInstruction->operation == PPCREC_IML_OP_ADD) + { + adds_imm(regR, regA, immS32, TEMP_GPR1.WReg); + cset(regCarry, Cond::CS); + } + else if (imlInstruction->operation == PPCREC_IML_OP_ADD_WITH_CARRY) + { + mov(TEMP_GPR1.WReg, immS32); + cmp(regCarry, 1); + adcs(regR, regA, TEMP_GPR1.WReg); + cset(regCarry, Cond::CS); + } + else + { + cemu_assert_suspicious(); + return false; + } + + return true; +} + +bool AArch64GenContext_t::r_r_r(IMLInstruction* imlInstruction) +{ + WReg regResult = gpReg(imlInstruction->op_r_r_r.regR); + XReg reg64Result = aliasAs(regResult); + WReg regOperand1 = gpReg(imlInstruction->op_r_r_r.regA); + WReg regOperand2 = gpReg(imlInstruction->op_r_r_r.regB); + + if (imlInstruction->operation == PPCREC_IML_OP_ADD) + { + add(regResult, regOperand1, regOperand2); + } + else if (imlInstruction->operation == PPCREC_IML_OP_SUB) + { + sub(regResult, regOperand1, regOperand2); + } + else if (imlInstruction->operation == PPCREC_IML_OP_OR) + { + orr(regResult, regOperand1, regOperand2); + } + else if (imlInstruction->operation == PPCREC_IML_OP_AND) + { + and_(regResult, regOperand1, regOperand2); + } + else if (imlInstruction->operation == PPCREC_IML_OP_XOR) + { + eor(regResult, regOperand1, regOperand2); + } + else if (imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED) + { + mul(regResult, regOperand1, regOperand2); + } + else if (imlInstruction->operation == PPCREC_IML_OP_SLW) + { + tst(regOperand2, 32); + lsl(regResult, regOperand1, regOperand2); + csel(regResult, regResult, wzr, Cond::EQ); + } + else if (imlInstruction->operation == PPCREC_IML_OP_SRW) + { + tst(regOperand2, 32); + lsr(regResult, regOperand1, regOperand2); + csel(regResult, regResult, wzr, Cond::EQ); + } + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE) + { + neg(TEMP_GPR1.WReg, regOperand2); + ror(regResult, regOperand1, TEMP_GPR1.WReg); + } + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) + { + asr(regResult, regOperand1, regOperand2); + } + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + { + lsr(regResult, regOperand1, regOperand2); + } + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + { + lsl(regResult, regOperand1, regOperand2); + } + else if (imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED) + { + sdiv(regResult, regOperand1, regOperand2); + } + else if (imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED) + { + udiv(regResult, regOperand1, regOperand2); + } + else if (imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED) + { + smull(reg64Result, regOperand1, regOperand2); + 
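+		// smull produces the full 64-bit signed product; the lsr below keeps only the
+		// upper 32 bits, giving the multiply-high result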
lsr(reg64Result, reg64Result, 32); + } + else if (imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED) + { + umull(reg64Result, regOperand1, regOperand2); + lsr(reg64Result, reg64Result, 32); + } + else + { + cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_r_r_r(): Unsupported operation {:x}", imlInstruction->operation); + return false; + } + return true; +} + +bool AArch64GenContext_t::r_r_r_carry(IMLInstruction* imlInstruction) +{ + WReg regR = gpReg(imlInstruction->op_r_r_r_carry.regR); + WReg regA = gpReg(imlInstruction->op_r_r_r_carry.regA); + WReg regB = gpReg(imlInstruction->op_r_r_r_carry.regB); + WReg regCarry = gpReg(imlInstruction->op_r_r_r_carry.regCarry); + + if (imlInstruction->operation == PPCREC_IML_OP_ADD) + { + adds(regR, regA, regB); + cset(regCarry, Cond::CS); + } + else if (imlInstruction->operation == PPCREC_IML_OP_ADD_WITH_CARRY) + { + cmp(regCarry, 1); + adcs(regR, regA, regB); + cset(regCarry, Cond::CS); + } + else + { + cemu_assert_suspicious(); + return false; + } + + return true; +} + +Cond ImlCondToArm64Cond(IMLCondition condition) +{ + switch (condition) + { + case IMLCondition::EQ: + return Cond::EQ; + case IMLCondition::NEQ: + return Cond::NE; + case IMLCondition::UNSIGNED_GT: + return Cond::HI; + case IMLCondition::UNSIGNED_LT: + return Cond::LO; + case IMLCondition::SIGNED_GT: + return Cond::GT; + case IMLCondition::SIGNED_LT: + return Cond::LT; + default: + { + cemu_assert_suspicious(); + return Cond::EQ; + } + } +} + +void AArch64GenContext_t::compare(IMLInstruction* imlInstruction) +{ + WReg regR = gpReg(imlInstruction->op_compare.regR); + WReg regA = gpReg(imlInstruction->op_compare.regA); + WReg regB = gpReg(imlInstruction->op_compare.regB); + Cond cond = ImlCondToArm64Cond(imlInstruction->op_compare.cond); + cmp(regA, regB); + cset(regR, cond); +} + +void AArch64GenContext_t::compare_s32(IMLInstruction* imlInstruction) +{ + WReg regR = gpReg(imlInstruction->op_compare.regR); + WReg regA = gpReg(imlInstruction->op_compare.regA); + sint32 imm = imlInstruction->op_compare_s32.immS32; + auto cond = ImlCondToArm64Cond(imlInstruction->op_compare.cond); + cmp_imm(regA, imm, TEMP_GPR1.WReg); + cset(regR, cond); +} + +void AArch64GenContext_t::cjump(IMLInstruction* imlInstruction, IMLSegment* imlSegment) +{ + auto regBool = gpReg(imlInstruction->op_conditional_jump.registerBool); + prepareJump(ConditionalRegJumpInfo{ + .target = imlSegment->nextSegmentBranchTaken, + .regBool = regBool, + .mustBeTrue = imlInstruction->op_conditional_jump.mustBeTrue, + }); +} + +void AArch64GenContext_t::jump(IMLSegment* imlSegment) +{ + prepareJump(UnconditionalJumpInfo{.target = imlSegment->nextSegmentBranchTaken}); +} + +void AArch64GenContext_t::conditionalJumpCycleCheck(IMLSegment* imlSegment) +{ + ldr(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, remainingCycles))); + prepareJump(NegativeRegValueJumpInfo{ + .target = imlSegment->nextSegmentBranchTaken, + .regValue = TEMP_GPR1.WReg, + }); +} + +void* PPCRecompiler_virtualHLE(PPCInterpreter_t* ppcInterpreter, uint32 hleFuncId) +{ + void* prevRSPTemp = ppcInterpreter->rspTemp; + if (hleFuncId == 0xFFD0) + { + ppcInterpreter->remainingCycles -= 500; // let subtract about 500 cycles for each HLE call + ppcInterpreter->gpr[3] = 0; + PPCInterpreter_nextInstruction(ppcInterpreter); + return PPCInterpreter_getCurrentInstance(); + } + else + { + auto hleCall = PPCInterpreter_getHLECall(hleFuncId); + cemu_assert(hleCall != nullptr); + hleCall(ppcInterpreter); + } + 
ppcInterpreter->rspTemp = prevRSPTemp; + return PPCInterpreter_getCurrentInstance(); +} + +bool AArch64GenContext_t::macro(IMLInstruction* imlInstruction) +{ + if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG) + { + WReg branchDstReg = gpReg(imlInstruction->op_macro.paramReg); + + mov(TEMP_GPR1.WReg, offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); + add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, branchDstReg, ShMod::LSL, 1); + ldr(TEMP_GPR1.XReg, AdrExt(PPC_REC_INSTANCE_REG, TEMP_GPR1.WReg, ExtMod::UXTW)); + mov(LR.WReg, branchDstReg); + br(TEMP_GPR1.XReg); + return true; + } + else if (imlInstruction->operation == PPCREC_IML_MACRO_BL) + { + uint32 newLR = imlInstruction->op_macro.param + 4; + + mov(TEMP_GPR1.WReg, newLR); + str(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, spr.LR))); + + uint32 newIP = imlInstruction->op_macro.param2; + uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL; + mov(TEMP_GPR1.XReg, lookupOffset); + ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg)); + mov(LR.WReg, newIP); + br(TEMP_GPR1.XReg); + return true; + } + else if (imlInstruction->operation == PPCREC_IML_MACRO_B_FAR) + { + uint32 newIP = imlInstruction->op_macro.param2; + uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL; + mov(TEMP_GPR1.XReg, lookupOffset); + ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg)); + mov(LR.WReg, newIP); + br(TEMP_GPR1.XReg); + return true; + } + else if (imlInstruction->operation == PPCREC_IML_MACRO_LEAVE) + { + uint32 currentInstructionAddress = imlInstruction->op_macro.param; + mov(TEMP_GPR1.XReg, (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); // newIP = 0 special value for recompiler exit + ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg)); + mov(LR.WReg, currentInstructionAddress); + br(TEMP_GPR1.XReg); + return true; + } + else if (imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK) + { + brk(0xf000); + return true; + } + else if (imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES) + { + uint32 cycleCount = imlInstruction->op_macro.param; + AdrUimm adrCycles = AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, remainingCycles)); + ldr(TEMP_GPR1.WReg, adrCycles); + sub_imm(TEMP_GPR1.WReg, TEMP_GPR1.WReg, cycleCount, TEMP_GPR2.WReg); + str(TEMP_GPR1.WReg, adrCycles); + return true; + } + else if (imlInstruction->operation == PPCREC_IML_MACRO_HLE) + { + uint32 ppcAddress = imlInstruction->op_macro.param; + uint32 funcId = imlInstruction->op_macro.param2; + Label cyclesLeftLabel; + + // update instruction pointer + mov(TEMP_GPR1.WReg, ppcAddress); + str(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer))); + // set parameters + str(x30, AdrPreImm(sp, -16)); + + mov(x0, HCPU_REG); + mov(w1, funcId); + // call HLE function + + mov(TEMP_GPR1.XReg, (uint64)PPCRecompiler_virtualHLE); + blr(TEMP_GPR1.XReg); + + mov(HCPU_REG, x0); + + ldr(x30, AdrPostImm(sp, 16)); + + // check if cycles where decreased beyond zero, if yes -> leave recompiler + ldr(TEMP_GPR1.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, remainingCycles))); + tbz(TEMP_GPR1.WReg, 31, cyclesLeftLabel); // check if negative + + mov(TEMP_GPR1.XReg, offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); + ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg)); + ldr(LR.WReg, AdrUimm(HCPU_REG, 
offsetof(PPCInterpreter_t, instructionPointer))); + // branch to recompiler exit + br(TEMP_GPR1.XReg); + + L(cyclesLeftLabel); + // check if instruction pointer was changed + // assign new instruction pointer to LR.WReg + ldr(LR.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer))); + mov(TEMP_GPR1.XReg, offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); + add(TEMP_GPR1.XReg, TEMP_GPR1.XReg, LR.XReg, ShMod::LSL, 1); + ldr(TEMP_GPR1.XReg, AdrReg(PPC_REC_INSTANCE_REG, TEMP_GPR1.XReg)); + // branch to [ppcRecompilerDirectJumpTable + PPCInterpreter_t::instructionPointer * 2] + br(TEMP_GPR1.XReg); + return true; + } + else + { + cemuLog_log(LogType::Recompiler, "Unknown recompiler macro operation %d\n", imlInstruction->operation); + cemu_assert_suspicious(); + } + return false; +} + +bool AArch64GenContext_t::load(IMLInstruction* imlInstruction, bool indexed) +{ + cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32); + cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32); + if (indexed) + cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32); + + sint32 memOffset = imlInstruction->op_storeLoad.immS32; + bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend; + bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian; + WReg memReg = gpReg(imlInstruction->op_storeLoad.registerMem); + WReg dataReg = gpReg(imlInstruction->op_storeLoad.registerData); + + add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg); + if (indexed) + add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, gpReg(imlInstruction->op_storeLoad.registerMem2)); + + auto adr = AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW); + if (imlInstruction->op_storeLoad.copyWidth == 32) + { + ldr(dataReg, adr); + if (switchEndian) + rev(dataReg, dataReg); + } + else if (imlInstruction->op_storeLoad.copyWidth == 16) + { + if (switchEndian) + { + ldrh(dataReg, adr); + rev(dataReg, dataReg); + if (signExtend) + asr(dataReg, dataReg, 16); + else + lsr(dataReg, dataReg, 16); + } + else + { + if (signExtend) + ldrsh(dataReg, adr); + else + ldrh(dataReg, adr); + } + } + else if (imlInstruction->op_storeLoad.copyWidth == 8) + { + if (signExtend) + ldrsb(dataReg, adr); + else + ldrb(dataReg, adr); + } + else + { + return false; + } + return true; +} + +bool AArch64GenContext_t::store(IMLInstruction* imlInstruction, bool indexed) +{ + cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32); + cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32); + if (indexed) + cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32); + + WReg dataReg = gpReg(imlInstruction->op_storeLoad.registerData); + WReg memReg = gpReg(imlInstruction->op_storeLoad.registerMem); + sint32 memOffset = imlInstruction->op_storeLoad.immS32; + bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian; + + add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg); + if (indexed) + add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, gpReg(imlInstruction->op_storeLoad.registerMem2)); + AdrExt adr = AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW); + if (imlInstruction->op_storeLoad.copyWidth == 32) + { + if (swapEndian) + { + rev(TEMP_GPR2.WReg, dataReg); + str(TEMP_GPR2.WReg, adr); + } + else + { + str(dataReg, adr); + } + } + else if (imlInstruction->op_storeLoad.copyWidth == 16) + { + if 
(swapEndian) + { + rev(TEMP_GPR2.WReg, dataReg); + lsr(TEMP_GPR2.WReg, TEMP_GPR2.WReg, 16); + strh(TEMP_GPR2.WReg, adr); + } + else + { + strh(dataReg, adr); + } + } + else if (imlInstruction->op_storeLoad.copyWidth == 8) + { + strb(dataReg, adr); + } + else + { + return false; + } + return true; +} + +void AArch64GenContext_t::atomic_cmp_store(IMLInstruction* imlInstruction) +{ + WReg outReg = gpReg(imlInstruction->op_atomic_compare_store.regBoolOut); + WReg eaReg = gpReg(imlInstruction->op_atomic_compare_store.regEA); + WReg valReg = gpReg(imlInstruction->op_atomic_compare_store.regWriteValue); + WReg cmpValReg = gpReg(imlInstruction->op_atomic_compare_store.regCompareValue); + + if (s_cpu.isAtomicSupported()) + { + mov(TEMP_GPR2.WReg, cmpValReg); + add(TEMP_GPR1.XReg, MEM_BASE_REG, eaReg, ExtMod::UXTW); + casal(TEMP_GPR2.WReg, valReg, AdrNoOfs(TEMP_GPR1.XReg)); + cmp(TEMP_GPR2.WReg, cmpValReg); + cset(outReg, Cond::EQ); + } + else + { + Label notEqual; + Label storeFailed; + + add(TEMP_GPR1.XReg, MEM_BASE_REG, eaReg, ExtMod::UXTW); + L(storeFailed); + ldaxr(TEMP_GPR2.WReg, AdrNoOfs(TEMP_GPR1.XReg)); + cmp(TEMP_GPR2.WReg, cmpValReg); + bne(notEqual); + stlxr(TEMP_GPR2.WReg, valReg, AdrNoOfs(TEMP_GPR1.XReg)); + cbnz(TEMP_GPR2.WReg, storeFailed); + + L(notEqual); + cset(outReg, Cond::EQ); + } +} + +bool AArch64GenContext_t::fpr_load(IMLInstruction* imlInstruction, bool indexed) +{ + const IMLReg& dataReg = imlInstruction->op_storeLoad.registerData; + SReg dataSReg = fpReg(dataReg); + DReg dataDReg = fpReg(dataReg); + WReg realRegisterMem = gpReg(imlInstruction->op_storeLoad.registerMem); + WReg indexReg = indexed ? gpReg(imlInstruction->op_storeLoad.registerMem2) : wzr; + sint32 adrOffset = imlInstruction->op_storeLoad.immS32; + uint8 mode = imlInstruction->op_storeLoad.mode; + + if (mode == PPCREC_FPR_LD_MODE_SINGLE) + { + add_imm(TEMP_GPR1.WReg, realRegisterMem, adrOffset, TEMP_GPR1.WReg); + if (indexed) + add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg); + ldr(TEMP_GPR2.WReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW)); + rev(TEMP_GPR2.WReg, TEMP_GPR2.WReg); + fmov(dataSReg, TEMP_GPR2.WReg); + + if (imlInstruction->op_storeLoad.flags2.notExpanded) + { + // leave value as single + } + else + { + fcvt(dataDReg, dataSReg); + } + } + else if (mode == PPCREC_FPR_LD_MODE_DOUBLE) + { + add_imm(TEMP_GPR1.WReg, realRegisterMem, adrOffset, TEMP_GPR1.WReg); + if (indexed) + add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg); + ldr(TEMP_GPR2.XReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW)); + rev(TEMP_GPR2.XReg, TEMP_GPR2.XReg); + fmov(dataDReg, TEMP_GPR2.XReg); + } + else + { + return false; + } + return true; +} + +// store to memory +bool AArch64GenContext_t::fpr_store(IMLInstruction* imlInstruction, bool indexed) +{ + const IMLReg& dataImlReg = imlInstruction->op_storeLoad.registerData; + DReg dataDReg = fpReg(dataImlReg); + SReg dataSReg = fpReg(dataImlReg); + WReg memReg = gpReg(imlInstruction->op_storeLoad.registerMem); + WReg indexReg = indexed ? 
gpReg(imlInstruction->op_storeLoad.registerMem2) : wzr; + sint32 memOffset = imlInstruction->op_storeLoad.immS32; + uint8 mode = imlInstruction->op_storeLoad.mode; + + if (mode == PPCREC_FPR_ST_MODE_SINGLE) + { + add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg); + if (indexed) + add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg); + + if (imlInstruction->op_storeLoad.flags2.notExpanded) + { + // value is already in single format + fmov(TEMP_GPR2.WReg, dataSReg); + } + else + { + fcvt(TEMP_FPR.SReg, dataDReg); + fmov(TEMP_GPR2.WReg, TEMP_FPR.SReg); + } + rev(TEMP_GPR2.WReg, TEMP_GPR2.WReg); + str(TEMP_GPR2.WReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW)); + } + else if (mode == PPCREC_FPR_ST_MODE_DOUBLE) + { + add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg); + if (indexed) + add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg); + fmov(TEMP_GPR2.XReg, dataDReg); + rev(TEMP_GPR2.XReg, TEMP_GPR2.XReg); + str(TEMP_GPR2.XReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW)); + } + else if (mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0) + { + add_imm(TEMP_GPR1.WReg, memReg, memOffset, TEMP_GPR1.WReg); + if (indexed) + add(TEMP_GPR1.WReg, TEMP_GPR1.WReg, indexReg); + fmov(TEMP_GPR2.WReg, dataSReg); + rev(TEMP_GPR2.WReg, TEMP_GPR2.WReg); + str(TEMP_GPR2.WReg, AdrExt(MEM_BASE_REG, TEMP_GPR1.WReg, ExtMod::UXTW)); + } + else + { + cemu_assert_suspicious(); + cemuLog_log(LogType::Recompiler, "PPCRecompilerAArch64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode); + return false; + } + return true; +} + +// FPR op FPR +void AArch64GenContext_t::fpr_r_r(IMLInstruction* imlInstruction) +{ + auto imlRegR = imlInstruction->op_fpr_r_r.regR; + auto imlRegA = imlInstruction->op_fpr_r_r.regA; + + if (imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT) + { + fcvtzs(gpReg(imlRegR), fpReg(imlRegA)); + return; + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT) + { + scvtf(fpReg(imlRegR), gpReg(imlRegA)); + return; + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT) + { + cemu_assert_debug(imlRegR.GetRegFormat() == IMLRegFormat::F64); // assuming target is always F64 for now + // exact operation depends on size of types. 
Floats are automatically promoted to double if the target is F64 + DReg regFprDReg = fpReg(imlRegR); + SReg regFprSReg = fpReg(imlRegR); + if (imlRegA.GetRegFormat() == IMLRegFormat::I32) + { + fmov(regFprSReg, gpReg(imlRegA)); + // float to double + fcvt(regFprDReg, regFprSReg); + } + else if (imlRegA.GetRegFormat() == IMLRegFormat::I64) + { + fmov(regFprDReg, gpReg(imlRegA)); + } + else + { + cemu_assert_unimplemented(); + } + return; + } + + DReg regR = fpReg(imlRegR); + DReg regA = fpReg(imlRegA); + + if (imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN) + { + fmov(regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY) + { + fmul(regR, regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE) + { + fdiv(regR, regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD) + { + fadd(regR, regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB) + { + fsub(regR, regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCTIWZ) + { + fcvtzs(regR, regA); + } + else + { + cemu_assert_suspicious(); + } +} + +void AArch64GenContext_t::fpr_r_r_r(IMLInstruction* imlInstruction) +{ + DReg regR = fpReg(imlInstruction->op_fpr_r_r_r.regR); + DReg regA = fpReg(imlInstruction->op_fpr_r_r_r.regA); + DReg regB = fpReg(imlInstruction->op_fpr_r_r_r.regB); + + if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY) + { + fmul(regR, regA, regB); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD) + { + fadd(regR, regA, regB); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB) + { + fsub(regR, regA, regB); + } + else + { + cemu_assert_suspicious(); + } +} + +/* + * FPR = op (fprA, fprB, fprC) + */ +void AArch64GenContext_t::fpr_r_r_r_r(IMLInstruction* imlInstruction) +{ + DReg regR = fpReg(imlInstruction->op_fpr_r_r_r_r.regR); + DReg regA = fpReg(imlInstruction->op_fpr_r_r_r_r.regA); + DReg regB = fpReg(imlInstruction->op_fpr_r_r_r_r.regB); + DReg regC = fpReg(imlInstruction->op_fpr_r_r_r_r.regC); + + if (imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT) + { + fcmp(regA, 0.0); + fcsel(regR, regC, regB, Cond::GE); + } + else + { + cemu_assert_suspicious(); + } +} + +void AArch64GenContext_t::fpr_r(IMLInstruction* imlInstruction) +{ + DReg regRDReg = fpReg(imlInstruction->op_fpr_r.regR); + SReg regRSReg = fpReg(imlInstruction->op_fpr_r.regR); + + if (imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE) + { + fneg(regRDReg, regRDReg); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE) + { + fmov(regRDReg, 1.0); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ABS) + { + fabs(regRDReg, regRDReg); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS) + { + fabs(regRDReg, regRDReg); + fneg(regRDReg, regRDReg); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM) + { + // convert to 32bit single + fcvt(regRSReg, regRDReg); + // convert back to 64bit double + fcvt(regRDReg, regRSReg); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64) + { + // convert bottom to 64bit double + fcvt(regRDReg, regRSReg); + } + else + { + cemu_assert_unimplemented(); + } +} + +Cond ImlFPCondToArm64Cond(IMLCondition cond) +{ + switch (cond) + { + case IMLCondition::UNORDERED_GT: + return Cond::GT; + case IMLCondition::UNORDERED_LT: + return Cond::MI; + case IMLCondition::UNORDERED_EQ: + return Cond::EQ; + case IMLCondition::UNORDERED_U: + return 
Cond::VS; + default: + { + cemu_assert_suspicious(); + return Cond::EQ; + } + } +} + +void AArch64GenContext_t::fpr_compare(IMLInstruction* imlInstruction) +{ + WReg regR = gpReg(imlInstruction->op_fpr_compare.regR); + DReg regA = fpReg(imlInstruction->op_fpr_compare.regA); + DReg regB = fpReg(imlInstruction->op_fpr_compare.regB); + auto cond = ImlFPCondToArm64Cond(imlInstruction->op_fpr_compare.cond); + fcmp(regA, regB); + cset(regR, cond); +} + +void AArch64GenContext_t::call_imm(IMLInstruction* imlInstruction) +{ + str(x30, AdrPreImm(sp, -16)); + mov(TEMP_GPR1.XReg, imlInstruction->op_call_imm.callAddress); + blr(TEMP_GPR1.XReg); + ldr(x30, AdrPostImm(sp, 16)); +} + +bool PPCRecompiler_generateAArch64Code(struct PPCRecFunction_t* PPCRecFunction, struct ppcImlGenContext_t* ppcImlGenContext) +{ + AArch64Allocator allocator; + AArch64GenContext_t aarch64GenContext{&allocator}; + + // generate iml instruction code + bool codeGenerationFailed = false; + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + { + if (codeGenerationFailed) + break; + segIt->x64Offset = aarch64GenContext.getSize(); + + aarch64GenContext.storeSegmentStart(segIt); + + for (size_t i = 0; i < segIt->imlList.size(); i++) + { + IMLInstruction* imlInstruction = segIt->imlList.data() + i; + if (imlInstruction->type == PPCREC_IML_TYPE_R_NAME) + { + aarch64GenContext.r_name(imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_NAME_R) + { + aarch64GenContext.name_r(imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R) + { + if (!aarch64GenContext.r_r(imlInstruction)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) + { + if (!aarch64GenContext.r_s32(imlInstruction)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) + { + if (!aarch64GenContext.r_r_s32(imlInstruction)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + if (!aarch64GenContext.r_r_s32_carry(imlInstruction)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) + { + if (!aarch64GenContext.r_r_r(imlInstruction)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + if (!aarch64GenContext.r_r_r_carry(imlInstruction)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE) + { + aarch64GenContext.compare(imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) + { + aarch64GenContext.compare_s32(imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + aarch64GenContext.cjump(imlInstruction, segIt); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP) + { + aarch64GenContext.jump(segIt); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + aarch64GenContext.conditionalJumpCycleCheck(segIt); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) + { + if (!aarch64GenContext.macro(imlInstruction)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD) + { + if (!aarch64GenContext.load(imlInstruction, false)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED) + { + if (!aarch64GenContext.load(imlInstruction, true)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_STORE) + { + if (!aarch64GenContext.store(imlInstruction, 
false)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) + { + if (!aarch64GenContext.store(imlInstruction, true)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + { + aarch64GenContext.atomic_cmp_store(imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_CALL_IMM) + { + aarch64GenContext.call_imm(imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_NO_OP) + { + // no op + } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD) + { + if (!aarch64GenContext.fpr_load(imlInstruction, false)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) + { + if (!aarch64GenContext.fpr_load(imlInstruction, true)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE) + { + if (!aarch64GenContext.fpr_store(imlInstruction, false)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) + { + if (!aarch64GenContext.fpr_store(imlInstruction, true)) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R) + { + aarch64GenContext.fpr_r_r(imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R) + { + aarch64GenContext.fpr_r_r_r(imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R) + { + aarch64GenContext.fpr_r_r_r_r(imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R) + { + aarch64GenContext.fpr_r(imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_COMPARE) + { + aarch64GenContext.fpr_compare(imlInstruction); + } + else + { + codeGenerationFailed = true; + cemu_assert_suspicious(); + cemuLog_log(LogType::Recompiler, "PPCRecompiler_generateAArch64Code(): Unsupported iml type {}", imlInstruction->type); + } + } + } + + // handle failed code generation + if (codeGenerationFailed) + { + return false; + } + + if (!aarch64GenContext.processAllJumps()) + { + cemuLog_log(LogType::Recompiler, "PPCRecompiler_generateAArch64Code(): some jumps exceeded the +/-128MB offset."); + return false; + } + + aarch64GenContext.readyRE(); + + // set code + PPCRecFunction->x86Code = aarch64GenContext.getCode(); + PPCRecFunction->x86Size = aarch64GenContext.getMaxSize(); + // set free disabled to skip freeing the code from the CodeGenerator destructor + allocator.setFreeDisabled(true); + return true; +} + +void PPCRecompiler_cleanupAArch64Code(void* code, size_t size) +{ + AArch64Allocator allocator; + if (allocator.useProtect()) + CodeArray::protect(code, size, CodeArray::PROTECT_RW); + allocator.free(static_cast(code)); +} + +void AArch64GenContext_t::enterRecompilerCode() +{ + constexpr size_t STACK_SIZE = 160 /* x19 .. x30 + v8.d[0] .. 
v15.d[0] */; + static_assert(STACK_SIZE % 16 == 0); + sub(sp, sp, STACK_SIZE); + mov(x9, sp); + + stp(x19, x20, AdrPostImm(x9, 16)); + stp(x21, x22, AdrPostImm(x9, 16)); + stp(x23, x24, AdrPostImm(x9, 16)); + stp(x25, x26, AdrPostImm(x9, 16)); + stp(x27, x28, AdrPostImm(x9, 16)); + stp(x29, x30, AdrPostImm(x9, 16)); + st4((v8.d - v11.d)[0], AdrPostImm(x9, 32)); + st4((v12.d - v15.d)[0], AdrPostImm(x9, 32)); + mov(HCPU_REG, x1); // call argument 2 + mov(PPC_REC_INSTANCE_REG, (uint64)ppcRecompilerInstanceData); + mov(MEM_BASE_REG, (uint64)memory_base); + + // branch to recFunc + blr(x0); // call argument 1 + + mov(x9, sp); + ldp(x19, x20, AdrPostImm(x9, 16)); + ldp(x21, x22, AdrPostImm(x9, 16)); + ldp(x23, x24, AdrPostImm(x9, 16)); + ldp(x25, x26, AdrPostImm(x9, 16)); + ldp(x27, x28, AdrPostImm(x9, 16)); + ldp(x29, x30, AdrPostImm(x9, 16)); + ld4((v8.d - v11.d)[0], AdrPostImm(x9, 32)); + ld4((v12.d - v15.d)[0], AdrPostImm(x9, 32)); + + add(sp, sp, STACK_SIZE); + + ret(); +} + +void AArch64GenContext_t::leaveRecompilerCode() +{ + str(LR.WReg, AdrUimm(HCPU_REG, offsetof(PPCInterpreter_t, instructionPointer))); + ret(); +} + +bool initializedInterfaceFunctions = false; +AArch64GenContext_t enterRecompilerCode_ctx{}; + +AArch64GenContext_t leaveRecompilerCode_unvisited_ctx{}; +AArch64GenContext_t leaveRecompilerCode_visited_ctx{}; +void PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions() +{ + if (initializedInterfaceFunctions) + return; + initializedInterfaceFunctions = true; + + enterRecompilerCode_ctx.enterRecompilerCode(); + enterRecompilerCode_ctx.readyRE(); + PPCRecompiler_enterRecompilerCode = enterRecompilerCode_ctx.getCode(); + + leaveRecompilerCode_unvisited_ctx.leaveRecompilerCode(); + leaveRecompilerCode_unvisited_ctx.readyRE(); + PPCRecompiler_leaveRecompilerCode_unvisited = leaveRecompilerCode_unvisited_ctx.getCode(); + + leaveRecompilerCode_visited_ctx.leaveRecompilerCode(); + leaveRecompilerCode_visited_ctx.readyRE(); + PPCRecompiler_leaveRecompilerCode_visited = leaveRecompilerCode_visited_ctx.getCode(); +} diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h new file mode 100644 index 00000000..b610ee04 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h @@ -0,0 +1,18 @@ +#pragma once + +#include "HW/Espresso/Recompiler/IML/IMLInstruction.h" +#include "../PPCRecompiler.h" + +bool PPCRecompiler_generateAArch64Code(struct PPCRecFunction_t* PPCRecFunction, struct ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_cleanupAArch64Code(void* code, size_t size); + +void PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions(); + +// architecture specific constants +namespace IMLArchAArch64 +{ + static constexpr int PHYSREG_GPR_BASE = 0; + static constexpr int PHYSREG_GPR_COUNT = 25; + static constexpr int PHYSREG_FPR_BASE = PHYSREG_GPR_COUNT; + static constexpr int PHYSREG_FPR_COUNT = 31; +}; // namespace IMLArchAArch64 \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp new file mode 100644 index 00000000..eadb80fb --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.cpp @@ -0,0 +1,1672 @@ +#include "Cafe/HW/Espresso/PPCState.h" +#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" +#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h" +#include "../PPCRecompiler.h" +#include 
"../PPCRecompilerIml.h" +#include "BackendX64.h" +#include "Cafe/OS/libs/coreinit/coreinit_Time.h" +#include "util/MemMapper/MemMapper.h" +#include "Common/cpu_features.h" +#include + +static x86Assembler64::GPR32 _reg32(IMLReg physReg) +{ + cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32); + IMLRegID regId = physReg.GetRegID(); + cemu_assert_debug(regId < 16); + return (x86Assembler64::GPR32)regId; +} + +static uint32 _reg64(IMLReg physReg) +{ + cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I64); + IMLRegID regId = physReg.GetRegID(); + cemu_assert_debug(regId < 16); + return regId; +} + +uint32 _regF64(IMLReg physReg) +{ + cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::F64); + IMLRegID regId = physReg.GetRegID(); + cemu_assert_debug(regId >= IMLArchX86::PHYSREG_FPR_BASE && regId < IMLArchX86::PHYSREG_FPR_BASE+16); + regId -= IMLArchX86::PHYSREG_FPR_BASE; + return regId; +} + +static x86Assembler64::GPR8_REX _reg8(IMLReg physReg) +{ + cemu_assert_debug(physReg.GetRegFormat() == IMLRegFormat::I32); // for now these are represented as 32bit + return (x86Assembler64::GPR8_REX)physReg.GetRegID(); +} + +static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId) +{ + return (x86Assembler64::GPR32)regId; +} + +static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) +{ + return (x86Assembler64::GPR8_REX)regId; +} + +static x86Assembler64::GPR8_REX _reg8_from_reg64(uint32 regId) +{ + return (x86Assembler64::GPR8_REX)regId; +} + +static x86Assembler64::GPR64 _reg64_from_reg32(x86Assembler64::GPR32 regId) +{ + return (x86Assembler64::GPR64)regId; +} + +X86Cond _x86Cond(IMLCondition imlCond) +{ + switch (imlCond) + { + case IMLCondition::EQ: + return X86_CONDITION_Z; + case IMLCondition::NEQ: + return X86_CONDITION_NZ; + case IMLCondition::UNSIGNED_GT: + return X86_CONDITION_NBE; + case IMLCondition::UNSIGNED_LT: + return X86_CONDITION_B; + case IMLCondition::SIGNED_GT: + return X86_CONDITION_NLE; + case IMLCondition::SIGNED_LT: + return X86_CONDITION_L; + default: + break; + } + cemu_assert_suspicious(); + return X86_CONDITION_Z; +} + +X86Cond _x86CondInverted(IMLCondition imlCond) +{ + switch (imlCond) + { + case IMLCondition::EQ: + return X86_CONDITION_NZ; + case IMLCondition::NEQ: + return X86_CONDITION_Z; + case IMLCondition::UNSIGNED_GT: + return X86_CONDITION_BE; + case IMLCondition::UNSIGNED_LT: + return X86_CONDITION_NB; + case IMLCondition::SIGNED_GT: + return X86_CONDITION_LE; + case IMLCondition::SIGNED_LT: + return X86_CONDITION_NL; + default: + break; + } + cemu_assert_suspicious(); + return X86_CONDITION_Z; +} + +X86Cond _x86Cond(IMLCondition imlCond, bool condIsInverted) +{ + if (condIsInverted) + return _x86CondInverted(imlCond); + return _x86Cond(imlCond); +} + +/* +* Remember current instruction output offset for reloc +* The instruction generated after this method has been called will be adjusted +*/ +void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, void* extraInfo = nullptr) +{ + x64GenContext->relocateOffsetTable2.emplace_back(x64GenContext->emitter->GetWriteIndex(), extraInfo); +} + +void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset) +{ + uint8* instructionData = x64GenContext->emitter->GetBufferPtr() + jumpInstructionOffset; + if (instructionData[0] == 0x0F && (instructionData[1] >= 0x80 && instructionData[1] <= 0x8F)) + { + // far conditional jump + *(uint32*)(instructionData + 2) = 
(destinationOffset - (jumpInstructionOffset + 6)); + } + else if (instructionData[0] >= 0x70 && instructionData[0] <= 0x7F) + { + // short conditional jump + sint32 distance = (sint32)((destinationOffset - (jumpInstructionOffset + 2))); + cemu_assert_debug(distance >= -128 && distance <= 127); + *(uint8*)(instructionData + 1) = (uint8)distance; + } + else if (instructionData[0] == 0xE9) + { + *(uint32*)(instructionData + 1) = (destinationOffset - (jumpInstructionOffset + 5)); + } + else if (instructionData[0] == 0xEB) + { + sint32 distance = (sint32)((destinationOffset - (jumpInstructionOffset + 2))); + cemu_assert_debug(distance >= -128 && distance <= 127); + *(uint8*)(instructionData + 1) = (uint8)distance; + } + else + { + assert_dbg(); + } +} + +void* ATTR_MS_ABI PPCRecompiler_virtualHLE(PPCInterpreter_t* hCPU, uint32 hleFuncId) +{ + void* prevRSPTemp = hCPU->rspTemp; + if( hleFuncId == 0xFFD0 ) + { + hCPU->remainingCycles -= 500; // let subtract about 500 cycles for each HLE call + hCPU->gpr[3] = 0; + PPCInterpreter_nextInstruction(hCPU); + return hCPU; + } + else + { + auto hleCall = PPCInterpreter_getHLECall(hleFuncId); + cemu_assert(hleCall != nullptr); + hleCall(hCPU); + } + hCPU->rspTemp = prevRSPTemp; + return PPCInterpreter_getCurrentInstance(); +} + +bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + if (imlInstruction->operation == PPCREC_IML_MACRO_B_TO_REG) + { + //x64Gen_int3(x64GenContext); + uint32 branchDstReg = _reg32(imlInstruction->op_macro.paramReg); + if(X86_REG_RDX != branchDstReg) + x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, branchDstReg); + // potential optimization: Use branchDstReg directly if possible instead of moving to RDX/EDX + // JMP [offset+RDX*(8/4)+R15] + x64Gen_writeU8(x64GenContext, 0x41); + x64Gen_writeU8(x64GenContext, 0xFF); + x64Gen_writeU8(x64GenContext, 0xA4); + x64Gen_writeU8(x64GenContext, 0x57); + x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); + return true; + } + else if( imlInstruction->operation == PPCREC_IML_MACRO_BL ) + { + // MOV DWORD [SPR_LinkRegister], newLR + uint32 newLR = imlInstruction->op_macro.param + 4; + x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR), newLR); + // remember new instruction pointer in RDX + uint32 newIP = imlInstruction->op_macro.param2; + x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, newIP); + // since RDX is constant we can use JMP [R15+const_offset] if jumpTableOffset+RDX*2 does not exceed the 2GB boundary + uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL; + if (lookupOffset >= 0x80000000ULL) + { + // JMP [offset+RDX*(8/4)+R15] + x64Gen_writeU8(x64GenContext, 0x41); + x64Gen_writeU8(x64GenContext, 0xFF); + x64Gen_writeU8(x64GenContext, 0xA4); + x64Gen_writeU8(x64GenContext, 0x57); + x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); + } + else + { + x64Gen_writeU8(x64GenContext, 0x41); + x64Gen_writeU8(x64GenContext, 0xFF); + x64Gen_writeU8(x64GenContext, 0xA7); + x64Gen_writeU32(x64GenContext, (uint32)lookupOffset); + } + return true; + } + else if( imlInstruction->operation == PPCREC_IML_MACRO_B_FAR ) + { + // remember new instruction pointer in RDX + uint32 newIP = imlInstruction->op_macro.param2; + 
x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, newIP); + // Since RDX is constant we can use JMP [R15+const_offset] if jumpTableOffset+RDX*2 does not exceed the 2GB boundary + uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL; + if (lookupOffset >= 0x80000000ULL) + { + // JMP [offset+RDX*(8/4)+R15] + x64Gen_writeU8(x64GenContext, 0x41); + x64Gen_writeU8(x64GenContext, 0xFF); + x64Gen_writeU8(x64GenContext, 0xA4); + x64Gen_writeU8(x64GenContext, 0x57); + x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); + } + else + { + x64Gen_writeU8(x64GenContext, 0x41); + x64Gen_writeU8(x64GenContext, 0xFF); + x64Gen_writeU8(x64GenContext, 0xA7); + x64Gen_writeU32(x64GenContext, (uint32)lookupOffset); + } + return true; + } + else if( imlInstruction->operation == PPCREC_IML_MACRO_LEAVE ) + { + uint32 currentInstructionAddress = imlInstruction->op_macro.param; + // remember PC value in REG_EDX + x64Gen_mov_reg64Low32_imm32(x64GenContext, X86_REG_RDX, currentInstructionAddress); + + uint32 newIP = 0; // special value for recompiler exit + uint64 lookupOffset = (uint64)&(((PPCRecompilerInstanceData_t*)NULL)->ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL; + // JMP [R15+offset] + x64Gen_writeU8(x64GenContext, 0x41); + x64Gen_writeU8(x64GenContext, 0xFF); + x64Gen_writeU8(x64GenContext, 0xA7); + x64Gen_writeU32(x64GenContext, (uint32)lookupOffset); + return true; + } + else if( imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK ) + { + x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, imlInstruction->op_macro.param2); + x64Gen_int3(x64GenContext); + return true; + } + else if( imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES ) + { + uint32 cycleCount = imlInstruction->op_macro.param; + x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), cycleCount); + return true; + } + else if( imlInstruction->operation == PPCREC_IML_MACRO_HLE ) + { + uint32 ppcAddress = imlInstruction->op_macro.param; + uint32 funcId = imlInstruction->op_macro.param2; + // update instruction pointer + x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress); + // set parameters + x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RCX, REG_RESV_HCPU); + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RDX, funcId); + // restore stackpointer from hCPU->rspTemp + x64Emit_mov_reg64_mem64(x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); + // reserve space on stack for call parameters + x64Gen_sub_reg64_imm32(x64GenContext, X86_REG_RSP, 8*11); // must be uneven number in order to retain stack 0x10 alignment + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RBP, 0); + // call HLE function + x64Gen_mov_reg64_imm64(x64GenContext, X86_REG_RAX, (uint64)PPCRecompiler_virtualHLE); + x64Gen_call_reg64(x64GenContext, X86_REG_RAX); + // restore RSP to hCPU (from RAX, result of PPCRecompiler_virtualHLE) + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_HCPU, X86_REG_RAX); + // MOV R15, ppcRecompilerInstanceData + x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_RECDATA, (uint64)ppcRecompilerInstanceData); + // MOV R13, memory_base + x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_MEMBASE, (uint64)memory_base); + // check if cycles where decreased beyond zero, if yes -> leave recompiler + x64Gen_bt_mem8(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, 
remainingCycles), 31); // check if negative + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); + x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_CARRY, 0); + + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RDX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer)); + // set EAX to 0 (we assume that ppcRecompilerDirectJumpTable[0] will be a recompiler escape function) + x64Gen_xor_reg32_reg32(x64GenContext, X86_REG_RAX, X86_REG_RAX); + // ADD RAX, REG_RESV_RECDATA + x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, REG_RESV_RECDATA); + // JMP [recompilerCallTable+EAX/4*8] + x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); + // check if instruction pointer was changed + // assign new instruction pointer to EAX + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_RAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer)); + // remember instruction pointer in REG_EDX + x64Gen_mov_reg64_reg64(x64GenContext, X86_REG_RDX, X86_REG_RAX); + // EAX *= 2 + x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, X86_REG_RAX); + // ADD RAX, REG_RESV_RECDATA + x64Gen_add_reg64_reg64(x64GenContext, X86_REG_RAX, REG_RESV_RECDATA); + // JMP [ppcRecompilerDirectJumpTable+RAX/4*8] + x64Gen_jmp_memReg64(x64GenContext, X86_REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); + return true; + } + else + { + debug_printf("Unknown recompiler macro operation %d\n", imlInstruction->operation); + assert_dbg(); + } + return false; +} + +/* +* Load from memory +*/ +bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) +{ + cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32); + cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32); + if (indexed) + cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32); + + IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID(); + IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID(); + IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER; + if( indexed ) + realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID(); + if( indexed && realRegisterMem == realRegisterMem2 ) + { + return false; + } + if( indexed && realRegisterData == realRegisterMem2 ) + { + // for indexed memory access realRegisterData must not be the same register as the second memory register, + // this can easily be worked around by swapping realRegisterMem and realRegisterMem2 + std::swap(realRegisterMem, realRegisterMem2); + } + + bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend; + bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian; + if( imlInstruction->op_storeLoad.copyWidth == 32 ) + { + if (indexed) + { + x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem, realRegisterMem2); + } + if( g_CPUFeatures.x86.movbe && switchEndian ) + { + if (indexed) + { + x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); + } + else + { + 
x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); + } + } + else + { + if (indexed) + { + x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); + if (switchEndian) + x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); + } + else + { + x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); + if (switchEndian) + x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); + } + } + } + else if( imlInstruction->op_storeLoad.copyWidth == 16 ) + { + if (indexed) + { + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + if(g_CPUFeatures.x86.movbe && switchEndian ) + { + x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); + if( indexed && realRegisterMem != realRegisterData ) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + else + { + x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); + if( indexed && realRegisterMem != realRegisterData ) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + if( switchEndian ) + x64Gen_rol_reg64Low16_imm8(x64GenContext, realRegisterData, 8); + } + if( signExtend ) + x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData); + else + x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData); + } + else if( imlInstruction->op_storeLoad.copyWidth == 8 ) + { + if( indexed ) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + if( signExtend ) + x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); + else + x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); + if( indexed && realRegisterMem != realRegisterData ) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + else + return false; + return true; +} + +/* +* Write to memory +*/ +bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) +{ + cemu_assert_debug(imlInstruction->op_storeLoad.registerData.GetRegFormat() == IMLRegFormat::I32); + cemu_assert_debug(imlInstruction->op_storeLoad.registerMem.GetRegFormat() == IMLRegFormat::I32); + if (indexed) + cemu_assert_debug(imlInstruction->op_storeLoad.registerMem2.GetRegFormat() == IMLRegFormat::I32); + + IMLRegID realRegisterData = imlInstruction->op_storeLoad.registerData.GetRegID(); + IMLRegID realRegisterMem = imlInstruction->op_storeLoad.registerMem.GetRegID(); + IMLRegID realRegisterMem2 = PPC_REC_INVALID_REGISTER; + if (indexed) + realRegisterMem2 = imlInstruction->op_storeLoad.registerMem2.GetRegID(); + + if (indexed && realRegisterMem == realRegisterMem2) + { + return false; + } + if (indexed && realRegisterData == realRegisterMem2) + { + // for indexed memory access realRegisterData must not be the same register as 
the second memory register, + // this can easily be worked around by swapping realRegisterMem and realRegisterMem2 + std::swap(realRegisterMem, realRegisterMem2); + } + + bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend; + bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian; + if (imlInstruction->op_storeLoad.copyWidth == 32) + { + uint32 valueRegister; + if ((swapEndian == false || g_CPUFeatures.x86.movbe) && realRegisterMem != realRegisterData) + { + valueRegister = realRegisterData; + } + else + { + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); + valueRegister = REG_RESV_TEMP; + } + if (!g_CPUFeatures.x86.movbe && swapEndian) + x64Gen_bswap_reg64Lower32bit(x64GenContext, valueRegister); + if (indexed) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + if (g_CPUFeatures.x86.movbe && swapEndian) + x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); + else + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); + if (indexed) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + else if (imlInstruction->op_storeLoad.copyWidth == 16) + { + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); + if (swapEndian) + x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); + if (indexed) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + if (indexed) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + // todo: Optimize this, e.g. 
by using MOVBE + } + else if (imlInstruction->op_storeLoad.copyWidth == 8) + { + if (indexed && realRegisterMem == realRegisterData) + { + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); + realRegisterData = REG_RESV_TEMP; + } + if (indexed) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, realRegisterData); + if (indexed) + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + else + return false; + return true; +} + +void PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + auto regBoolOut = _reg32_from_reg8(_reg8(imlInstruction->op_atomic_compare_store.regBoolOut)); + auto regEA = _reg32(imlInstruction->op_atomic_compare_store.regEA); + auto regVal = _reg32(imlInstruction->op_atomic_compare_store.regWriteValue); + auto regCmp = _reg32(imlInstruction->op_atomic_compare_store.regCompareValue); + + cemu_assert_debug(regBoolOut == X86_REG_EAX); + cemu_assert_debug(regEA != X86_REG_EAX); + cemu_assert_debug(regVal != X86_REG_EAX); + cemu_assert_debug(regCmp != X86_REG_EAX); + + x64GenContext->emitter->MOV_dd(X86_REG_EAX, regCmp); + x64GenContext->emitter->LockPrefix(); + x64GenContext->emitter->CMPXCHG_dd_l(REG_RESV_MEMBASE, 0, _reg64_from_reg32(regEA), 1, regVal); + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regBoolOut); + x64GenContext->emitter->AND_di32(regBoolOut, 1); // SETcc doesn't clear the upper bits so we do it manually here +} + +void PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + // the register allocator takes care of spilling volatile registers and moving parameters to the right registers, so we don't need to do any special handling here + x64GenContext->emitter->SUB_qi8(X86_REG_RSP, 0x20); // reserve enough space for any parameters while keeping stack alignment of 16 intact + x64GenContext->emitter->MOV_qi64(X86_REG_RAX, imlInstruction->op_call_imm.callAddress); + x64GenContext->emitter->CALL_q(X86_REG_RAX); + x64GenContext->emitter->ADD_qi8(X86_REG_RSP, 0x20); + // a note about the stack pointer: + // currently the code generated by generateEnterRecompilerCode makes sure the stack is 16 byte aligned, so we don't need to fix it up here +} + +bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + auto regR = _reg32(imlInstruction->op_r_r.regR); + auto regA = _reg32(imlInstruction->op_r_r.regA); + + if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) + { + // registerResult = registerA + if (regR != regA) + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP) + { + if (regA != regR) + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); // if movbe is available we can move and swap in a single instruction? 
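+ // note (editor): MOVBE only supports register/memory forms (there is no reg,reg encoding), so a pure register-to-register endian swap still needs the MOV + BSWAP sequence below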
+ x64Gen_bswap_reg64Lower32bit(x64GenContext, regR); + } + else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) + { + x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, regR, regA); + } + else if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32) + { + x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, regR, reg32ToReg16(regA)); + } + else if( imlInstruction->operation == PPCREC_IML_OP_NOT ) + { + // copy register content if different registers + if( regR != regA ) + x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); + x64Gen_not_reg64Low32(x64GenContext, regR); + } + else if (imlInstruction->operation == PPCREC_IML_OP_NEG) + { + // copy register content if different registers + if (regR != regA) + x64Gen_mov_reg64_reg64(x64GenContext, regR, regA); + x64Gen_neg_reg64Low32(x64GenContext, regR); + } + else if( imlInstruction->operation == PPCREC_IML_OP_CNTLZW ) + { + // count leading zeros + // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) + if(g_CPUFeatures.x86.lzcnt) + { + x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, regR, regA); + } + else + { + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, regA, regA); + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); + x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); + x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, regR, regA); + x64Gen_neg_reg64Low32(x64GenContext, regR); + x64Gen_add_reg64Low32_imm32(x64GenContext, regR, 32-1); + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); + x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); + x64Gen_mov_reg64Low32_imm32(x64GenContext, regR, 32); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); + } + } + else if( imlInstruction->operation == PPCREC_IML_OP_X86_CMP) + { + x64GenContext->emitter->CMP_dd(regR, regA); + } + else + { + cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); + return false; + } + return true; +} + +bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + auto regR = _reg32(imlInstruction->op_r_immS32.regR); + + if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) + { + x64Gen_mov_reg64Low32_imm32(x64GenContext, regR, (uint32)imlInstruction->op_r_immS32.immS32); + } + else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) + { + cemu_assert_debug((imlInstruction->op_r_immS32.immS32 & 0x80) == 0); + x64Gen_rol_reg64Low32_imm8(x64GenContext, regR, (uint8)imlInstruction->op_r_immS32.immS32); + } + else if( imlInstruction->operation == PPCREC_IML_OP_X86_CMP) + { + sint32 imm = imlInstruction->op_r_immS32.immS32; + x64GenContext->emitter->CMP_di32(regR, imm); + } + else + { + cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation); + return false; + } + return true; +} + +bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + auto rRegResult = _reg32(imlInstruction->op_r_r_r.regR); + auto rRegOperand1 = _reg32(imlInstruction->op_r_r_r.regA); 
+ auto rRegOperand2 = _reg32(imlInstruction->op_r_r_r.regB); + + if (imlInstruction->operation == PPCREC_IML_OP_ADD) + { + // registerResult = registerOperand1 + registerOperand2 + if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) + { + // be careful not to overwrite the operand before we use it + if( rRegResult == rRegOperand1 ) + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); + else + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); + } + else + { + // copy operand1 to destination register before doing addition + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); + } + } + else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) + { + if( rRegOperand1 == rRegOperand2 ) + { + // result = operand1 - operand1 -> 0 + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); + } + else if( rRegResult == rRegOperand1 ) + { + // result = result - operand2 + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); + } + else if ( rRegResult == rRegOperand2 ) + { + // result = operand1 - result + x64Gen_neg_reg64Low32(x64GenContext, rRegResult); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); + } + else + { + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); + } + } + else if (imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR) + { + if (rRegResult == rRegOperand2) + std::swap(rRegOperand1, rRegOperand2); + + if (rRegResult != rRegOperand1) + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); + + if (imlInstruction->operation == PPCREC_IML_OP_OR) + x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); + else if (imlInstruction->operation == PPCREC_IML_OP_AND) + x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); + else + x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); + } + else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) + { + // registerResult = registerOperand1 * registerOperand2 + if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) + { + // be careful not to overwrite the operand before we use it + if( rRegResult == rRegOperand1 ) + x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); + else + x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); + } + else + { + // copy operand1 to destination register before doing multiplication + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); + // add operand2 + x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); + } + } + else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW ) + { + // registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits) + + if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SRW) + { + // use BMI2 SHRX if available + x64Gen_shrx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); + } + else if (g_CPUFeatures.x86.bmi2 && imlInstruction->operation == PPCREC_IML_OP_SLW) + { + // use BMI2 SHLX if available + x64Gen_shlx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); + 
x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); // trim result to 32bit + } + else + { + // lazy and slow way to do shift by register without relying on ECX/CL or BMI2 + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1); + for (sint32 b = 0; b < 6; b++) + { + x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1 << b)); + sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); + x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set + if (b == 5) + { + x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); + } + else + { + if (imlInstruction->operation == PPCREC_IML_OP_SLW) + x64Gen_shl_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b)); + else + x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b)); + } + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); + } + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); + } + } + else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) + { + // todo: Use BMI2 rotate if available + // check if CL/ECX/RCX is available + if( rRegResult != X86_REG_RCX && rRegOperand1 != X86_REG_RCX && rRegOperand2 != X86_REG_RCX ) + { + // swap operand 2 with RCX + x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2); + // move operand 1 to temp register + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1); + // rotate + x64Gen_rol_reg64Low32_cl(x64GenContext, REG_RESV_TEMP); + // undo swap operand 2 with RCX + x64Gen_xchg_reg64_reg64(x64GenContext, X86_REG_RCX, rRegOperand2); + // copy to result register + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); + } + else + { + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1); + // lazy and slow way to do shift by register without relying on ECX/CL + for(sint32 b=0; b<5; b++) + { + x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<<b)); + sint32 jumpInstructionOffset = x64GenContext->emitter->GetWriteIndex(); + x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set + x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b)); + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->emitter->GetWriteIndex()); + } + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); + } + } + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S || + imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U || + imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + { + if(g_CPUFeatures.x86.bmi2) + { + if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) + x64Gen_sarx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + x64Gen_shrx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + x64Gen_shlx_reg32_reg32_reg32(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); + } + else + { + cemu_assert_debug(rRegOperand2 == X86_REG_ECX); + bool useTempReg = rRegResult == X86_REG_ECX && rRegOperand1 != X86_REG_ECX; + auto origRegResult = rRegResult; + if(useTempReg) + { + x64GenContext->emitter->MOV_dd(REG_RESV_TEMP, rRegOperand1); + rRegResult = REG_RESV_TEMP; + } + if(rRegOperand1 != rRegResult) + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); + if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) + x64GenContext->emitter->SAR_d_CL(rRegResult); + else if (imlInstruction->operation == 
PPCREC_IML_OP_RIGHT_SHIFT_U) + x64GenContext->emitter->SHR_d_CL(rRegResult); + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + x64GenContext->emitter->SHL_d_CL(rRegResult); + if(useTempReg) + x64GenContext->emitter->MOV_dd(origRegResult, REG_RESV_TEMP); + } + } + else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) + { + x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); + x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX); + // mov operand 2 to temp register + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); + // mov operand1 to EAX + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, X86_REG_EAX, rRegOperand1); + // sign or zero extend EAX to EDX:EAX based on division sign mode + if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED ) + x64Gen_cdq(x64GenContext); + else + x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, X86_REG_EDX, X86_REG_EDX); + // make sure we avoid division by zero + x64Gen_test_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); + x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 3); + // divide + if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED ) + x64Gen_idiv_reg64Low32(x64GenContext, REG_RESV_TEMP); + else + x64Gen_div_reg64Low32(x64GenContext, REG_RESV_TEMP); + // result of division is now stored in EAX, move it to result register + if( rRegResult != X86_REG_EAX ) + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, X86_REG_EAX); + // restore EAX / EDX + if( rRegResult != X86_REG_RAX ) + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); + if( rRegResult != X86_REG_RDX ) + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); + } + else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) + { + x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), X86_REG_EAX); + x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), X86_REG_EDX); + // mov operand 2 to temp register + x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); + // mov operand1 to EAX + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, X86_REG_EAX, rRegOperand1); + if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED ) + { + // zero extend EAX to EDX:EAX + x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, X86_REG_EDX, X86_REG_EDX); + } + else + { + // sign extend EAX to EDX:EAX + x64Gen_cdq(x64GenContext); + } + // multiply + if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED ) + x64Gen_imul_reg64Low32(x64GenContext, REG_RESV_TEMP); + else + x64Gen_mul_reg64Low32(x64GenContext, REG_RESV_TEMP); + // result of multiplication is now stored in EDX:EAX, move it to result register + if( rRegResult != X86_REG_EDX ) + x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, X86_REG_EDX); + // restore EAX / EDX + if( rRegResult != X86_REG_RAX ) + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); + if( rRegResult != X86_REG_RDX ) + x64Emit_mov_reg64_mem32(x64GenContext, X86_REG_EDX, REG_RESV_HCPU, 
(uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); + } + else + { + cemuLog_logDebug(LogType::Force, "PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); + return false; + } + return true; +} + +bool PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + auto regR = _reg32(imlInstruction->op_r_r_r_carry.regR); + auto regA = _reg32(imlInstruction->op_r_r_r_carry.regA); + auto regB = _reg32(imlInstruction->op_r_r_r_carry.regB); + auto regCarry = _reg32(imlInstruction->op_r_r_r_carry.regCarry); + bool carryRegIsShared = regCarry == regA || regCarry == regB; + cemu_assert_debug(regCarry != regR); // two outputs sharing the same register is undefined behavior + + switch (imlInstruction->operation) + { + case PPCREC_IML_OP_ADD: + if (regB == regR) + std::swap(regB, regA); + if (regR != regA) + x64GenContext->emitter->MOV_dd(regR, regA); + if(!carryRegIsShared) + x64GenContext->emitter->XOR_dd(regCarry, regCarry); + x64GenContext->emitter->ADD_dd(regR, regB); + x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); // below condition checks carry flag + if(carryRegIsShared) + x64GenContext->emitter->AND_di8(regCarry, 1); // clear upper bits + break; + case PPCREC_IML_OP_ADD_WITH_CARRY: + // assumes that carry is already correctly initialized as 0 or 1 + if (regB == regR) + std::swap(regB, regA); + if (regR != regA) + x64GenContext->emitter->MOV_dd(regR, regA); + x64GenContext->emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag + x64GenContext->emitter->ADC_dd(regR, regB); + x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); + break; + default: + cemu_assert_unimplemented(); + return false; + } + return true; +} + +bool PPCRecompilerX64Gen_IsSameCompare(IMLInstruction* imlInstructionA, IMLInstruction* imlInstructionB) +{ + if(imlInstructionA->type != imlInstructionB->type) + return false; + if(imlInstructionA->type == PPCREC_IML_TYPE_COMPARE) + return imlInstructionA->op_compare.regA == imlInstructionB->op_compare.regA && imlInstructionA->op_compare.regB == imlInstructionB->op_compare.regB; + else if(imlInstructionA->type == PPCREC_IML_TYPE_COMPARE_S32) + return imlInstructionA->op_compare_s32.regA == imlInstructionB->op_compare_s32.regA && imlInstructionA->op_compare_s32.immS32 == imlInstructionB->op_compare_s32.immS32; + return false; +} + +bool PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, sint32& extraInstructionsProcessed) +{ + extraInstructionsProcessed = 0; + boost::container::static_vector<IMLInstruction*, 4> compareInstructions; + compareInstructions.push_back(imlInstruction); + for(sint32 i=1; i<4; i++) + { + IMLInstruction* nextIns = x64GenContext->GetNextInstruction(i); + if(!nextIns || !PPCRecompilerX64Gen_IsSameCompare(imlInstruction, nextIns)) + break; + compareInstructions.push_back(nextIns); + } + auto OperandOverlapsWithR = [&](IMLInstruction* ins) -> bool + { + cemu_assert_debug(ins->type == PPCREC_IML_TYPE_COMPARE || ins->type == PPCREC_IML_TYPE_COMPARE_S32); + if(ins->type == PPCREC_IML_TYPE_COMPARE) + return _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regA) || _reg32_from_reg8(_reg8(ins->op_compare.regR)) == _reg32(ins->op_compare.regB); + else /* PPCREC_IML_TYPE_COMPARE_S32 */ + 
return _reg32_from_reg8(_reg8(ins->op_compare_s32.regR)) == _reg32(ins->op_compare_s32.regA); + }; + auto GetRegR = [](IMLInstruction* insn) + { + return insn->type == PPCREC_IML_TYPE_COMPARE ? _reg32_from_reg8(_reg8(insn->op_compare.regR)) : _reg32_from_reg8(_reg8(insn->op_compare_s32.regR)); + }; + // prefer XOR method for zeroing out registers if possible + for(auto& it : compareInstructions) + { + if(OperandOverlapsWithR(it)) + continue; + auto regR = GetRegR(it); + x64GenContext->emitter->XOR_dd(regR, regR); // zero bytes unaffected by SETcc + } + // emit the compare instruction + if(imlInstruction->type == PPCREC_IML_TYPE_COMPARE) + { + auto regA = _reg32(imlInstruction->op_compare.regA); + auto regB = _reg32(imlInstruction->op_compare.regB); + x64GenContext->emitter->CMP_dd(regA, regB); + } + else if(imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) + { + auto regA = _reg32(imlInstruction->op_compare_s32.regA); + sint32 imm = imlInstruction->op_compare_s32.immS32; + x64GenContext->emitter->CMP_di32(regA, imm); + } + // emit the SETcc instructions + for(auto& it : compareInstructions) + { + auto regR = _reg8(it->op_compare.regR); + X86Cond cond = _x86Cond(it->op_compare.cond); + if(OperandOverlapsWithR(it)) + x64GenContext->emitter->MOV_di32(_reg32_from_reg8(regR), 0); + x64GenContext->emitter->SETcc_b(cond, regR); + } + extraInstructionsProcessed = (sint32)compareInstructions.size() - 1; + return true; +} + +bool PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment) +{ + auto regBool = _reg8(imlInstruction->op_conditional_jump.registerBool); + bool mustBeTrue = imlInstruction->op_conditional_jump.mustBeTrue; + x64GenContext->emitter->TEST_bb(regBool, regBool); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); + x64GenContext->emitter->Jcc_j32(mustBeTrue ? 
X86_CONDITION_NZ : X86_CONDITION_Z, 0); + return true; +} + +void PPCRecompilerX64Gen_imlInstruction_x86_eflags_jcc(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment) +{ + X86Cond cond = _x86Cond(imlInstruction->op_x86_eflags_jcc.cond, imlInstruction->op_x86_eflags_jcc.invertedCondition); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); + x64GenContext->emitter->Jcc_j32(cond, 0); +} + +bool PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, IMLSegment* imlSegment) +{ + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, imlSegment->nextSegmentBranchTaken); + x64GenContext->emitter->JMP_j32(0); + return true; +} + +bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + auto regR = _reg32(imlInstruction->op_r_r_s32.regR); + auto regA = _reg32(imlInstruction->op_r_r_s32.regA); + uint32 immS32 = imlInstruction->op_r_r_s32.immS32; + + if( imlInstruction->operation == PPCREC_IML_OP_ADD ) + { + uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; + if(regR != regA) + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); + x64Gen_add_reg64Low32_imm32(x64GenContext, regR, (uint32)immU32); + } + else if (imlInstruction->operation == PPCREC_IML_OP_SUB) + { + if (regR != regA) + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); + x64Gen_sub_reg64Low32_imm32(x64GenContext, regR, immS32); + } + else if (imlInstruction->operation == PPCREC_IML_OP_AND || + imlInstruction->operation == PPCREC_IML_OP_OR || + imlInstruction->operation == PPCREC_IML_OP_XOR) + { + if (regR != regA) + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); + if (imlInstruction->operation == PPCREC_IML_OP_AND) + x64Gen_and_reg64Low32_imm32(x64GenContext, regR, immS32); + else if (imlInstruction->operation == PPCREC_IML_OP_OR) + x64Gen_or_reg64Low32_imm32(x64GenContext, regR, immS32); + else // XOR + x64Gen_xor_reg64Low32_imm32(x64GenContext, regR, immS32); + } + else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) + { + // registerResult = registerOperand * immS32 + sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32; + x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize + if( regR != regA ) + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); + x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, regR, REG_RESV_TEMP); + } + else if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT || + imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U || + imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S) + { + if( regA != regR ) + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, regR, regA); + if (imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT) + x64Gen_shl_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32); + else if (imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + x64Gen_shr_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32); + else // RIGHT_SHIFT_S + x64Gen_sar_reg64Low32_imm8(x64GenContext, regR, imlInstruction->op_r_r_s32.immS32); + } + else + { + debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r_s32(): Unsupported operation 0x%x\n", 
imlInstruction->operation); + return false; + } + return true; +} + +bool PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + auto regR = _reg32(imlInstruction->op_r_r_s32_carry.regR); + auto regA = _reg32(imlInstruction->op_r_r_s32_carry.regA); + sint32 immS32 = imlInstruction->op_r_r_s32_carry.immS32; + auto regCarry = _reg32(imlInstruction->op_r_r_s32_carry.regCarry); + cemu_assert_debug(regCarry != regR); // we dont allow two different outputs sharing the same register + + bool delayCarryInit = regCarry == regA; + + switch (imlInstruction->operation) + { + case PPCREC_IML_OP_ADD: + if(!delayCarryInit) + x64GenContext->emitter->XOR_dd(regCarry, regCarry); + if (regR != regA) + x64GenContext->emitter->MOV_dd(regR, regA); + x64GenContext->emitter->ADD_di32(regR, immS32); + if(delayCarryInit) + x64GenContext->emitter->MOV_di32(regCarry, 0); + x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); + break; + case PPCREC_IML_OP_ADD_WITH_CARRY: + // assumes that carry is already correctly initialized as 0 or 1 + cemu_assert_debug(regCarry != regR); + if (regR != regA) + x64GenContext->emitter->MOV_dd(regR, regA); + x64GenContext->emitter->BT_du8(regCarry, 0); // copy carry register to x86 carry flag + x64GenContext->emitter->ADC_di32(regR, immS32); + x64GenContext->emitter->SETcc_b(X86_CONDITION_B, _reg8_from_reg32(regCarry)); + break; + default: + cemu_assert_unimplemented(); + return false; + } + return true; +} + +bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + // some tests (all performed on a i7-4790K) + // 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write and direct dependency) + // 2) CMP [mem], 0 + JG has about equal (or slightly worse) performance than BT + JNC + + // BT + x64Gen_bt_mem8(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative + cemu_assert_debug(x64GenContext->currentSegment->GetBranchTaken()); + PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, x64GenContext->currentSegment->GetBranchTaken()); + x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); + return true; +} + +void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + uint32 name = imlInstruction->op_r_name.name; + if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64) + { + auto regR = _reg64(imlInstruction->op_r_name.regR); + if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32) + { + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0)); + } + else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999) + { + sint32 sprIndex = (name - PPCREC_NAME_SPR0); + if (sprIndex == SPR_LR) + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR)); + else if (sprIndex == SPR_CTR) + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.CTR)); + else if (sprIndex == SPR_XER) + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.XER)); + 
else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) + { + sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, memOffset); + } + else + assert_dbg(); + } + else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) + { + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY)); + } + else if (name == PPCREC_NAME_XER_CA) + { + x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_ca)); + } + else if (name == PPCREC_NAME_XER_SO) + { + x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_so)); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + x64Emit_movZX_reg64_mem8(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) + { + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + x64Emit_mov_reg64_mem32(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue)); + } + else + assert_dbg(); + } + else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64) + { + auto regR = _regF64(imlInstruction->op_r_name.regR); + if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64)) + { + sint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2; + sint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2; + x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + pairIndex * sizeof(double)); + } + else if (name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) + { + x64Gen_movupd_xmmReg_memReg128(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)); + } + else + { + cemu_assert_debug(false); + } + } + else + DEBUG_BREAK; + +} + +void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + uint32 name = imlInstruction->op_r_name.name; + + if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::I64) + { + auto regR = _reg64(imlInstruction->op_r_name.regR); + if (name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0 + 32) + { + x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, gpr) + sizeof(uint32) * (name - PPCREC_NAME_R0), regR); + } + else if (name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0 + 999) + { + uint32 sprIndex = (name - PPCREC_NAME_SPR0); + if (sprIndex == SPR_LR) + x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.LR), regR); + else if (sprIndex == SPR_CTR) + x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.CTR), regR); + else if (sprIndex == SPR_XER) + x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, spr.XER), regR); + else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) + { + sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, memOffset, regR); 
+ } + else + assert_dbg(); + } + else if (name >= PPCREC_NAME_TEMPORARY && name < PPCREC_NAME_TEMPORARY + 4) + { + x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR_reg) + sizeof(uint32) * (name - PPCREC_NAME_TEMPORARY), regR); + } + else if (name == PPCREC_NAME_XER_CA) + { + x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_ca), X86_REG_NONE, 0, _reg8_from_reg64(regR)); + } + else if (name == PPCREC_NAME_XER_SO) + { + x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, xer_so), X86_REG_NONE, 0, _reg8_from_reg64(regR)); + } + else if (name >= PPCREC_NAME_CR && name <= PPCREC_NAME_CR_LAST) + { + x64GenContext->emitter->MOV_bb_l(REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + (name - PPCREC_NAME_CR), X86_REG_NONE, 0, _reg8_from_reg64(regR)); + } + else if (name == PPCREC_NAME_CPU_MEMRES_EA) + { + x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr), regR); + } + else if (name == PPCREC_NAME_CPU_MEMRES_VAL) + { + x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue), regR); + } + else + assert_dbg(); + } + else if (imlInstruction->op_r_name.regR.GetBaseFormat() == IMLRegFormat::F64) + { + auto regR = _regF64(imlInstruction->op_r_name.regR); + uint32 name = imlInstruction->op_r_name.name; + if (name >= PPCREC_NAME_FPR_HALF && name < (PPCREC_NAME_FPR_HALF + 64)) + { + sint32 regIndex = (name - PPCREC_NAME_FPR_HALF) / 2; + sint32 pairIndex = (name - PPCREC_NAME_FPR_HALF) % 2; + x64Gen_movsd_memReg64_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, fpr) + sizeof(FPR_t) * regIndex + (pairIndex ? sizeof(double) : 0)); + } + else if (name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0 + 8)) + { + x64Gen_movupd_memReg128_xmmReg(x64GenContext, regR, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR) + sizeof(FPR_t) * (name - PPCREC_NAME_TEMPORARY_FPR0)); + } + else + { + cemu_assert_debug(false); + } + } + else + DEBUG_BREAK; + + +} + +uint8* codeMemoryBlock = nullptr; +sint32 codeMemoryBlockIndex = 0; +sint32 codeMemoryBlockSize = 0; + +std::mutex mtx_allocExecutableMemory; + +uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size) +{ + std::lock_guard lck(mtx_allocExecutableMemory); + if( codeMemoryBlockIndex+size > codeMemoryBlockSize ) + { + // allocate new block + codeMemoryBlockSize = std::max(1024*1024*4, size+1024); // 4MB (or more if the function is larger than 4MB) + codeMemoryBlockIndex = 0; + codeMemoryBlock = (uint8*)MemMapper::AllocateMemory(nullptr, codeMemoryBlockSize, MemMapper::PAGE_PERMISSION::P_RWX); + } + uint8* codeMem = codeMemoryBlock + codeMemoryBlockIndex; + codeMemoryBlockIndex += size; + // pad to 4 byte alignment + while (codeMemoryBlockIndex & 3) + { + codeMemoryBlock[codeMemoryBlockIndex] = 0x90; + codeMemoryBlockIndex++; + } + return codeMem; +} + +bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) +{ + x64GenContext_t x64GenContext{}; + + // generate iml instruction code + bool codeGenerationFailed = false; + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + { + x64GenContext.currentSegment = segIt; + segIt->x64Offset = x64GenContext.emitter->GetWriteIndex(); + for(size_t i=0; i<segIt->imlList.size(); i++) + { + x64GenContext.m_currentInstructionEmitIndex = i; + IMLInstruction* imlInstruction = segIt->imlList.data() + i; + + if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME ) + 
{ + PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else if( imlInstruction->type == PPCREC_IML_TYPE_NAME_R ) + { + PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else if( imlInstruction->type == PPCREC_IML_TYPE_R_R ) + { + if( PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) + { + if (PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) + { + if (PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + if (PPCRecompilerX64Gen_imlInstruction_r_r_s32_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) + { + if (PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + if (PPCRecompilerX64Gen_imlInstruction_r_r_r_carry(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) + codeGenerationFailed = true; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_COMPARE || imlInstruction->type == PPCREC_IML_TYPE_COMPARE_S32) + { + sint32 extraInstructionsProcessed; + PPCRecompilerX64Gen_imlInstruction_compare_x(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, extraInstructionsProcessed); + i += extraInstructionsProcessed; + } + else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + if (PPCRecompilerX64Gen_imlInstruction_cjump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false) + codeGenerationFailed = true; + } + else if(imlInstruction->type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + { + PPCRecompilerX64Gen_imlInstruction_x86_eflags_jcc(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_JUMP) + { + if (PPCRecompilerX64Gen_imlInstruction_jump2(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, segIt) == false) + codeGenerationFailed = true; + } + else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) + { + PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) + { + if( PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) + { + codeGenerationFailed = true; + } + } + else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD ) + { + if( PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false ) + { + codeGenerationFailed = true; + } + } + else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED ) + { + if( PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false ) + { + codeGenerationFailed = 
true; + } + } + else if( imlInstruction->type == PPCREC_IML_TYPE_STORE ) + { + if( PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false ) + { + codeGenerationFailed = true; + } + } + else if( imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED ) + { + if( PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false ) + { + codeGenerationFailed = true; + } + } + else if (imlInstruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + { + PPCRecompilerX64Gen_imlInstruction_atomic_cmp_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_CALL_IMM) + { + PPCRecompilerX64Gen_imlInstruction_call_imm(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) + { + // no op + } + else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD ) + { + if( PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false ) + { + codeGenerationFailed = true; + } + } + else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED ) + { + if( PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false ) + { + codeGenerationFailed = true; + } + } + else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE ) + { + if( PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false ) + { + codeGenerationFailed = true; + } + } + else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED ) + { + if( PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false ) + { + codeGenerationFailed = true; + } + } + else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R ) + { + PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R ) + { + PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R ) + { + PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R ) + { + PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_COMPARE) + { + PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); + } + else + { + debug_printf("PPCRecompiler_generateX64Code(): Unsupported iml type 0x%x\n", imlInstruction->type); + assert_dbg(); + } + } + } + // handle failed code generation + if( codeGenerationFailed ) + { + return false; + } + // allocate executable memory + uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes()); + size_t baseAddress = (size_t)executableMemory; + // fix relocs + for(auto& relocIt : x64GenContext.relocateOffsetTable2) + { + // search for segment that starts with this offset + uint32 ppcOffset = (uint32)(size_t)relocIt.extraInfo; + uint32 x64Offset = 0xFFFFFFFF; + + IMLSegment* destSegment = 
(IMLSegment*)relocIt.extraInfo; + x64Offset = destSegment->x64Offset; + + uint32 relocBase = relocIt.offset; + uint8* relocInstruction = x64GenContext.emitter->GetBufferPtr()+relocBase; + if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) ) + { + // Jcc relativeImm32 + sint32 distanceNearJump = (sint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 2)); + if (distanceNearJump >= -128 && distanceNearJump < 127) // disabled + { + // convert to near Jcc + *(uint8*)(relocInstruction + 0) = (uint8)(relocInstruction[1]-0x80 + 0x70); + // patch offset + *(uint8*)(relocInstruction + 1) = (uint8)distanceNearJump; + // replace unused 4 bytes with NOP instruction + relocInstruction[2] = 0x0F; + relocInstruction[3] = 0x1F; + relocInstruction[4] = 0x40; + relocInstruction[5] = 0x00; + } + else + { + // patch offset + *(uint32*)(relocInstruction + 2) = (uint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 6)); + } + } + else if( relocInstruction[0] == 0xE9 ) + { + // JMP relativeImm32 + *(uint32*)(relocInstruction+1) = (uint32)((baseAddress+x64Offset)-(baseAddress+relocBase+5)); + } + else + assert_dbg(); + } + + // copy code to executable memory + std::span codeBuffer = x64GenContext.emitter->GetBuffer(); + memcpy(executableMemory, codeBuffer.data(), codeBuffer.size_bytes()); + // set code + PPCRecFunction->x86Code = executableMemory; + PPCRecFunction->x86Size = codeBuffer.size_bytes(); + return true; +} + +void PPCRecompilerX64Gen_generateEnterRecompilerCode() +{ + x64GenContext_t x64GenContext{}; + + // start of recompiler entry function (15 regs) + x64Gen_push_reg64(&x64GenContext, X86_REG_RAX); + x64Gen_push_reg64(&x64GenContext, X86_REG_RCX); + x64Gen_push_reg64(&x64GenContext, X86_REG_RDX); + x64Gen_push_reg64(&x64GenContext, X86_REG_RBX); + x64Gen_push_reg64(&x64GenContext, X86_REG_RBP); + x64Gen_push_reg64(&x64GenContext, X86_REG_RDI); + x64Gen_push_reg64(&x64GenContext, X86_REG_RSI); + x64Gen_push_reg64(&x64GenContext, X86_REG_R8); + x64Gen_push_reg64(&x64GenContext, X86_REG_R9); + x64Gen_push_reg64(&x64GenContext, X86_REG_R10); + x64Gen_push_reg64(&x64GenContext, X86_REG_R11); + x64Gen_push_reg64(&x64GenContext, X86_REG_R12); + x64Gen_push_reg64(&x64GenContext, X86_REG_R13); + x64Gen_push_reg64(&x64GenContext, X86_REG_R14); + x64Gen_push_reg64(&x64GenContext, X86_REG_R15); + + // 000000007775EF04 | E8 00 00 00 00 call +0x00 + x64Gen_writeU8(&x64GenContext, 0xE8); + x64Gen_writeU8(&x64GenContext, 0x00); + x64Gen_writeU8(&x64GenContext, 0x00); + x64Gen_writeU8(&x64GenContext, 0x00); + x64Gen_writeU8(&x64GenContext, 0x00); + //000000007775EF09 | 48 83 04 24 05 add qword ptr ss:[rsp],5 + x64Gen_writeU8(&x64GenContext, 0x48); + x64Gen_writeU8(&x64GenContext, 0x83); + x64Gen_writeU8(&x64GenContext, 0x04); + x64Gen_writeU8(&x64GenContext, 0x24); + uint32 jmpPatchOffset = x64GenContext.emitter->GetWriteIndex(); + x64Gen_writeU8(&x64GenContext, 0); // skip the distance until after the JMP + x64Emit_mov_mem64_reg64(&x64GenContext, X86_REG_RDX, offsetof(PPCInterpreter_t, rspTemp), X86_REG_RSP); + + // MOV RSP, RDX (ppc interpreter instance) + x64Gen_mov_reg64_reg64(&x64GenContext, REG_RESV_HCPU, X86_REG_RDX); + // MOV R15, ppcRecompilerInstanceData + x64Gen_mov_reg64_imm64(&x64GenContext, REG_RESV_RECDATA, (uint64)ppcRecompilerInstanceData); + // MOV R13, memory_base + x64Gen_mov_reg64_imm64(&x64GenContext, REG_RESV_MEMBASE, (uint64)memory_base); + + //JMP recFunc + x64Gen_jmp_reg64(&x64GenContext, X86_REG_RCX); // call argument 1 + + 
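+ // the CALL +0 above pushed the address of the ADD instruction; its imm8 (patched below) is the distance
+ // from that ADD to the code emitted after the JMP, so the RET in the leave-recompiler stub (which first
+ // restores RSP from hCPU->rspTemp) returns to recompilerExit1 below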
x64GenContext.emitter->GetBuffer()[jmpPatchOffset] = (x64GenContext.emitter->GetWriteIndex() -(jmpPatchOffset-4)); + + //recompilerExit1: + x64Gen_pop_reg64(&x64GenContext, X86_REG_R15); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R14); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R13); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R12); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R11); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R10); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R9); + x64Gen_pop_reg64(&x64GenContext, X86_REG_R8); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RSI); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RDI); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RBP); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RBX); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RDX); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RCX); + x64Gen_pop_reg64(&x64GenContext, X86_REG_RAX); + // RET + x64Gen_ret(&x64GenContext); + + uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes()); + // copy code to executable memory + memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes()); + PPCRecompiler_enterRecompilerCode = (void ATTR_MS_ABI (*)(uint64,uint64))executableMemory; +} + + +void* PPCRecompilerX64Gen_generateLeaveRecompilerCode() +{ + x64GenContext_t x64GenContext{}; + + // update instruction pointer + // LR is in EDX + x64Emit_mov_mem32_reg32(&x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, instructionPointer), X86_REG_EDX); + // MOV RSP, [hCPU->rspTemp] + x64Emit_mov_reg64_mem64(&x64GenContext, X86_REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); + // RET + x64Gen_ret(&x64GenContext); + + uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.emitter->GetBuffer().size_bytes()); + // copy code to executable memory + memcpy(executableMemory, x64GenContext.emitter->GetBuffer().data(), x64GenContext.emitter->GetBuffer().size_bytes()); + return executableMemory; +} + +void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions() +{ + PPCRecompilerX64Gen_generateEnterRecompilerCode(); + PPCRecompiler_leaveRecompilerCode_unvisited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode(); + PPCRecompiler_leaveRecompilerCode_visited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode(); + cemu_assert_debug(PPCRecompiler_leaveRecompilerCode_unvisited != PPCRecompiler_leaveRecompilerCode_visited); +} + diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h similarity index 81% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h index 1d37a77e..de415ca9 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.h +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64.h @@ -1,104 +1,56 @@ -typedef struct +#include "../PPCRecompiler.h" // todo - get rid of dependency + +#include "x86Emitter.h" + +struct x64RelocEntry_t { + x64RelocEntry_t(uint32 offset, void* extraInfo) : offset(offset), extraInfo(extraInfo) {}; + uint32 offset; - uint8 type; void* extraInfo; -}x64RelocEntry_t; +}; -typedef struct +struct x64GenContext_t { - uint8* codeBuffer; - sint32 codeBufferIndex; - sint32 codeBufferSize; - // cr state - sint32 activeCRRegister; // current x86 condition flags reflect this cr* register - sint32 activeCRState; // describes the way in which x86 flags map to the cr 
register (signed / unsigned) + IMLSegment* currentSegment{}; + x86Assembler64* emitter; + sint32 m_currentInstructionEmitIndex; + + x64GenContext_t() + { + emitter = new x86Assembler64(); + } + + ~x64GenContext_t() + { + delete emitter; + } + + IMLInstruction* GetNextInstruction(sint32 relativeIndex = 1) + { + sint32 index = m_currentInstructionEmitIndex + relativeIndex; + if(index < 0 || index >= (sint32)currentSegment->imlList.size()) + return nullptr; + return currentSegment->imlList.data() + index; + } + // relocate offsets - x64RelocEntry_t* relocateOffsetTable; - sint32 relocateOffsetTableSize; - sint32 relocateOffsetTableCount; -}x64GenContext_t; - -// Some of these are defined by winnt.h and gnu headers -#undef REG_EAX -#undef REG_ECX -#undef REG_EDX -#undef REG_EBX -#undef REG_ESP -#undef REG_EBP -#undef REG_ESI -#undef REG_EDI -#undef REG_NONE -#undef REG_RAX -#undef REG_RCX -#undef REG_RDX -#undef REG_RBX -#undef REG_RSP -#undef REG_RBP -#undef REG_RSI -#undef REG_RDI -#undef REG_R8 -#undef REG_R9 -#undef REG_R10 -#undef REG_R11 -#undef REG_R12 -#undef REG_R13 -#undef REG_R14 -#undef REG_R15 - -#define REG_EAX 0 -#define REG_ECX 1 -#define REG_EDX 2 -#define REG_EBX 3 -#define REG_ESP 4 // reserved for low half of hCPU pointer -#define REG_EBP 5 -#define REG_ESI 6 -#define REG_EDI 7 -#define REG_NONE -1 - -#define REG_RAX 0 -#define REG_RCX 1 -#define REG_RDX 2 -#define REG_RBX 3 -#define REG_RSP 4 // reserved for hCPU pointer -#define REG_RBP 5 -#define REG_RSI 6 -#define REG_RDI 7 -#define REG_R8 8 -#define REG_R9 9 -#define REG_R10 10 -#define REG_R11 11 -#define REG_R12 12 -#define REG_R13 13 // reserved to hold pointer to memory base? (Not decided yet) -#define REG_R14 14 // reserved as temporary register -#define REG_R15 15 // reserved for pointer to ppcRecompilerInstanceData - -#define REG_AL 0 -#define REG_CL 1 -#define REG_DL 2 -#define REG_BL 3 -#define REG_AH 4 -#define REG_CH 5 -#define REG_DH 6 -#define REG_BH 7 + std::vector relocateOffsetTable2; +}; // reserved registers -#define REG_RESV_TEMP (REG_R14) -#define REG_RESV_HCPU (REG_RSP) -#define REG_RESV_MEMBASE (REG_R13) -#define REG_RESV_RECDATA (REG_R15) +#define REG_RESV_TEMP (X86_REG_R14) +#define REG_RESV_HCPU (X86_REG_RSP) +#define REG_RESV_MEMBASE (X86_REG_R13) +#define REG_RESV_RECDATA (X86_REG_R15) // reserved floating-point registers #define REG_RESV_FPR_TEMP (15) +#define reg32ToReg16(__x) (__x) // deprecated -extern sint32 x64Gen_registerMap[12]; - -#define tempToRealRegister(__x) (x64Gen_registerMap[__x]) -#define tempToRealFPRRegister(__x) (__x) -#define reg32ToReg16(__x) (__x) - +// deprecated condition flags enum { X86_CONDITION_EQUAL, // or zero @@ -119,36 +71,23 @@ enum X86_CONDITION_NONE, // no condition, jump always }; -#define PPCREC_CR_TEMPORARY (8) // never stored -#define PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC (0) // for signed arithmetic operations (ADD, CMPI) -#define PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC (1) // for unsigned arithmetic operations (ADD, CMPI) -#define PPCREC_CR_STATE_TYPE_LOGICAL (2) // for unsigned operations (CMPLI) - -#define X86_RELOC_MAKE_RELATIVE (0) // make code imm relative to instruction -#define X64_RELOC_LINK_TO_PPC (1) // translate from ppc address to x86 offset -#define X64_RELOC_LINK_TO_SEGMENT (2) // link to beginning of segment - -#define PPC_X64_GPR_USABLE_REGISTERS (16-4) -#define PPC_X64_FPR_USABLE_REGISTERS (16-1) // Use XMM0 - XMM14, XMM15 is the temp register - - -bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, 
ppcImlGenContext_t* ppcImlGenContext); - -void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext); +bool PPCRecompiler_generateX64Code(struct PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext); void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset); void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions(); -void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction); -void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction); -bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed); -bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed); +void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed); +bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed); -void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction); -void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction); -void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction); -void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); +void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* 
ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); + +void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction); // ASM gen void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v); @@ -196,9 +135,6 @@ void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstReg void x64Gen_and_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32); void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32); -void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister); -void x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister); - void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister); void x64Gen_add_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister); void x64Gen_add_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32); @@ -207,9 +143,6 @@ void x64Gen_sub_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 des void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32); void x64Gen_sub_reg64_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32); void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, sint32 memImmS32, uint64 immU32); -void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister); -void x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister); -void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32); void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32); void x64Gen_imul_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 operandRegister); void x64Gen_idiv_reg64Low32(x64GenContext_t* x64GenContext, sint32 operandRegister); @@ -241,9 +174,7 @@ void x64Gen_not_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister); void x64Gen_neg_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister); void x64Gen_cdq(x64GenContext_t* x64GenContext); -void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister); void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister); -void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister); void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister); void x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister); @@ -274,6 +205,7 @@ void x64Gen_movddup_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi void x64Gen_movhlps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_movsd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32); +void 
x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32); void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32); void x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); @@ -299,6 +231,7 @@ void x64Gen_andps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, uint32 memReg, uint32 memImmS32); void x64Gen_cvttpd2dq_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 registerDest, sint32 xmmRegisterSrc); +void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc); void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); void x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc); @@ -329,4 +262,8 @@ void x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext_t* x64G void x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister); void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); -void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); \ No newline at end of file +void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); +void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); +void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); +void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); +void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB); \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp similarity index 92% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp index 619c3985..b0ef8640 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64AVX.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64AVX.cpp @@ -1,5 +1,4 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerX64.h" +#include "BackendX64.h" void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32); @@ -21,11 +20,10 @@ void _x64Gen_vex128_nds(x64GenContext_t* x64GenContext, uint8 opcodeMap, uint8 a x64Gen_writeU8(x64GenContext, opcode); } -#define VEX_PP_0F 0 // guessed +#define VEX_PP_0F 0 #define VEX_PP_66_0F 1 -#define VEX_PP_F3_0F 2 // guessed 
-#define VEX_PP_F2_0F 3 // guessed - +#define VEX_PP_F3_0F 2 +#define VEX_PP_F2_0F 3 void x64Gen_avx_VPUNPCKHQDQ_xmm_xmm_xmm(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 srcRegisterA, sint32 srcRegisterB) { diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp similarity index 67% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp index 5a71e93d..bbb707e0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64BMI.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64BMI.cpp @@ -1,5 +1,4 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerX64.h" +#include "BackendX64.h" void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32); @@ -69,6 +68,34 @@ void x64Gen_shrx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); } +void x64Gen_shrx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) +{ + x64Gen_writeU8(x64GenContext, 0xC4); + x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0)); + x64Gen_writeU8(x64GenContext, 0x7B - registerB * 8); + x64Gen_writeU8(x64GenContext, 0xF7); + x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); +} + +void x64Gen_sarx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) +{ + // SARX reg64, reg64, reg64 + x64Gen_writeU8(x64GenContext, 0xC4); + x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0)); + x64Gen_writeU8(x64GenContext, 0xFA - registerB * 8); + x64Gen_writeU8(x64GenContext, 0xF7); + x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); +} + +void x64Gen_sarx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) +{ + x64Gen_writeU8(x64GenContext, 0xC4); + x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 0x20 : 0)); + x64Gen_writeU8(x64GenContext, 0x7A - registerB * 8); + x64Gen_writeU8(x64GenContext, 0xF7); + x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); +} + void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) { // SHLX reg64, reg64, reg64 @@ -77,4 +104,13 @@ void x64Gen_shlx_reg64_reg64_reg64(x64GenContext_t* x64GenContext, sint32 regist x64Gen_writeU8(x64GenContext, 0xF9 - registerB * 8); x64Gen_writeU8(x64GenContext, 0xF7); x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); +} + +void x64Gen_shlx_reg32_reg32_reg32(x64GenContext_t* x64GenContext, sint32 registerDst, sint32 registerA, sint32 registerB) +{ + x64Gen_writeU8(x64GenContext, 0xC4); + x64Gen_writeU8(x64GenContext, 0xE2 - ((registerDst >= 8) ? 0x80 : 0) - ((registerA >= 8) ? 
0x20 : 0)); + x64Gen_writeU8(x64GenContext, 0x79 - registerB * 8); + x64Gen_writeU8(x64GenContext, 0xF7); + x64Gen_writeU8(x64GenContext, 0xC0 + (registerDst & 7) * 8 + (registerA & 7)); } \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp new file mode 100644 index 00000000..6a8b1b97 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64FPU.cpp @@ -0,0 +1,469 @@ +#include "../PPCRecompiler.h" +#include "../IML/IML.h" +#include "BackendX64.h" +#include "Common/cpu_features.h" + +uint32 _regF64(IMLReg physReg); + +uint32 _regI32(IMLReg r) +{ + cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::I32); + return (uint32)r.GetRegID(); +} + +static x86Assembler64::GPR32 _reg32(sint8 physRegId) +{ + return (x86Assembler64::GPR32)physRegId; +} + +static x86Assembler64::GPR8_REX _reg8(IMLReg r) +{ + cemu_assert_debug(r.GetRegFormat() == IMLRegFormat::I32); // currently bool regs are implemented as 32bit registers + return (x86Assembler64::GPR8_REX)r.GetRegID(); +} + +static x86Assembler64::GPR32 _reg32_from_reg8(x86Assembler64::GPR8_REX regId) +{ + return (x86Assembler64::GPR32)regId; +} + +static x86Assembler64::GPR8_REX _reg8_from_reg32(x86Assembler64::GPR32 regId) +{ + return (x86Assembler64::GPR8_REX)regId; +} + +// load from memory +bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) +{ + sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData); + sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem); + sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; + if( indexed ) + realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2); + uint8 mode = imlInstruction->op_storeLoad.mode; + + if( mode == PPCREC_FPR_LD_MODE_SINGLE ) + { + // load byte swapped single into temporary FPR + if( indexed ) + { + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); + if(g_CPUFeatures.x86.movbe) + x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); + else + x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); + } + else + { + if(g_CPUFeatures.x86.movbe) + x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); + else + x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); + } + if(g_CPUFeatures.x86.movbe == false ) + x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); + x64Gen_movd_xmmReg_reg64Low32(x64GenContext, realRegisterXMM, REG_RESV_TEMP); + + if (imlInstruction->op_storeLoad.flags2.notExpanded) + { + // leave value as single + } + else + { + x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, realRegisterXMM); + } + } + else if( mode == PPCREC_FPR_LD_MODE_DOUBLE ) + { + if( g_CPUFeatures.x86.avx ) + { + if( indexed ) + { + // calculate offset + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); + // load 
value + x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0); + x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP); + x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP); + } + else + { + x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0); + x64GenContext->emitter->BSWAP_q(REG_RESV_TEMP); + x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP); + } + } + else + { + if( indexed ) + { + // calculate offset + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); + // load double low part to temporaryFPR + x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0); + x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP); + // calculate offset again + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); + // load double high part to temporaryFPR + x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+4); + x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP); + // load double from temporaryFPR + x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)); + } + else + { + // load double low part to temporaryFPR + x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0); + x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP); + // load double high part to temporaryFPR + x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+4); + x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); + x64Emit_mov_mem32_reg64(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP); + // load double from temporaryFPR + x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)); + } + } + } + else + { + return false; + } + return true; +} + +// store to memory +bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction, bool indexed) +{ + sint32 realRegisterXMM = _regF64(imlInstruction->op_storeLoad.registerData); + sint32 realRegisterMem = _regI32(imlInstruction->op_storeLoad.registerMem); + sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; + if( indexed ) + realRegisterMem2 = _regI32(imlInstruction->op_storeLoad.registerMem2); + uint8 mode = imlInstruction->op_storeLoad.mode; + if( mode == PPCREC_FPR_ST_MODE_SINGLE ) + { + if 
(imlInstruction->op_storeLoad.flags2.notExpanded) + { + // value is already in single format + x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM); + } + else + { + x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, realRegisterXMM); + x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); + } + if(g_CPUFeatures.x86.movbe == false ) + x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); + if( indexed ) + { + if( realRegisterMem == realRegisterMem2 ) + assert_dbg(); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + if(g_CPUFeatures.x86.movbe) + x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + else + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + if( indexed ) + { + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + } + else if( mode == PPCREC_FPR_ST_MODE_DOUBLE ) + { + if( indexed ) + { + if( realRegisterMem == realRegisterMem2 ) + assert_dbg(); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)); + // store double low part + x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+0); + x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP); + // store double high part + x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryFPR)+4); + x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP); + if( indexed ) + { + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + } + else if( mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0 ) + { + x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM); + x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); + if( indexed ) + { + cemu_assert_debug(realRegisterMem == realRegisterMem2); + x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); + } + else + { + x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); + } + } + else + { + debug_printf("PPCRecompilerX64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode); + return false; + } + return true; +} + +// FPR op FPR +void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + if( imlInstruction->operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT ) + { + uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regR); + uint32 regFpr = 
_regF64(imlInstruction->op_fpr_r_r.regA); + x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, regGpr, regFpr); + return; + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT ) + { + uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR); + uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA); + x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext, regFpr, regGpr); + return; + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT) + { + cemu_assert_debug(imlInstruction->op_fpr_r_r.regR.GetRegFormat() == IMLRegFormat::F64); // assuming target is always F64 for now + cemu_assert_debug(imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32); // supporting only 32bit floats as input for now + // exact operation depends on size of types. Floats are automatically promoted to double if the target is F64 + uint32 regFpr = _regF64(imlInstruction->op_fpr_r_r.regR); + if (imlInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::I32) + { + uint32 regGpr = _regI32(imlInstruction->op_fpr_r_r.regA); + x64Gen_movq_xmmReg_reg64(x64GenContext, regFpr, regGpr); // using reg32 as reg64 param here is ok. We'll refactor later + // float to double + x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regFpr, regFpr); + } + else + { + cemu_assert_unimplemented(); + } + return; + } + + uint32 regR = _regF64(imlInstruction->op_fpr_r_r.regR); + uint32 regA = _regF64(imlInstruction->op_fpr_r_r.regA); + if( imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN ) + { + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA); + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY ) + { + x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA); + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE ) + { + x64Gen_divsd_xmmReg_xmmReg(x64GenContext, regR, regA); + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD ) + { + x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA); + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB ) + { + x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regA); + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FCTIWZ ) + { + x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, regA); + x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); + // move to FPR register + x64Gen_movq_xmmReg_reg64(x64GenContext, regR, REG_RESV_TEMP); + } + else + { + assert_dbg(); + } +} + +/* + * FPR = op (fprA, fprB) + */ +void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r.regR); + uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r.regA); + uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r.regB); + + if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY) + { + if (regR == regA) + { + x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB); + } + else if (regR == regB) + { + x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regA); + } + else + { + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA); + x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, regR, regB); + } + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD) + { + // todo: Use AVX 3-operand VADDSD if available + if (regR == regA) + { + x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB); + } + else if (regR == regB) + { + x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regA); + } + else + { + 
x64Gen_movaps_xmmReg_xmmReg(x64GenContext, regR, regA); + x64Gen_addsd_xmmReg_xmmReg(x64GenContext, regR, regB); + } + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB ) + { + if( regR == regA ) + { + x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB); + } + else if( regR == regB ) + { + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regA); + x64Gen_subsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, regB); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, REG_RESV_FPR_TEMP); + } + else + { + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regA); + x64Gen_subsd_xmmReg_xmmReg(x64GenContext, regR, regB); + } + } + else + assert_dbg(); +} + +/* + * FPR = op (fprA, fprB, fprC) + */ +void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + uint32 regR = _regF64(imlInstruction->op_fpr_r_r_r_r.regR); + uint32 regA = _regF64(imlInstruction->op_fpr_r_r_r_r.regA); + uint32 regB = _regF64(imlInstruction->op_fpr_r_r_r_r.regB); + uint32 regC = _regF64(imlInstruction->op_fpr_r_r_r_r.regC); + + if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT ) + { + x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, regA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); + sint32 jumpInstructionOffset1 = x64GenContext->emitter->GetWriteIndex(); + x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); + // select C + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regC); + sint32 jumpInstructionOffset2 = x64GenContext->emitter->GetWriteIndex(); + x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); + // select B + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->emitter->GetWriteIndex()); + x64Gen_movsd_xmmReg_xmmReg(x64GenContext, regR, regB); + // end + PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->emitter->GetWriteIndex()); + } + else + assert_dbg(); +} + +void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + uint32 regR = _regF64(imlInstruction->op_fpr_r.regR); + + if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE ) + { + x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_LOAD_ONE ) + { + x64Gen_movsd_xmmReg_memReg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1)); + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS ) + { + x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom)); + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS ) + { + x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, regR, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); + } + else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM ) + { + // convert to 32bit single + x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, regR, regR); + // convert back to 64bit double + x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR); + } + else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64) + { + // convert 
bottom to 64bit double + x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, regR, regR); + } + else + { + cemu_assert_unimplemented(); + } +} + +void PPCRecompilerX64Gen_imlInstruction_fpr_compare(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, IMLInstruction* imlInstruction) +{ + auto regR = _reg8(imlInstruction->op_fpr_compare.regR); + auto regA = _regF64(imlInstruction->op_fpr_compare.regA); + auto regB = _regF64(imlInstruction->op_fpr_compare.regB); + + x64GenContext->emitter->XOR_dd(_reg32_from_reg8(regR), _reg32_from_reg8(regR)); + x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, regA, regB); + + if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_GT) + { + // GT case can be covered with a single SETnbe which checks CF==0 && ZF==0 (unordered sets both) + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_NBE, regR); + return; + } + else if (imlInstruction->op_fpr_compare.cond == IMLCondition::UNORDERED_U) + { + // unordered case can be checked via PF + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PE, regR); + return; + } + + // remember unordered state + auto regTmp = _reg32_from_reg8(_reg32(REG_RESV_TEMP)); + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_PO, regTmp); // by reversing the parity we can avoid having to XOR the value for masking the LT/EQ conditions + + X86Cond x86Cond; + switch (imlInstruction->op_fpr_compare.cond) + { + case IMLCondition::UNORDERED_LT: + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_B, regR); + break; + case IMLCondition::UNORDERED_EQ: + x64GenContext->emitter->SETcc_b(X86Cond::X86_CONDITION_Z, regR); + break; + default: + cemu_assert_unimplemented(); + } + x64GenContext->emitter->AND_bb(_reg8_from_reg32(regR), _reg8_from_reg32(regTmp)); // if unordered (PF=1) then force LT/GT/EQ to zero +} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp similarity index 90% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp index 19327f46..efe929d0 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64Gen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64Gen.cpp @@ -1,62 +1,31 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" +#include "BackendX64.h" // x86/x64 extension opcodes that could be useful: // ANDN // mulx, rorx, sarx, shlx, shrx // PDEP, PEXT -void x64Gen_checkBuffer(x64GenContext_t* x64GenContext) -{ - // todo -} - void x64Gen_writeU8(x64GenContext_t* x64GenContext, uint8 v) { - if( x64GenContext->codeBufferIndex+1 > x64GenContext->codeBufferSize ) - { - x64GenContext->codeBufferSize *= 2; - x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize); - } - *(uint8*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v; - x64GenContext->codeBufferIndex++; + x64GenContext->emitter->_emitU8(v); } void x64Gen_writeU16(x64GenContext_t* x64GenContext, uint32 v) { - if( x64GenContext->codeBufferIndex+2 > x64GenContext->codeBufferSize ) - { - x64GenContext->codeBufferSize *= 2; - x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize); - } - *(uint16*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v; - x64GenContext->codeBufferIndex += 2; + x64GenContext->emitter->_emitU16(v); } void 
x64Gen_writeU32(x64GenContext_t* x64GenContext, uint32 v) { - if( x64GenContext->codeBufferIndex+4 > x64GenContext->codeBufferSize ) - { - x64GenContext->codeBufferSize *= 2; - x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize); - } - *(uint32*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v; - x64GenContext->codeBufferIndex += 4; + x64GenContext->emitter->_emitU32(v); } void x64Gen_writeU64(x64GenContext_t* x64GenContext, uint64 v) { - if( x64GenContext->codeBufferIndex+8 > x64GenContext->codeBufferSize ) - { - x64GenContext->codeBufferSize *= 2; - x64GenContext->codeBuffer = (uint8*)realloc(x64GenContext->codeBuffer, x64GenContext->codeBufferSize); - } - *(uint64*)(x64GenContext->codeBuffer+x64GenContext->codeBufferIndex) = v; - x64GenContext->codeBufferIndex += 8; + x64GenContext->emitter->_emitU64(v); } -#include "x64Emit.hpp" +#include "X64Emit.hpp" void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataRegister, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32) { @@ -67,7 +36,7 @@ void _x64Gen_writeMODRMDeprecated(x64GenContext_t* x64GenContext, sint32 dataReg forceUseOffset = true; } - if (memRegisterB64 == REG_NONE) + if (memRegisterB64 == X86_REG_NONE) { // memRegisterA64 + memImmS32 uint8 modRM = (dataRegister & 7) * 8 + (memRegisterA64 & 7); @@ -352,7 +321,7 @@ void x64Gen_mov_mem32Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint32 dataImmU32) { // MOV QWORD [+], dataImmU32 - if( memRegister == REG_R14 ) + if( memRegister == X86_REG_R14 ) { sint32 memImmS32 = (sint32)memImmU32; if( memImmS32 == 0 ) @@ -384,7 +353,7 @@ void x64Gen_mov_mem64Reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis void x64Gen_mov_mem8Reg64_imm8(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 memImmU32, uint8 dataImmU8) { // MOV BYTE [+], dataImmU8 - if( memRegister == REG_RSP ) + if( memRegister == X86_REG_RSP ) { sint32 memImmS32 = (sint32)memImmU32; if( memImmS32 >= -128 && memImmS32 <= 127 ) @@ -625,7 +594,7 @@ void _x64_op_reg64Low_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegist if (memRegister64 >= 8) x64Gen_writeU8(x64GenContext, 0x41); x64Gen_writeU8(x64GenContext, opByte); - _x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegister64, REG_NONE, memImmS32); + _x64Gen_writeMODRMDeprecated(x64GenContext, dstRegister, memRegister64, X86_REG_NONE, memImmS32); } void x64Gen_or_reg64Low8_mem8Reg64(x64GenContext_t* x64GenContext, sint32 dstRegister, sint32 memRegister64, sint32 memImmS32) @@ -643,40 +612,6 @@ void x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext_t* x64GenContext, sint32 dstRe _x64_op_reg64Low_mem8Reg64(x64GenContext, dstRegister, memRegister64, memImmS32, 0x88); } -void x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegisterA64, sint32 memRegisterB64, sint32 memImmS32, sint32 srcRegister) -{ - // LOCK CMPXCHG DWORD [ + + ], (low dword) - x64Gen_writeU8(x64GenContext, 0xF0); // LOCK prefix - - if( srcRegister >= 8 || memRegisterA64 >= 8|| memRegisterB64 >= 8 ) - x64Gen_writeU8(x64GenContext, 0x40+((srcRegister>=8)?4:0)+((memRegisterA64>=8)?1:0)+((memRegisterB64>=8)?2:0)); - - x64Gen_writeU8(x64GenContext, 0x0F); - x64Gen_writeU8(x64GenContext, 0xB1); - - _x64Gen_writeMODRMDeprecated(x64GenContext, srcRegister, memRegisterA64, memRegisterB64, memImmS32); -} - -void 
x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext_t* x64GenContext, sint32 memRegister64, sint32 memImmS32, sint32 srcRegister) -{ - // LOCK CMPXCHG DWORD [ + ], (low dword) - x64Gen_writeU8(x64GenContext, 0xF0); // LOCK prefix - - if( srcRegister >= 8 || memRegister64 >= 8 ) - x64Gen_writeU8(x64GenContext, 0x40+((srcRegister>=8)?4:0)+((memRegister64>=8)?1:0)); - - x64Gen_writeU8(x64GenContext, 0x0F); - x64Gen_writeU8(x64GenContext, 0xB1); - - if( memImmS32 == 0 ) - { - x64Gen_writeU8(x64GenContext, 0x45+(srcRegister&7)*8); - x64Gen_writeU8(x64GenContext, 0x00); - } - else - assert_dbg(); -} - void x64Gen_add_reg64_reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister) { // ADD , @@ -732,7 +667,7 @@ void x64Gen_add_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0x05); @@ -772,7 +707,7 @@ void x64Gen_sub_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0x2D); @@ -811,7 +746,7 @@ void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis { // SUB , sint32 immS32 = (sint32)immU32; - if( memRegister == REG_RSP ) + if( memRegister == X86_REG_RSP ) { if( memImmS32 >= 128 ) { @@ -843,64 +778,11 @@ void x64Gen_sub_mem32reg64_imm32(x64GenContext_t* x64GenContext, sint32 memRegis } } -void x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister) -{ - // SBB , - if( destRegister >= 8 && srcRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x45); - else if( srcRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x44); - else if( destRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x41); - x64Gen_writeU8(x64GenContext, 0x19); - x64Gen_writeU8(x64GenContext, 0xC0+(srcRegister&7)*8+(destRegister&7)); -} - -void x64Gen_adc_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister) -{ - // ADC , - if( destRegister >= 8 && srcRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x45); - else if( srcRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x44); - else if( destRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x41); - x64Gen_writeU8(x64GenContext, 0x11); - x64Gen_writeU8(x64GenContext, 0xC0+(srcRegister&7)*8+(destRegister&7)); -} - -void x64Gen_adc_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegister, uint32 immU32) -{ - sint32 immS32 = (sint32)immU32; - if( srcRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x41); - if( immS32 >= -128 && immS32 <= 127 ) - { - x64Gen_writeU8(x64GenContext, 0x83); - x64Gen_writeU8(x64GenContext, 0xD0+(srcRegister&7)); - x64Gen_writeU8(x64GenContext, (uint8)immS32); - } - else - { - if( srcRegister == REG_RAX ) - { - // special EAX short form - x64Gen_writeU8(x64GenContext, 0x15); - } - else - { - x64Gen_writeU8(x64GenContext, 0x81); - x64Gen_writeU8(x64GenContext, 0xD0+(srcRegister&7)); - } - x64Gen_writeU32(x64GenContext, immU32); - } -} - void x64Gen_dec_mem32(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint32 memoryImmU32) { // DEC dword [+imm] sint32 memoryImmS32 = (sint32)memoryImmU32; - if (memoryRegister != REG_RSP) + if (memoryRegister != X86_REG_RSP) assert_dbg(); // not supported yet if (memoryImmS32 >= -128 && memoryImmS32 <= 127) { @@ -981,7 +863,7 @@ void x64Gen_and_reg64Low32_imm32(x64GenContext_t* x64GenContext, 
sint32 srcRegis } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0x25); @@ -1026,7 +908,7 @@ void x64Gen_test_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegi sint32 immS32 = (sint32)immU32; if( srcRegister >= 8 ) x64Gen_writeU8(x64GenContext, 0x41); - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0xA9); @@ -1052,7 +934,7 @@ void x64Gen_cmp_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special RAX short form x64Gen_writeU8(x64GenContext, 0x3D); @@ -1082,7 +964,7 @@ void x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 des void x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 memRegister, sint32 memImmS32) { // CMP , DWORD [+] - if( memRegister == REG_RSP ) + if( memRegister == X86_REG_RSP ) { if( memImmS32 >= -128 && memImmS32 <= 127 ) assert_dbg(); // todo -> Shorter instruction form @@ -1112,7 +994,7 @@ void x64Gen_or_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegist } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0x0D); @@ -1172,7 +1054,7 @@ void x64Gen_xor_reg64Low32_imm32(x64GenContext_t* x64GenContext, sint32 srcRegis } else { - if( srcRegister == REG_RAX ) + if( srcRegister == X86_REG_RAX ) { // special EAX short form x64Gen_writeU8(x64GenContext, 0x35); @@ -1326,16 +1208,6 @@ void x64Gen_cdq(x64GenContext_t* x64GenContext) x64Gen_writeU8(x64GenContext, 0x99); } -void x64Gen_bswap_reg64(x64GenContext_t* x64GenContext, sint32 destRegister) -{ - if( destRegister >= 8 ) - x64Gen_writeU8(x64GenContext, 0x41|8); - else - x64Gen_writeU8(x64GenContext, 0x40|8); - x64Gen_writeU8(x64GenContext, 0x0F); - x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7)); -} - void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destRegister) { if( destRegister >= 8 ) @@ -1344,16 +1216,6 @@ void x64Gen_bswap_reg64Lower32bit(x64GenContext_t* x64GenContext, sint32 destReg x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7)); } -void x64Gen_bswap_reg64Lower16bit(x64GenContext_t* x64GenContext, sint32 destRegister) -{ - assert_dbg(); // do not use this instruction, it's result is always undefined. 
Instead use ROL , 8 - //x64Gen_writeU8(x64GenContext, 0x66); - //if( destRegister >= 8 ) - // x64Gen_writeU8(x64GenContext, 0x41); - //x64Gen_writeU8(x64GenContext, 0x0F); - //x64Gen_writeU8(x64GenContext, 0xC8+(destRegister&7)); -} - void x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext_t* x64GenContext, sint32 destRegister, sint32 srcRegister) { // SSE4 @@ -1388,7 +1250,7 @@ void x64Gen_setcc_mem8(x64GenContext_t* x64GenContext, sint32 conditionType, sin { // SETcc [+imm] sint32 memoryImmS32 = (sint32)memoryImmU32; - if( memoryRegister != REG_RSP ) + if( memoryRegister != X86_REG_RSP ) assert_dbg(); // not supported if( memoryRegister >= 8 ) assert_dbg(); // not supported @@ -1627,7 +1489,7 @@ void x64Gen_bt_mem8(x64GenContext_t* x64GenContext, sint32 memoryRegister, uint3 { // BT [+imm], bitIndex (bit test) sint32 memoryImmS32 = (sint32)memoryImmU32; - if( memoryRegister != REG_RSP ) + if( memoryRegister != X86_REG_RSP ) assert_dbg(); // not supported yet if( memoryImmS32 >= -128 && memoryImmS32 <= 127 ) { @@ -1662,7 +1524,7 @@ void x64Gen_jmp_imm32(x64GenContext_t* x64GenContext, uint32 destImm32) void x64Gen_jmp_memReg64(x64GenContext_t* x64GenContext, sint32 memRegister, uint32 immU32) { - if( memRegister == REG_NONE ) + if( memRegister == X86_REG_NONE ) { assert_dbg(); } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp similarity index 92% rename from src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp index 92289d68..4bbcc025 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64GenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/BackendX64GenFPU.cpp @@ -1,6 +1,4 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" +#include "BackendX64.h" void x64Gen_genSSEVEXPrefix2(x64GenContext_t* x64GenContext, sint32 xmmRegister1, sint32 xmmRegister2, bool use64BitMode) { @@ -44,7 +42,7 @@ void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRe // SSE2 // move two doubles from memory into xmm register // MOVUPD , [+] - if( memRegister == REG_ESP ) + if( memRegister == X86_REG_ESP ) { // todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range // 66 0F 10 84 E4 23 01 00 00 @@ -56,7 +54,7 @@ void x64Gen_movupd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRe x64Gen_writeU8(x64GenContext, 0xE4); x64Gen_writeU32(x64GenContext, memImmU32); } - else if( memRegister == REG_NONE ) + else if( memRegister == X86_REG_NONE ) { assert_dbg(); //x64Gen_writeU8(x64GenContext, 0x66); @@ -76,7 +74,7 @@ void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRe // SSE2 // move two doubles from memory into xmm register // MOVUPD [+], - if( memRegister == REG_ESP ) + if( memRegister == X86_REG_ESP ) { // todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range x64Gen_writeU8(x64GenContext, 0x66); @@ -87,7 +85,7 @@ void x64Gen_movupd_memReg128_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRe x64Gen_writeU8(x64GenContext, 0xE4); x64Gen_writeU32(x64GenContext, memImmU32); } - else if( memRegister == REG_NONE ) + else if( memRegister == X86_REG_NONE ) { assert_dbg(); //x64Gen_writeU8(x64GenContext, 0x66); @@ -106,7 +104,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe { // SSE3 // move one double from memory into lower and upper half of a xmm register - if( 
memRegister == REG_RSP ) + if( memRegister == X86_REG_RSP ) { // MOVDDUP , [+] // todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range @@ -119,7 +117,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe x64Gen_writeU8(x64GenContext, 0xE4); x64Gen_writeU32(x64GenContext, memImmU32); } - else if( memRegister == REG_R15 ) + else if( memRegister == X86_REG_R15 ) { // MOVDDUP , [+] // todo: Short form of instruction if memImmU32 is 0 or in -128 to 127 range @@ -131,7 +129,7 @@ void x64Gen_movddup_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRe x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8); x64Gen_writeU32(x64GenContext, memImmU32); } - else if( memRegister == REG_NONE ) + else if( memRegister == X86_REG_NONE ) { // MOVDDUP , [] // 36 F2 0F 12 05 - 00 00 00 00 @@ -185,7 +183,7 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi { // SSE2 // move lower 64bits (double) of xmm register to memory location - if( memRegister == REG_NONE ) + if( memRegister == X86_REG_NONE ) { // MOVSD [], // F2 0F 11 05 - 45 23 01 00 @@ -197,7 +195,7 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi //x64Gen_writeU8(x64GenContext, 0x05+xmmRegister*8); //x64Gen_writeU32(x64GenContext, memImmU32); } - else if( memRegister == REG_RSP ) + else if( memRegister == X86_REG_RSP ) { // MOVSD [RSP+], // F2 0F 11 84 24 - 33 22 11 00 @@ -215,11 +213,42 @@ void x64Gen_movsd_memReg64_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegi } } +void x64Gen_movsd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32) +{ + // SSE2 + if( memRegister == X86_REG_RSP ) + { + // MOVSD , [RSP+] + x64Gen_writeU8(x64GenContext, 0xF2); + x64Gen_genSSEVEXPrefix2(x64GenContext, 0, xmmRegister, false); + x64Gen_writeU8(x64GenContext, 0x0F); + x64Gen_writeU8(x64GenContext, 0x10); + x64Gen_writeU8(x64GenContext, 0x84+(xmmRegister&7)*8); + x64Gen_writeU8(x64GenContext, 0x24); + x64Gen_writeU32(x64GenContext, memImmU32); + } + else if( memRegister == 15 ) + { + // MOVSD , [R15+] + x64Gen_writeU8(x64GenContext, 0x36); + x64Gen_writeU8(x64GenContext, 0xF2); + x64Gen_genSSEVEXPrefix2(x64GenContext, memRegister, xmmRegister, false); + x64Gen_writeU8(x64GenContext, 0x0F); + x64Gen_writeU8(x64GenContext, 0x10); + x64Gen_writeU8(x64GenContext, 0x87+(xmmRegister&7)*8); + x64Gen_writeU32(x64GenContext, memImmU32); + } + else + { + assert_dbg(); + } +} + void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32) { // SSE3 // move one double from memory into lower half of a xmm register, leave upper half unchanged(?) 
- if( memRegister == REG_NONE ) + if( memRegister == X86_REG_NONE ) { // MOVLPD , [] //x64Gen_writeU8(x64GenContext, 0x66); @@ -229,7 +258,7 @@ void x64Gen_movlpd_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmReg //x64Gen_writeU32(x64GenContext, memImmU32); assert_dbg(); } - else if( memRegister == REG_RSP ) + else if( memRegister == X86_REG_RSP ) { // MOVLPD , [+] // 66 0F 12 84 24 - 33 22 11 00 @@ -348,11 +377,11 @@ void x64Gen_mulpd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist void x64Gen_mulpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32) { // SSE2 - if (memRegister == REG_NONE) + if (memRegister == X86_REG_NONE) { assert_dbg(); } - else if (memRegister == REG_R14) + else if (memRegister == X86_REG_R14) { x64Gen_writeU8(x64GenContext, 0x66); x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 0x41 : 0x45); @@ -404,7 +433,7 @@ void x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmR { // SSE2 // compare bottom double with double from memory location - if( memoryReg == REG_R15 ) + if( memoryReg == X86_REG_R15 ) { x64Gen_writeU8(x64GenContext, 0x66); x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); @@ -432,7 +461,7 @@ void x64Gen_comiss_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xmmR { // SSE2 // compare bottom float with float from memory location - if (memoryReg == REG_R15) + if (memoryReg == X86_REG_R15) { x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); x64Gen_writeU8(x64GenContext, 0x0F); @@ -448,7 +477,7 @@ void x64Gen_orps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmRe { // SSE2 // and xmm register with 128 bit value from memory - if( memReg == REG_R15 ) + if( memReg == X86_REG_R15 ) { x64Gen_genSSEVEXPrefix2(x64GenContext, memReg, xmmRegisterDest, false); x64Gen_writeU8(x64GenContext, 0x0F); @@ -464,7 +493,7 @@ void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR { // SSE2 // xor xmm register with 128 bit value from memory - if( memReg == REG_R15 ) + if( memReg == X86_REG_R15 ) { x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg? x64Gen_writeU8(x64GenContext, 0x0F); @@ -479,11 +508,11 @@ void x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR void x64Gen_andpd_xmmReg_memReg128(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32) { // SSE2 - if (memRegister == REG_NONE) + if (memRegister == X86_REG_NONE) { assert_dbg(); } - else if (memRegister == REG_R14) + else if (memRegister == X86_REG_R14) { x64Gen_writeU8(x64GenContext, 0x66); x64Gen_writeU8(x64GenContext, (xmmRegister < 8) ? 0x41 : 0x45); @@ -502,7 +531,7 @@ void x64Gen_andps_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xmmR { // SSE2 // and xmm register with 128 bit value from memory - if( memReg == REG_R15 ) + if( memReg == X86_REG_R15 ) { x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); // todo: should be x64Gen_genSSEVEXPrefix2() with memReg? 
x64Gen_writeU8(x64GenContext, 0x0F); @@ -528,7 +557,7 @@ void x64Gen_pcmpeqd_xmmReg_mem128Reg64(x64GenContext_t* x64GenContext, sint32 xm { // SSE2 // doubleword integer compare - if( memReg == REG_R15 ) + if( memReg == X86_REG_R15 ) { x64Gen_writeU8(x64GenContext, 0x66); x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, true); @@ -563,6 +592,16 @@ void x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 regis x64Gen_writeU8(x64GenContext, 0xC0+(registerDest&7)*8+(xmmRegisterSrc&7)); } +void x64Gen_cvtsi2sd_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 registerSrc) +{ + // SSE2 + x64Gen_writeU8(x64GenContext, 0xF2); + x64Gen_genSSEVEXPrefix2(x64GenContext, registerSrc, xmmRegisterDest, false); + x64Gen_writeU8(x64GenContext, 0x0F); + x64Gen_writeU8(x64GenContext, 0x2A); + x64Gen_writeU8(x64GenContext, 0xC0+(xmmRegisterDest&7)*8+(registerSrc&7)); +} + void x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegisterDest, sint32 xmmRegisterSrc) { // SSE2 @@ -610,7 +649,7 @@ void x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext_t* x64GenContext, sint32 xm { // SSE2 // converts two signed 32bit integers to two doubles - if( memReg == REG_RSP ) + if( memReg == X86_REG_RSP ) { x64Gen_writeU8(x64GenContext, 0x66); x64Gen_genSSEVEXPrefix1(x64GenContext, xmmRegisterDest, false); @@ -684,7 +723,7 @@ void x64Gen_rcpss_xmmReg_xmmReg(x64GenContext_t* x64GenContext, sint32 xmmRegist void x64Gen_mulss_xmmReg_memReg64(x64GenContext_t* x64GenContext, sint32 xmmRegister, sint32 memRegister, uint32 memImmU32) { // SSE2 - if( memRegister == REG_NONE ) + if( memRegister == X86_REG_NONE ) { assert_dbg(); } diff --git a/src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp similarity index 99% rename from src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp rename to src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp index e936f1d8..b4021931 100644 --- a/src/Cafe/HW/Espresso/Recompiler/x64Emit.hpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/X64Emit.hpp @@ -203,7 +203,6 @@ template void _x64Gen_writeMODRM_internal(x64GenContext_t* x64GenContext, TA opA, TB opB) { static_assert(TA::getType() == MODRM_OPR_TYPE::REG); - x64Gen_checkBuffer(x64GenContext); // REX prefix // 0100 WRXB if constexpr (TA::getType() == MODRM_OPR_TYPE::REG && TB::getType() == MODRM_OPR_TYPE::REG) diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h new file mode 100644 index 00000000..eae3835d --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/BackendX64/x86Emitter.h @@ -0,0 +1,4335 @@ +#pragma once + +// x86-64 assembler/emitter +// auto generated. 
Do not edit this file manually + +typedef unsigned long long u64; +typedef unsigned int u32; +typedef unsigned short u16; +typedef unsigned char u8; +typedef signed long long s64; +typedef signed int s32; +typedef signed short s16; +typedef signed char s8; + +enum X86Reg : sint8 +{ + X86_REG_NONE = -1, + X86_REG_EAX = 0, + X86_REG_ECX = 1, + X86_REG_EDX = 2, + X86_REG_EBX = 3, + X86_REG_ESP = 4, + X86_REG_EBP = 5, + X86_REG_ESI = 6, + X86_REG_EDI = 7, + X86_REG_R8D = 8, + X86_REG_R9D = 9, + X86_REG_R10D = 10, + X86_REG_R11D = 11, + X86_REG_R12D = 12, + X86_REG_R13D = 13, + X86_REG_R14D = 14, + X86_REG_R15D = 15, + X86_REG_RAX = 0, + X86_REG_RCX = 1, + X86_REG_RDX = 2, + X86_REG_RBX = 3, + X86_REG_RSP = 4, + X86_REG_RBP = 5, + X86_REG_RSI = 6, + X86_REG_RDI = 7, + X86_REG_R8 = 8, + X86_REG_R9 = 9, + X86_REG_R10 = 10, + X86_REG_R11 = 11, + X86_REG_R12 = 12, + X86_REG_R13 = 13, + X86_REG_R14 = 14, + X86_REG_R15 = 15 +}; + +enum X86Cond : u8 +{ + X86_CONDITION_O = 0, + X86_CONDITION_NO = 1, + X86_CONDITION_B = 2, + X86_CONDITION_NB = 3, + X86_CONDITION_Z = 4, + X86_CONDITION_NZ = 5, + X86_CONDITION_BE = 6, + X86_CONDITION_NBE = 7, + X86_CONDITION_S = 8, + X86_CONDITION_NS = 9, + X86_CONDITION_PE = 10, + X86_CONDITION_PO = 11, + X86_CONDITION_L = 12, + X86_CONDITION_NL = 13, + X86_CONDITION_LE = 14, + X86_CONDITION_NLE = 15 +}; +class x86Assembler64 +{ +private: + std::vector m_buffer; + +public: + u8* GetBufferPtr() { return m_buffer.data(); }; + std::span GetBuffer() { return m_buffer; }; + u32 GetWriteIndex() { return (u32)m_buffer.size(); }; + void _emitU8(u8 v) { m_buffer.emplace_back(v); }; + void _emitU16(u16 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 2); *(u16*)(m_buffer.data() + writeIdx) = v; }; + void _emitU32(u32 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 4); *(u32*)(m_buffer.data() + writeIdx) = v; }; + void _emitU64(u64 v) { size_t writeIdx = m_buffer.size(); m_buffer.resize(writeIdx + 8); *(u64*)(m_buffer.data() + writeIdx) = v; }; + using GPR64 = X86Reg; + using GPR32 = X86Reg; + using GPR8_REX = X86Reg; + void LockPrefix() { _emitU8(0xF0); }; + void ADD_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x00); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void ADD_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x00); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
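// Sketch, not from this patch: every generated *_l/*_r memory form in this header picks
// the ModRM "mod" field the same way — 0 when there is no displacement (except for a base
// whose low three bits are 5, i.e. RBP/R13), 1 for an 8-bit displacement, 2 for a 32-bit
// one — and forces a SIB byte whenever the base register's low bits are 4 (RSP/R12). A
// stand-alone helper (invented names, REX prefix omitted) that emits just the
// ModRM/SIB/displacement bytes for a "reg, [base + offset]" operand under those rules:
#include <cstdint>
#include <vector>
inline void EmitModRMBaseDisp(std::vector<uint8_t>& out, int reg, int base, int32_t offset)
{
	uint8_t mod = (offset == 0 && (base & 7) != 5) ? 0 : (offset == (int32_t)(int8_t)offset ? 1 : 2);
	bool needSib = (base & 7) == 4; // RSP/R12 as base cannot be encoded without a SIB byte
	out.push_back((mod << 6) | ((reg & 7) << 3) | (needSib ? 4 : (base & 7)));
	if (needSib)
		out.push_back(0x24); // SIB: scale=1, index=0b100 (none), base=RSP/R12
	if (mod == 1)
		out.push_back((uint8_t)offset);
	else if (mod == 2)
		for (int i = 0; i < 4; i++)
			out.push_back((uint8_t)((uint32_t)offset >> (8 * i)));
}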
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADD_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x02); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADD_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x01); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void ADD_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x01); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void ADD_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x01); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADD_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x01); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
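// Sketch, not from this patch: the 0x40/0x48-based byte emitted before the opcode in the
// methods above is the REX prefix, laid out as 0100WRXB. W (0x08) selects 64-bit operand
// size — hence the _qq forms start from 0x48 — R extends the ModRM reg field, X the SIB
// index and B the ModRM r/m (or SIB base). A small helper with invented names:
#include <cstdint>
constexpr uint8_t MakeRex(bool w, bool r, bool x, bool b)
{
	return 0x40 | (w ? 0x08 : 0) | (r ? 0x04 : 0) | (x ? 0x02 : 0) | (b ? 0x01 : 0);
}
static_assert(MakeRex(true, false, false, false) == 0x48, "REX.W only");
static_assert(MakeRex(false, true, false, true) == 0x45, "REX.R + REX.B");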
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADD_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x03); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADD_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x03); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void OR_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x08); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void OR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x08); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void OR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x0a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void OR_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x09); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void OR_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x09); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void OR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x09); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void OR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x09); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void OR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x0b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void OR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x0b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADC_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x10); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void ADC_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x10); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
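// Sketch, not from this patch: the byte-sized (_bb) forms above emit a REX prefix as soon
// as either register index is >= 4. Without REX, encodings 4-7 select the legacy
// high-byte registers AH/CH/DH/BH; with a REX prefix (even 0x40) they select
// SPL/BPL/SIL/DIL, and indices 8-15 additionally need the REX.R/REX.B extension bits.
// Minimal illustration with an invented helper name:
constexpr bool NeedsRexForByteReg(int regIndex)
{
	return regIndex >= 4; // SPL/BPL/SIL/DIL and R8B..R15B all require a REX prefix
}
static_assert(!NeedsRexForByteReg(3) && NeedsRexForByteReg(4), "byte-register REX rule");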
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADC_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x12); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADC_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x11); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void ADC_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x11); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void ADC_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x11); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADC_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x11); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADC_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x13); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADC_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x13); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SBB_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x18); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SBB_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x18); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SBB_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x1a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SBB_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x19); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SBB_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x19); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SBB_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x19); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SBB_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x19); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SBB_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x1b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SBB_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x1b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void AND_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x20); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void AND_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x20); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void AND_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x22); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void AND_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x21); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void AND_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x21); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void AND_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x21); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void AND_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x21); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void AND_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x23); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void AND_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x23); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x28); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SUB_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x28); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x2a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x29); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SUB_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x29); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void SUB_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x29); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x29); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x2b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SUB_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x2b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x30); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void XOR_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x30); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x32); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x31); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void XOR_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x31); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void XOR_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x31); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x31); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
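+ // Naming scheme: _bb/_dd/_qq = 8/32/64-bit register-register forms, i8/i32/i64 = immediate size.
+ // The _l variants treat the memory operand as the destination (e.g. XOR_dd_l emits 0x31, XOR r/m32, r32),
+ // while the _r variants load the register from memory (e.g. XOR_dd_r emits 0x33, XOR r32, r/m32).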
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x33); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XOR_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x33); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x38); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMP_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x38); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x3a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x39); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMP_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x39); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMP_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x39); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x39); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x3b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMP_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x3b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void ADD_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void ADD_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void ADD_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 
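+ // The *_di32/_qi32 helpers all share the group-1 opcode 0x81; the operation is selected by the ModRM.reg
+ // field: /0 ADD, /1 OR, /2 ADC, /3 SBB, /4 AND, /5 SUB, /6 XOR, /7 CMP. The 32-bit immediate is emitted
+ // last, after the ModRM/SIB/displacement bytes, and is sign-extended to 64 bits in the REX.W forms.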
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void ADD_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void OR_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void OR_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void OR_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void OR_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void ADC_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void ADC_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void ADC_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void ADC_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void SBB_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void SBB_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void SBB_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void SBB_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void AND_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void AND_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void AND_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void AND_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void SUB_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void SUB_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void SUB_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void SUB_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void XOR_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void XOR_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void XOR_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void XOR_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void CMP_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x81); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void CMP_qi32(GPR64 dst, s32 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x81); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + _emitU32((u32)imm); + } + void CMP_di32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void CMP_qi32_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x81); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void ADD_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void ADD_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((0 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void ADD_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void ADD_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((0 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void OR_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void OR_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((1 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void OR_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 
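+ // The *_di8/_qi8 helpers use opcode 0x83: the same group-1 /digit scheme as 0x81 above, but with an
+ // 8-bit immediate that the CPU sign-extends to the full operand size.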
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void OR_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((1 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void ADC_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void ADC_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void ADC_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void ADC_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SBB_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void SBB_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((3 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void SBB_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SBB_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((3 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void AND_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void AND_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void AND_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void AND_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SUB_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void SUB_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void SUB_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SUB_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void XOR_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void XOR_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((6 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void XOR_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void XOR_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((6 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void CMP_di8(GPR32 dst, s8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x83); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void CMP_qi8(GPR64 dst, s8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x83); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void CMP_di8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void CMP_qi8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x83); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void TEST_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x84); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void TEST_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x84); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void TEST_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x85); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void TEST_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x85); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void TEST_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x85); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
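+ // TEST (0x84 for 8-bit, 0x85 for 32/64-bit operands) is a non-destructive AND: it only updates the flags
+ // and writes neither operand. TEST has no reg, r/m opcode form, so only the _l memory variants exist.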
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void TEST_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x85); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XCHG_bb(GPR8_REX dst, GPR8_REX src) + { + if ((dst >= 4) || (src >= 4)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x86); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + } + void XCHG_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x86); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XCHG_dd(GPR32 dst, GPR32 src) + { + if (((dst & 8) != 0) || ((src & 8) != 0)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x87); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + } + void XCHG_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + _emitU8(0x87); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + } + void XCHG_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x87); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
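+ // XCHG with a memory operand is automatically locked by the processor (full LOCK semantics even without
+ // a LOCK prefix), and since both operands are exchanged only one memory form (_r) is needed.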
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void XCHG_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x87); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_bb(GPR8_REX dst, GPR8_REX src) + { + if ((src >= 4) || (dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x88); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void MOV_bb_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR8_REX src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src >= 4) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x88); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_bb_r(GPR8_REX dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst >= 4) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst >= 4) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x8a); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x89); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void MOV_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x89); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void MOV_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x89); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x89); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_dd_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x8b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_qq_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x8b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void MOV_di32(GPR32 dst, s32 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xb8 | ((dst) & 7)); + _emitU32((u32)imm); + } + void MOV_qi64(GPR64 dst, s64 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0xb8 | ((dst) & 7)); + _emitU64((u64)imm); + } + void CALL_q(GPR64 dst) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xff); + _emitU8((3 << 6) | ((2 & 7) << 3) | (dst & 7)); + } + void CALL_q_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xff); + _emitU8((mod << 6) | ((2 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void IMUL_ddi32(GPR32 dst, GPR32 src, s32 imm) + { + if (((dst & 8) != 0) || ((src & 8) != 0)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x69); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + _emitU32((u32)imm); + } + void IMUL_qqi32(GPR64 dst, GPR64 src, s32 imm) + { + _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + _emitU8(0x69); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + _emitU32((u32)imm); + } + void IMUL_ddi32_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x69); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
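+ // MOV_di32 uses the short-form opcode 0xB8+rd (register encoded in the low 3 opcode bits, no ModRM byte);
+ // MOV_qi64 is the same form with REX.W and a full 64-bit immediate ("movabs"). CALL_q/CALL_q_l emit FF /2,
+ // the near indirect call, which defaults to a 64-bit operand in long mode so no REX.W is required.
+ // IMUL 0x69 / 0x6B are the three-operand forms: dst = src * imm32 / sign-extended imm8.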
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void IMUL_qqi32_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s32 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x69); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU32((u32)imm); + } + void IMUL_ddi8(GPR32 dst, GPR32 src, s8 imm) + { + if (((dst & 8) != 0) || ((src & 8) != 0)) + { + _emitU8(0x40 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + } + _emitU8(0x6b); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + _emitU8((u8)imm); + } + void IMUL_qqi8(GPR64 dst, GPR64 src, s8 imm) + { + _emitU8(0x48 | ((src & 8) >> 3) | ((dst & 8) >> 1)); + _emitU8(0x6b); + _emitU8((3 << 6) | ((dst & 7) << 3) | (src & 7)); + _emitU8((u8)imm); + } + void IMUL_ddi8_r(GPR32 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((dst & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((dst & 8) || (memReg & 8)) + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x6b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void IMUL_qqi8_r(GPR64 dst, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, s8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((dst & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x6b); + _emitU8((mod << 6) | ((dst & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void SHL_b_CL(GPR8_REX dst) + { + if ((dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd2); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + } + void SHL_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd2); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SHR_b_CL(GPR8_REX dst) + { + if ((dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd2); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + } + void SHR_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd2); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SAR_b_CL(GPR8_REX dst) + { + if ((dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd2); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + } + void SAR_b_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd2); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SHL_d_CL(GPR32 dst) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd3); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + } + void SHL_q_CL(GPR64 dst) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0xd3); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + } + void SHL_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SHL_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SHR_d_CL(GPR32 dst) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd3); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + } + void SHR_q_CL(GPR64 dst) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0xd3); + _emitU8((3 << 6) | ((5 & 7) << 3) | (dst & 7)); + } + void SHR_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SHR_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((5 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SAR_d_CL(GPR32 dst) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0xd3); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + } + void SAR_q_CL(GPR64 dst) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0xd3); + _emitU8((3 << 6) | ((7 & 7) << 3) | (dst & 7)); + } + void SAR_d_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void SAR_q_CL_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0xd3); + _emitU8((mod << 6) | ((7 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void JMP_j32(s32 imm) + { + _emitU8(0xe9); + _emitU32((u32)imm); + } + void Jcc_j32(X86Cond cond, s32 imm) + { + _emitU8(0x0f); + _emitU8(0x80 | (u8)cond); + _emitU32((u32)imm); + } + void SETcc_b(X86Cond cond, GPR8_REX dst) + { + if ((dst >= 4)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x0f); + _emitU8(0x90 | (u8)cond); + _emitU8((3 << 6) | (dst & 7)); + } + void SETcc_b_l(X86Cond cond, GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x0f); + _emitU8(0x90); + _emitU8((mod << 6) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMPXCHG_dd(GPR32 dst, GPR32 src) + { + if (((src & 8) != 0) || ((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + } + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMPXCHG_qq(GPR64 dst, GPR64 src) + { + _emitU8(0x48 | ((dst & 8) >> 3) | ((src & 8) >> 1)); + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((3 << 6) | ((src & 7) << 3) | (dst & 7)); + } + void CMPXCHG_dd_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR32 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((src & 8) || (memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((src & 8) || (memReg & 8)) + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1)); + } + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void CMPXCHG_qq_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, GPR64 src) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((src & 8) >> 1) | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x0f); + _emitU8(0xb1); + _emitU8((mod << 6) | ((src & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + } + void BSWAP_d(GPR32 dst) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x0f); + _emitU8(0xc8 | ((dst) & 7)); + } + void BSWAP_q(GPR64 dst) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x0f); + _emitU8(0xc8 | ((dst) & 7)); + } + void BT_du8(GPR32 dst, u8 imm) + { + if (((dst & 8) != 0)) + { + _emitU8(0x40 | ((dst & 8) >> 3)); + } + _emitU8(0x0f); + _emitU8(0xba); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void BT_qu8(GPR64 dst, u8 imm) + { + _emitU8(0x48 | ((dst & 8) >> 3)); + _emitU8(0x0f); + _emitU8(0xba); + _emitU8((3 << 6) | ((4 & 7) << 3) | (dst & 7)); + _emitU8((u8)imm); + } + void BT_du8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, u8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + if ((memReg & 8) || ((index != X86_REG_NONE) && (index & 8))) + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2)); + } + else + { + if ((memReg & 8)) + _emitU8(0x40 | ((memReg & 8) >> 1)); + } + _emitU8(0x0f); + _emitU8(0xba); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } + void BT_qu8_l(GPR64 memReg, sint32 offset, GPR64 index, uint8 scaler, u8 imm) + { + uint8 mod; + if (offset == 0 && (memReg & 7) != 5) mod = 0; + else if (offset == (s32)(s8)offset) mod = 1; + else mod = 2; + bool sib_use = (scaler != 0 && index != X86_REG_NONE); + if ((memReg & 7) == 4) + { + cemu_assert_debug(index == X86_REG_NONE); + index = memReg; + sib_use = true; + } + if (sib_use) + { + _emitU8(0x40 | ((memReg & 8) >> 3) | ((index & 8) >> 2) | 0x08); + } + else + { + _emitU8(0x40 | ((memReg & 8) >> 1) | 0x08); + } + _emitU8(0x0f); + _emitU8(0xba); + _emitU8((mod << 6) | ((4 & 7) << 3) | (sib_use ? 
4 : (memReg & 7))); + if (sib_use) + { + _emitU8((0 << 6) | ((memReg & 7)) | ((index & 7) << 3)); + } + if (mod == 1) _emitU8((u8)offset); + else if (mod == 2) _emitU32((u32)offset); + _emitU8((u8)imm); + } +}; diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IML.h b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h new file mode 100644 index 00000000..bc0c27c5 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IML.h @@ -0,0 +1,16 @@ +#pragma once + +#include "IMLInstruction.h" +#include "IMLSegment.h" + +// optimizer passes +void IMLOptimizer_OptimizeDirectFloatCopies(struct ppcImlGenContext_t* ppcImlGenContext); +void IMLOptimizer_OptimizeDirectIntegerCopies(struct ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_optimizePSQLoadAndStore(struct ppcImlGenContext_t* ppcImlGenContext); + +void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext); + +// debug +void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut); +void IMLDebug_DumpSegment(struct ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo = false); +void IMLDebug_Dump(struct ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo = false); diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp new file mode 100644 index 00000000..6ae4b591 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLAnalyzer.cpp @@ -0,0 +1,5 @@ +#include "IML.h" +//#include "PPCRecompilerIml.h" +#include "util/helpers/fixedSizeList.h" + +#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp new file mode 100644 index 00000000..cd269869 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLDebug.cpp @@ -0,0 +1,561 @@ +#include "IML.h" +#include "IMLInstruction.h" +#include "IMLSegment.h" +#include "IMLRegisterAllocatorRanges.h" +#include "util/helpers/StringBuf.h" + +#include "../PPCRecompiler.h" + +const char* IMLDebug_GetOpcodeName(const IMLInstruction* iml) +{ + static char _tempOpcodename[32]; + uint32 op = iml->operation; + if (op == PPCREC_IML_OP_ASSIGN) + return "MOV"; + else if (op == PPCREC_IML_OP_ADD) + return "ADD"; + else if (op == PPCREC_IML_OP_ADD_WITH_CARRY) + return "ADC"; + else if (op == PPCREC_IML_OP_SUB) + return "SUB"; + else if (op == PPCREC_IML_OP_OR) + return "OR"; + else if (op == PPCREC_IML_OP_AND) + return "AND"; + else if (op == PPCREC_IML_OP_XOR) + return "XOR"; + else if (op == PPCREC_IML_OP_LEFT_SHIFT) + return "LSH"; + else if (op == PPCREC_IML_OP_RIGHT_SHIFT_U) + return "RSH"; + else if (op == PPCREC_IML_OP_RIGHT_SHIFT_S) + return "ARSH"; + else if (op == PPCREC_IML_OP_LEFT_ROTATE) + return "LROT"; + else if (op == PPCREC_IML_OP_MULTIPLY_SIGNED) + return "MULS"; + else if (op == PPCREC_IML_OP_DIVIDE_SIGNED) + return "DIVS"; + else if (op == PPCREC_IML_OP_FPR_ASSIGN) + return "FMOV"; + else if (op == PPCREC_IML_OP_FPR_ADD) + return "FADD"; + else if (op == PPCREC_IML_OP_FPR_SUB) + return "FSUB"; + else if (op == PPCREC_IML_OP_FPR_MULTIPLY) + return "FMUL"; + else if (op == PPCREC_IML_OP_FPR_DIVIDE) + return "FDIV"; + else if (op == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64) + return "F32TOF64"; + else if (op == PPCREC_IML_OP_FPR_ABS) + return "FABS"; + else if (op == PPCREC_IML_OP_FPR_NEGATE) + return "FNEG"; + else if (op == PPCREC_IML_OP_FPR_NEGATIVE_ABS) + return "FNABS"; + else if (op == PPCREC_IML_OP_FPR_FLOAT_TO_INT) + return 
"F2I"; + else if (op == PPCREC_IML_OP_FPR_INT_TO_FLOAT) + return "I2F"; + else if (op == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT) + return "BITMOVE"; + + sprintf(_tempOpcodename, "OP0%02x_T%d", iml->operation, iml->type); + return _tempOpcodename; +} + +std::string IMLDebug_GetRegName(IMLReg r) +{ + std::string regName; + uint32 regId = r.GetRegID(); + switch (r.GetRegFormat()) + { + case IMLRegFormat::F32: + regName.append("f"); + break; + case IMLRegFormat::F64: + regName.append("fd"); + break; + case IMLRegFormat::I32: + regName.append("i"); + break; + case IMLRegFormat::I64: + regName.append("r"); + break; + default: + DEBUG_BREAK; + } + regName.append(fmt::format("{}", regId)); + return regName; +} + +void IMLDebug_AppendRegisterParam(StringBuf& strOutput, IMLReg virtualRegister, bool isLast = false) +{ + strOutput.add(IMLDebug_GetRegName(virtualRegister)); + if (!isLast) + strOutput.add(", "); +} + +void IMLDebug_AppendS32Param(StringBuf& strOutput, sint32 val, bool isLast = false) +{ + if (val < 0) + { + strOutput.add("-"); + val = -val; + } + strOutput.addFmt("0x{:08x}", val); + if (!isLast) + strOutput.add(", "); +} + +void IMLDebug_PrintLivenessRangeInfo(StringBuf& currentLineText, IMLSegment* imlSegment, sint32 offset) +{ + // pad to 70 characters + sint32 index = currentLineText.getLen(); + while (index < 70) + { + currentLineText.add(" "); + index++; + } + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + while (subrangeItr) + { + if (subrangeItr->interval.start.GetInstructionIndexEx() == offset) + { + if(subrangeItr->interval.start.IsInstructionIndex() && !subrangeItr->interval.start.IsOnInputEdge()) + currentLineText.add("."); + else + currentLineText.add("|"); + + currentLineText.addFmt("{:<4}", subrangeItr->GetVirtualRegister()); + } + else if (subrangeItr->interval.end.GetInstructionIndexEx() == offset) + { + if(subrangeItr->interval.end.IsInstructionIndex() && !subrangeItr->interval.end.IsOnOutputEdge()) + currentLineText.add("* "); + else + currentLineText.add("| "); + } + else if (subrangeItr->interval.ContainsInstructionIndexEx(offset)) + { + currentLineText.add("| "); + } + else + { + currentLineText.add(" "); + } + index += 5; + // next + subrangeItr = subrangeItr->link_allSegmentRanges.next; + } +} + +std::string IMLDebug_GetSegmentName(ppcImlGenContext_t* ctx, IMLSegment* seg) +{ + if (!ctx) + { + return ""; + } + // find segment index + for (size_t i = 0; i < ctx->segmentList2.size(); i++) + { + if (ctx->segmentList2[i] == seg) + { + return fmt::format("Seg{:04x}", i); + } + } + return ""; +} + +std::string IMLDebug_GetConditionName(IMLCondition cond) +{ + switch (cond) + { + case IMLCondition::EQ: + return "EQ"; + case IMLCondition::NEQ: + return "NEQ"; + case IMLCondition::UNSIGNED_GT: + return "UGT"; + case IMLCondition::UNSIGNED_LT: + return "ULT"; + case IMLCondition::SIGNED_GT: + return "SGT"; + case IMLCondition::SIGNED_LT: + return "SLT"; + default: + cemu_assert_unimplemented(); + } + return "ukn"; +} + +void IMLDebug_DisassembleInstruction(const IMLInstruction& inst, std::string& disassemblyLineOut) +{ + const sint32 lineOffsetParameters = 10;//18; + + StringBuf strOutput(1024); + strOutput.reset(); + if (inst.type == PPCREC_IML_TYPE_R_NAME || inst.type == PPCREC_IML_TYPE_NAME_R) + { + if (inst.type == PPCREC_IML_TYPE_R_NAME) + strOutput.add("R_NAME"); + else + strOutput.add("NAME_R"); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + if(inst.type == PPCREC_IML_TYPE_R_NAME) + 
IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR); + + strOutput.add("name_"); + if (inst.op_r_name.name >= PPCREC_NAME_R0 && inst.op_r_name.name < (PPCREC_NAME_R0 + 999)) + { + strOutput.addFmt("r{}", inst.op_r_name.name - PPCREC_NAME_R0); + } + if (inst.op_r_name.name >= PPCREC_NAME_FPR_HALF && inst.op_r_name.name < (PPCREC_NAME_FPR_HALF + 32*2)) + { + strOutput.addFmt("f{}", inst.op_r_name.name - ((PPCREC_NAME_FPR_HALF - inst.op_r_name.name)/2)); + if ((inst.op_r_name.name-PPCREC_NAME_FPR_HALF)&1) + strOutput.add(".ps1"); + else + strOutput.add(".ps0"); + } + else if (inst.op_r_name.name >= PPCREC_NAME_SPR0 && inst.op_r_name.name < (PPCREC_NAME_SPR0 + 999)) + { + strOutput.addFmt("spr{}", inst.op_r_name.name - PPCREC_NAME_SPR0); + } + else if (inst.op_r_name.name >= PPCREC_NAME_CR && inst.op_r_name.name <= PPCREC_NAME_CR_LAST) + strOutput.addFmt("cr{}", inst.op_r_name.name - PPCREC_NAME_CR); + else if (inst.op_r_name.name == PPCREC_NAME_XER_CA) + strOutput.add("xer.ca"); + else if (inst.op_r_name.name == PPCREC_NAME_XER_SO) + strOutput.add("xer.so"); + else if (inst.op_r_name.name == PPCREC_NAME_XER_OV) + strOutput.add("xer.ov"); + else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_EA) + strOutput.add("cpuReservation.ea"); + else if (inst.op_r_name.name == PPCREC_NAME_CPU_MEMRES_VAL) + strOutput.add("cpuReservation.value"); + else + { + strOutput.addFmt("name_ukn{}", inst.op_r_name.name); + } + if (inst.type != PPCREC_IML_TYPE_R_NAME) + { + strOutput.add(", "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_name.regR, true); + } + + } + else if (inst.type == PPCREC_IML_TYPE_R_R) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r.regA, true); + } + else if (inst.type == PPCREC_IML_TYPE_R_R_R) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r.regB, true); + } + else if (inst.type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regB); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_r_carry.regCarry, true); + } + else if (inst.type == PPCREC_IML_TYPE_COMPARE) + { + strOutput.add("CMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regB); + strOutput.addFmt("{}", IMLDebug_GetConditionName(inst.op_compare.cond)); + strOutput.add(" -> "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare.regR, true); + } + else if (inst.type == PPCREC_IML_TYPE_COMPARE_S32) + { + strOutput.add("CMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regA); + strOutput.addFmt("{}", inst.op_compare_s32.immS32); + strOutput.addFmt(", 
{}", IMLDebug_GetConditionName(inst.op_compare_s32.cond)); + strOutput.add(" -> "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_compare_s32.regR, true); + } + else if (inst.type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + strOutput.add("CJUMP "); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + IMLDebug_AppendRegisterParam(strOutput, inst.op_conditional_jump.registerBool, true); + if (!inst.op_conditional_jump.mustBeTrue) + strOutput.add("(inverted)"); + } + else if (inst.type == PPCREC_IML_TYPE_JUMP) + { + strOutput.add("JUMP"); + } + else if (inst.type == PPCREC_IML_TYPE_R_R_S32) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32.regA); + IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32.immS32, true); + } + else if (inst.type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regR); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regA); + IMLDebug_AppendS32Param(strOutput, inst.op_r_r_s32_carry.immS32); + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_r_s32_carry.regCarry, true); + } + else if (inst.type == PPCREC_IML_TYPE_R_S32) + { + strOutput.addFmt("{}", IMLDebug_GetOpcodeName(&inst)); + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_r_immS32.regR); + IMLDebug_AppendS32Param(strOutput, inst.op_r_immS32.immS32, true); + } + else if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_STORE || + inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) + { + if (inst.type == PPCREC_IML_TYPE_LOAD || inst.type == PPCREC_IML_TYPE_LOAD_INDEXED) + strOutput.add("LD_"); + else + strOutput.add("ST_"); + + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{}", inst.op_storeLoad.copyWidth); + + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_storeLoad.registerData); + + if (inst.type == PPCREC_IML_TYPE_LOAD_INDEXED || inst.type == PPCREC_IML_TYPE_STORE_INDEXED) + strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), IMLDebug_GetRegName(inst.op_storeLoad.registerMem2)); + else + strOutput.addFmt("[{}+{}]", IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32); + } + else if (inst.type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + { + strOutput.add("ATOMIC_ST_U32"); + + while ((sint32)strOutput.getLen() < lineOffsetParameters) + strOutput.add(" "); + + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regEA); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regCompareValue); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regWriteValue); + IMLDebug_AppendRegisterParam(strOutput, inst.op_atomic_compare_store.regBoolOut, true); + } + else if (inst.type == PPCREC_IML_TYPE_NO_OP) + { + strOutput.add("NOP"); + } + else if (inst.type == PPCREC_IML_TYPE_MACRO) + { + if (inst.operation == PPCREC_IML_MACRO_B_TO_REG) + { + strOutput.addFmt("MACRO B_TO_REG {}", 
IMLDebug_GetRegName(inst.op_macro.paramReg)); + } + else if (inst.operation == PPCREC_IML_MACRO_BL) + { + strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); + } + else if (inst.operation == PPCREC_IML_MACRO_B_FAR) + { + strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", inst.op_macro.param, inst.op_macro.param2, (sint32)inst.op_macro.paramU16); + } + else if (inst.operation == PPCREC_IML_MACRO_LEAVE) + { + strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", inst.op_macro.param); + } + else if (inst.operation == PPCREC_IML_MACRO_HLE) + { + strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", inst.op_macro.param, inst.op_macro.param2); + } + else if (inst.operation == PPCREC_IML_MACRO_COUNT_CYCLES) + { + strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", inst.op_macro.param); + } + else + { + strOutput.addFmt("MACRO ukn operation {}", inst.operation); + } + } + else if (inst.type == PPCREC_IML_TYPE_FPR_LOAD) + { + strOutput.addFmt("{} = ", IMLDebug_GetRegName(inst.op_storeLoad.registerData)); + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{} [{}+{}] mode {}", inst.op_storeLoad.copyWidth / 8, IMLDebug_GetRegName(inst.op_storeLoad.registerMem), inst.op_storeLoad.immS32, inst.op_storeLoad.mode); + if (inst.op_storeLoad.flags2.notExpanded) + { + strOutput.addFmt(" "); + } + } + else if (inst.type == PPCREC_IML_TYPE_FPR_STORE) + { + if (inst.op_storeLoad.flags2.signExtend) + strOutput.add("S"); + else + strOutput.add("U"); + strOutput.addFmt("{} [t{}+{}]", inst.op_storeLoad.copyWidth / 8, inst.op_storeLoad.registerMem.GetRegID(), inst.op_storeLoad.immS32); + strOutput.addFmt(" = {} mode {}", IMLDebug_GetRegName(inst.op_storeLoad.registerData), inst.op_storeLoad.mode); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R) + { + strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{}", IMLDebug_GetRegName(inst.op_fpr_r.regR)); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_R) + { + strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r.regA)); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R_R) + { + strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{}, {}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regB), IMLDebug_GetRegName(inst.op_fpr_r_r_r_r.regC)); + } + else if (inst.type == PPCREC_IML_TYPE_FPR_R_R_R) + { + strOutput.addFmt("{:<6} ", IMLDebug_GetOpcodeName(&inst)); + strOutput.addFmt("{}, {}, {}", IMLDebug_GetRegName(inst.op_fpr_r_r_r.regR), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regA), IMLDebug_GetRegName(inst.op_fpr_r_r_r.regB)); + } + else if (inst.type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + strOutput.addFmt("CYCLE_CHECK"); + } + else if (inst.type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + { + strOutput.addFmt("X86_JCC {}", IMLDebug_GetConditionName(inst.op_x86_eflags_jcc.cond)); + } + else + { + strOutput.addFmt("Unknown iml type {}", inst.type); + } + disassemblyLineOut.assign(strOutput.c_str()); +} + +void IMLDebug_DumpSegment(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, bool printLivenessRangeInfo) +{ + StringBuf strOutput(4096); + + strOutput.addFmt("SEGMENT {} | PPC=0x{:08x} Loop-depth {}", IMLDebug_GetSegmentName(ctx, imlSegment), 
imlSegment->ppcAddress, imlSegment->loopDepth); + if (imlSegment->isEnterable) + { + strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress); + } + if (imlSegment->deadCodeEliminationHintSeg) + { + strOutput.addFmt(" InheritOverwrite: {}", IMLDebug_GetSegmentName(ctx, imlSegment->deadCodeEliminationHintSeg)); + } + cemuLog_log(LogType::Force, "{}", strOutput.c_str()); + + if (printLivenessRangeInfo) + { + strOutput.reset(); + IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); + cemuLog_log(LogType::Force, "{}", strOutput.c_str()); + } + //debug_printf("\n"); + strOutput.reset(); + + std::string disassemblyLine; + for (sint32 i = 0; i < imlSegment->imlList.size(); i++) + { + const IMLInstruction& inst = imlSegment->imlList[i]; + // don't log NOP instructions + if (inst.type == PPCREC_IML_TYPE_NO_OP) + continue; + strOutput.reset(); + strOutput.addFmt("{:02x} ", i); + //cemuLog_log(LogType::Force, "{:02x} ", i); + disassemblyLine.clear(); + IMLDebug_DisassembleInstruction(inst, disassemblyLine); + strOutput.add(disassemblyLine); + if (printLivenessRangeInfo) + { + IMLDebug_PrintLivenessRangeInfo(strOutput, imlSegment, i); + } + cemuLog_log(LogType::Force, "{}", strOutput.c_str()); + } + // all ranges + if (printLivenessRangeInfo) + { + strOutput.reset(); + strOutput.add("Ranges-VirtReg "); + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + while (subrangeItr) + { + strOutput.addFmt("v{:<4}", (uint32)subrangeItr->GetVirtualRegister()); + subrangeItr = subrangeItr->link_allSegmentRanges.next; + } + cemuLog_log(LogType::Force, "{}", strOutput.c_str()); + strOutput.reset(); + strOutput.add("Ranges-PhysReg "); + subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + while (subrangeItr) + { + strOutput.addFmt("p{:<4}", subrangeItr->GetPhysicalRegister()); + subrangeItr = subrangeItr->link_allSegmentRanges.next; + } + cemuLog_log(LogType::Force, "{}", strOutput.c_str()); + } + // branch info + strOutput.reset(); + strOutput.add("Links from: "); + for (sint32 i = 0; i < imlSegment->list_prevSegments.size(); i++) + { + if (i) + strOutput.add(", "); + strOutput.addFmt("{}", IMLDebug_GetSegmentName(ctx, imlSegment->list_prevSegments[i]).c_str()); + } + cemuLog_log(LogType::Force, "{}", strOutput.c_str()); + if (imlSegment->nextSegmentBranchNotTaken) + cemuLog_log(LogType::Force, "BranchNotTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchNotTaken).c_str()); + if (imlSegment->nextSegmentBranchTaken) + cemuLog_log(LogType::Force, "BranchTaken: {}", IMLDebug_GetSegmentName(ctx, imlSegment->nextSegmentBranchTaken).c_str()); + if (imlSegment->nextSegmentIsUncertain) + cemuLog_log(LogType::Force, "Dynamic target"); +} + +void IMLDebug_Dump(ppcImlGenContext_t* ppcImlGenContext, bool printLivenessRangeInfo) +{ + for (size_t i = 0; i < ppcImlGenContext->segmentList2.size(); i++) + { + IMLDebug_DumpSegment(ppcImlGenContext, ppcImlGenContext->segmentList2[i], printLivenessRangeInfo); + cemuLog_log(LogType::Force, ""); + } +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp new file mode 100644 index 00000000..997de4e9 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.cpp @@ -0,0 +1,536 @@ +#include "IMLInstruction.h" +#include "IML.h" + +#include "../PPCRecompiler.h" +#include "../PPCRecompilerIml.h" + +// return true if an instruction has side effects on top of just reading and writing registers +bool 
IMLInstruction::HasSideEffects() const +{ + bool hasSideEffects = true; + if(type == PPCREC_IML_TYPE_R_R || type == PPCREC_IML_TYPE_R_R_S32 || type == PPCREC_IML_TYPE_COMPARE || type == PPCREC_IML_TYPE_COMPARE_S32) + hasSideEffects = false; + // todo - add more cases + return hasSideEffects; +} + +void IMLInstruction::CheckRegisterUsage(IMLUsedRegisters* registersUsed) const +{ + registersUsed->readGPR1 = IMLREG_INVALID; + registersUsed->readGPR2 = IMLREG_INVALID; + registersUsed->readGPR3 = IMLREG_INVALID; + registersUsed->readGPR4 = IMLREG_INVALID; + registersUsed->writtenGPR1 = IMLREG_INVALID; + registersUsed->writtenGPR2 = IMLREG_INVALID; + if (type == PPCREC_IML_TYPE_R_NAME) + { + registersUsed->writtenGPR1 = op_r_name.regR; + } + else if (type == PPCREC_IML_TYPE_NAME_R) + { + registersUsed->readGPR1 = op_r_name.regR; + } + else if (type == PPCREC_IML_TYPE_R_R) + { + if (operation == PPCREC_IML_OP_X86_CMP) + { + // both operands are read only + registersUsed->readGPR1 = op_r_r.regR; + registersUsed->readGPR2 = op_r_r.regA; + } + else if ( + operation == PPCREC_IML_OP_ASSIGN || + operation == PPCREC_IML_OP_ENDIAN_SWAP || + operation == PPCREC_IML_OP_CNTLZW || + operation == PPCREC_IML_OP_NOT || + operation == PPCREC_IML_OP_NEG || + operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32 || + operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32) + { + // result is written, operand is read + registersUsed->writtenGPR1 = op_r_r.regR; + registersUsed->readGPR1 = op_r_r.regA; + } + else + cemu_assert_unimplemented(); + } + else if (type == PPCREC_IML_TYPE_R_S32) + { + cemu_assert_debug(operation != PPCREC_IML_OP_ADD && + operation != PPCREC_IML_OP_SUB && + operation != PPCREC_IML_OP_AND && + operation != PPCREC_IML_OP_OR && + operation != PPCREC_IML_OP_XOR); // deprecated, use r_r_s32 for these + + if (operation == PPCREC_IML_OP_LEFT_ROTATE) + { + // register operand is read and write + registersUsed->readGPR1 = op_r_immS32.regR; + registersUsed->writtenGPR1 = op_r_immS32.regR; + } + else if (operation == PPCREC_IML_OP_X86_CMP) + { + // register operand is read only + registersUsed->readGPR1 = op_r_immS32.regR; + } + else + { + // register operand is write only + // todo - use explicit lists, avoid default cases + registersUsed->writtenGPR1 = op_r_immS32.regR; + } + } + else if (type == PPCREC_IML_TYPE_R_R_S32) + { + registersUsed->writtenGPR1 = op_r_r_s32.regR; + registersUsed->readGPR1 = op_r_r_s32.regA; + } + else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + registersUsed->writtenGPR1 = op_r_r_s32_carry.regR; + registersUsed->readGPR1 = op_r_r_s32_carry.regA; + // some operations read carry + switch (operation) + { + case PPCREC_IML_OP_ADD_WITH_CARRY: + registersUsed->readGPR2 = op_r_r_s32_carry.regCarry; + break; + case PPCREC_IML_OP_ADD: + break; + default: + cemu_assert_unimplemented(); + } + // carry is always written + registersUsed->writtenGPR2 = op_r_r_s32_carry.regCarry; + } + else if (type == PPCREC_IML_TYPE_R_R_R) + { + // in all cases result is written and other operands are read only + // with the exception of XOR, where if regA == regB then all bits are zeroed out. 
So we don't consider it a read + registersUsed->writtenGPR1 = op_r_r_r.regR; + if(!(operation == PPCREC_IML_OP_XOR && op_r_r_r.regA == op_r_r_r.regB)) + { + registersUsed->readGPR1 = op_r_r_r.regA; + registersUsed->readGPR2 = op_r_r_r.regB; + } + } + else if (type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + registersUsed->writtenGPR1 = op_r_r_r_carry.regR; + registersUsed->readGPR1 = op_r_r_r_carry.regA; + registersUsed->readGPR2 = op_r_r_r_carry.regB; + // some operations read carry + switch (operation) + { + case PPCREC_IML_OP_ADD_WITH_CARRY: + registersUsed->readGPR3 = op_r_r_r_carry.regCarry; + break; + case PPCREC_IML_OP_ADD: + break; + default: + cemu_assert_unimplemented(); + } + // carry is always written + registersUsed->writtenGPR2 = op_r_r_r_carry.regCarry; + } + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_NO_OP) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_MACRO) + { + if (operation == PPCREC_IML_MACRO_BL || operation == PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_COUNT_CYCLES || operation == PPCREC_IML_MACRO_HLE) + { + // no effect on registers + } + else if (operation == PPCREC_IML_MACRO_B_TO_REG) + { + cemu_assert_debug(op_macro.paramReg.IsValid()); + registersUsed->readGPR1 = op_macro.paramReg; + } + else + cemu_assert_unimplemented(); + } + else if (type == PPCREC_IML_TYPE_COMPARE) + { + registersUsed->readGPR1 = op_compare.regA; + registersUsed->readGPR2 = op_compare.regB; + registersUsed->writtenGPR1 = op_compare.regR; + } + else if (type == PPCREC_IML_TYPE_COMPARE_S32) + { + registersUsed->readGPR1 = op_compare_s32.regA; + registersUsed->writtenGPR1 = op_compare_s32.regR; + } + else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + registersUsed->readGPR1 = op_conditional_jump.registerBool; + } + else if (type == PPCREC_IML_TYPE_JUMP) + { + // no registers affected + } + else if (type == PPCREC_IML_TYPE_LOAD) + { + registersUsed->writtenGPR1 = op_storeLoad.registerData; + if (op_storeLoad.registerMem.IsValid()) + registersUsed->readGPR1 = op_storeLoad.registerMem; + } + else if (type == PPCREC_IML_TYPE_LOAD_INDEXED) + { + registersUsed->writtenGPR1 = op_storeLoad.registerData; + if (op_storeLoad.registerMem.IsValid()) + registersUsed->readGPR1 = op_storeLoad.registerMem; + if (op_storeLoad.registerMem2.IsValid()) + registersUsed->readGPR2 = op_storeLoad.registerMem2; + } + else if (type == PPCREC_IML_TYPE_STORE) + { + registersUsed->readGPR1 = op_storeLoad.registerData; + if (op_storeLoad.registerMem.IsValid()) + registersUsed->readGPR2 = op_storeLoad.registerMem; + } + else if (type == PPCREC_IML_TYPE_STORE_INDEXED) + { + registersUsed->readGPR1 = op_storeLoad.registerData; + if (op_storeLoad.registerMem.IsValid()) + registersUsed->readGPR2 = op_storeLoad.registerMem; + if (op_storeLoad.registerMem2.IsValid()) + registersUsed->readGPR3 = op_storeLoad.registerMem2; + } + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + { + registersUsed->readGPR1 = op_atomic_compare_store.regEA; + registersUsed->readGPR2 = op_atomic_compare_store.regCompareValue; + registersUsed->readGPR3 = op_atomic_compare_store.regWriteValue; + registersUsed->writtenGPR1 = op_atomic_compare_store.regBoolOut; + } + else if (type == PPCREC_IML_TYPE_CALL_IMM) + { + if (op_call_imm.regParam0.IsValid()) + registersUsed->readGPR1 = op_call_imm.regParam0; + if (op_call_imm.regParam1.IsValid()) + 
registersUsed->readGPR2 = op_call_imm.regParam1; + if (op_call_imm.regParam2.IsValid()) + registersUsed->readGPR3 = op_call_imm.regParam2; + registersUsed->writtenGPR1 = op_call_imm.regReturn; + } + else if (type == PPCREC_IML_TYPE_FPR_LOAD) + { + // fpr load operation + registersUsed->writtenGPR1 = op_storeLoad.registerData; + // address is in gpr register + if (op_storeLoad.registerMem.IsValid()) + registersUsed->readGPR1 = op_storeLoad.registerMem; + } + else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) + { + // fpr load operation + registersUsed->writtenGPR1 = op_storeLoad.registerData; + // address is in gpr registers + if (op_storeLoad.registerMem.IsValid()) + registersUsed->readGPR1 = op_storeLoad.registerMem; + if (op_storeLoad.registerMem2.IsValid()) + registersUsed->readGPR2 = op_storeLoad.registerMem2; + } + else if (type == PPCREC_IML_TYPE_FPR_STORE) + { + // fpr store operation + registersUsed->readGPR1 = op_storeLoad.registerData; + if (op_storeLoad.registerMem.IsValid()) + registersUsed->readGPR2 = op_storeLoad.registerMem; + } + else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) + { + // fpr store operation + registersUsed->readGPR1 = op_storeLoad.registerData; + // address is in gpr registers + if (op_storeLoad.registerMem.IsValid()) + registersUsed->readGPR2 = op_storeLoad.registerMem; + if (op_storeLoad.registerMem2.IsValid()) + registersUsed->readGPR3 = op_storeLoad.registerMem2; + } + else if (type == PPCREC_IML_TYPE_FPR_R_R) + { + // fpr operation + if ( + operation == PPCREC_IML_OP_FPR_ASSIGN || + operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 || + operation == PPCREC_IML_OP_FPR_FCTIWZ + ) + { + registersUsed->readGPR1 = op_fpr_r_r.regA; + registersUsed->writtenGPR1 = op_fpr_r_r.regR; + } + else if (operation == PPCREC_IML_OP_FPR_MULTIPLY || + operation == PPCREC_IML_OP_FPR_DIVIDE || + operation == PPCREC_IML_OP_FPR_ADD || + operation == PPCREC_IML_OP_FPR_SUB) + { + registersUsed->readGPR1 = op_fpr_r_r.regA; + registersUsed->readGPR2 = op_fpr_r_r.regR; + registersUsed->writtenGPR1 = op_fpr_r_r.regR; + + } + else if (operation == PPCREC_IML_OP_FPR_FLOAT_TO_INT || + operation == PPCREC_IML_OP_FPR_INT_TO_FLOAT || + operation == PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT) + { + registersUsed->writtenGPR1 = op_fpr_r_r.regR; + registersUsed->readGPR1 = op_fpr_r_r.regA; + } + else + cemu_assert_unimplemented(); + } + else if (type == PPCREC_IML_TYPE_FPR_R_R_R) + { + // fpr operation + registersUsed->readGPR1 = op_fpr_r_r_r.regA; + registersUsed->readGPR2 = op_fpr_r_r_r.regB; + registersUsed->writtenGPR1 = op_fpr_r_r_r.regR; + } + else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) + { + // fpr operation + registersUsed->readGPR1 = op_fpr_r_r_r_r.regA; + registersUsed->readGPR2 = op_fpr_r_r_r_r.regB; + registersUsed->readGPR3 = op_fpr_r_r_r_r.regC; + registersUsed->writtenGPR1 = op_fpr_r_r_r_r.regR; + } + else if (type == PPCREC_IML_TYPE_FPR_R) + { + // fpr operation + if (operation == PPCREC_IML_OP_FPR_NEGATE || + operation == PPCREC_IML_OP_FPR_ABS || + operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS || + operation == PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64 || + operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM) + { + registersUsed->readGPR1 = op_fpr_r.regR; + registersUsed->writtenGPR1 = op_fpr_r.regR; + } + else if (operation == PPCREC_IML_OP_FPR_LOAD_ONE) + { + registersUsed->writtenGPR1 = op_fpr_r.regR; + } + else + cemu_assert_unimplemented(); + } + else if (type == PPCREC_IML_TYPE_FPR_COMPARE) + { + registersUsed->writtenGPR1 = op_fpr_compare.regR; + 
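// regA and regB are the floating point source operands and are only read +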
registersUsed->readGPR1 = op_fpr_compare.regA; + registersUsed->readGPR2 = op_fpr_compare.regB; + } + else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + { + // no registers read or written (except for the implicit eflags) + } + else + { + cemu_assert_unimplemented(); + } +} + +IMLReg replaceRegisterIdMultiple(IMLReg reg, const std::unordered_map& translationTable) +{ + if (reg.IsInvalid()) + return reg; + const auto& it = translationTable.find(reg.GetRegID()); + cemu_assert_debug(it != translationTable.cend()); + IMLReg alteredReg = reg; + alteredReg.SetRegID(it->second); + return alteredReg; +} + +void IMLInstruction::RewriteGPR(const std::unordered_map& translationTable) +{ + if (type == PPCREC_IML_TYPE_R_NAME) + { + op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable); + } + else if (type == PPCREC_IML_TYPE_NAME_R) + { + op_r_name.regR = replaceRegisterIdMultiple(op_r_name.regR, translationTable); + } + else if (type == PPCREC_IML_TYPE_R_R) + { + op_r_r.regR = replaceRegisterIdMultiple(op_r_r.regR, translationTable); + op_r_r.regA = replaceRegisterIdMultiple(op_r_r.regA, translationTable); + } + else if (type == PPCREC_IML_TYPE_R_S32) + { + op_r_immS32.regR = replaceRegisterIdMultiple(op_r_immS32.regR, translationTable); + } + else if (type == PPCREC_IML_TYPE_R_R_S32) + { + op_r_r_s32.regR = replaceRegisterIdMultiple(op_r_r_s32.regR, translationTable); + op_r_r_s32.regA = replaceRegisterIdMultiple(op_r_r_s32.regA, translationTable); + } + else if (type == PPCREC_IML_TYPE_R_R_S32_CARRY) + { + op_r_r_s32_carry.regR = replaceRegisterIdMultiple(op_r_r_s32_carry.regR, translationTable); + op_r_r_s32_carry.regA = replaceRegisterIdMultiple(op_r_r_s32_carry.regA, translationTable); + op_r_r_s32_carry.regCarry = replaceRegisterIdMultiple(op_r_r_s32_carry.regCarry, translationTable); + } + else if (type == PPCREC_IML_TYPE_R_R_R) + { + op_r_r_r.regR = replaceRegisterIdMultiple(op_r_r_r.regR, translationTable); + op_r_r_r.regA = replaceRegisterIdMultiple(op_r_r_r.regA, translationTable); + op_r_r_r.regB = replaceRegisterIdMultiple(op_r_r_r.regB, translationTable); + } + else if (type == PPCREC_IML_TYPE_R_R_R_CARRY) + { + op_r_r_r_carry.regR = replaceRegisterIdMultiple(op_r_r_r_carry.regR, translationTable); + op_r_r_r_carry.regA = replaceRegisterIdMultiple(op_r_r_r_carry.regA, translationTable); + op_r_r_r_carry.regB = replaceRegisterIdMultiple(op_r_r_r_carry.regB, translationTable); + op_r_r_r_carry.regCarry = replaceRegisterIdMultiple(op_r_r_r_carry.regCarry, translationTable); + } + else if (type == PPCREC_IML_TYPE_COMPARE) + { + op_compare.regR = replaceRegisterIdMultiple(op_compare.regR, translationTable); + op_compare.regA = replaceRegisterIdMultiple(op_compare.regA, translationTable); + op_compare.regB = replaceRegisterIdMultiple(op_compare.regB, translationTable); + } + else if (type == PPCREC_IML_TYPE_COMPARE_S32) + { + op_compare_s32.regR = replaceRegisterIdMultiple(op_compare_s32.regR, translationTable); + op_compare_s32.regA = replaceRegisterIdMultiple(op_compare_s32.regA, translationTable); + } + else if (type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + op_conditional_jump.registerBool = replaceRegisterIdMultiple(op_conditional_jump.registerBool, translationTable); + } + else if (type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || type == PPCREC_IML_TYPE_JUMP) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_NO_OP) + { + // no effect on registers + } + else if (type == PPCREC_IML_TYPE_MACRO) + { + if (operation == PPCREC_IML_MACRO_BL || operation 
== PPCREC_IML_MACRO_B_FAR || operation == PPCREC_IML_MACRO_LEAVE || operation == PPCREC_IML_MACRO_DEBUGBREAK || operation == PPCREC_IML_MACRO_HLE || operation == PPCREC_IML_MACRO_COUNT_CYCLES) + { + // no effect on registers + } + else if (operation == PPCREC_IML_MACRO_B_TO_REG) + { + op_macro.paramReg = replaceRegisterIdMultiple(op_macro.paramReg, translationTable); + } + else + { + cemu_assert_unimplemented(); + } + } + else if (type == PPCREC_IML_TYPE_LOAD) + { + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + if (op_storeLoad.registerMem.IsValid()) + { + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + } + } + else if (type == PPCREC_IML_TYPE_LOAD_INDEXED) + { + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + if (op_storeLoad.registerMem.IsValid()) + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + if (op_storeLoad.registerMem2.IsValid()) + op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); + } + else if (type == PPCREC_IML_TYPE_STORE) + { + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + if (op_storeLoad.registerMem.IsValid()) + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + } + else if (type == PPCREC_IML_TYPE_STORE_INDEXED) + { + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + if (op_storeLoad.registerMem.IsValid()) + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + if (op_storeLoad.registerMem2.IsValid()) + op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); + } + else if (type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + { + op_atomic_compare_store.regEA = replaceRegisterIdMultiple(op_atomic_compare_store.regEA, translationTable); + op_atomic_compare_store.regCompareValue = replaceRegisterIdMultiple(op_atomic_compare_store.regCompareValue, translationTable); + op_atomic_compare_store.regWriteValue = replaceRegisterIdMultiple(op_atomic_compare_store.regWriteValue, translationTable); + op_atomic_compare_store.regBoolOut = replaceRegisterIdMultiple(op_atomic_compare_store.regBoolOut, translationTable); + } + else if (type == PPCREC_IML_TYPE_CALL_IMM) + { + op_call_imm.regReturn = replaceRegisterIdMultiple(op_call_imm.regReturn, translationTable); + if (op_call_imm.regParam0.IsValid()) + op_call_imm.regParam0 = replaceRegisterIdMultiple(op_call_imm.regParam0, translationTable); + if (op_call_imm.regParam1.IsValid()) + op_call_imm.regParam1 = replaceRegisterIdMultiple(op_call_imm.regParam1, translationTable); + if (op_call_imm.regParam2.IsValid()) + op_call_imm.regParam2 = replaceRegisterIdMultiple(op_call_imm.regParam2, translationTable); + } + else if (type == PPCREC_IML_TYPE_FPR_LOAD) + { + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + } + else if (type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) + { + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerMem2 = 
replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); + } + else if (type == PPCREC_IML_TYPE_FPR_STORE) + { + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + } + else if (type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) + { + op_storeLoad.registerData = replaceRegisterIdMultiple(op_storeLoad.registerData, translationTable); + op_storeLoad.registerMem = replaceRegisterIdMultiple(op_storeLoad.registerMem, translationTable); + op_storeLoad.registerMem2 = replaceRegisterIdMultiple(op_storeLoad.registerMem2, translationTable); + } + else if (type == PPCREC_IML_TYPE_FPR_R) + { + op_fpr_r.regR = replaceRegisterIdMultiple(op_fpr_r.regR, translationTable); + } + else if (type == PPCREC_IML_TYPE_FPR_R_R) + { + op_fpr_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r.regR, translationTable); + op_fpr_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r.regA, translationTable); + } + else if (type == PPCREC_IML_TYPE_FPR_R_R_R) + { + op_fpr_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r.regR, translationTable); + op_fpr_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r.regA, translationTable); + op_fpr_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r.regB, translationTable); + } + else if (type == PPCREC_IML_TYPE_FPR_R_R_R_R) + { + op_fpr_r_r_r_r.regR = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regR, translationTable); + op_fpr_r_r_r_r.regA = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regA, translationTable); + op_fpr_r_r_r_r.regB = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regB, translationTable); + op_fpr_r_r_r_r.regC = replaceRegisterIdMultiple(op_fpr_r_r_r_r.regC, translationTable); + } + else if (type == PPCREC_IML_TYPE_FPR_COMPARE) + { + op_fpr_compare.regA = replaceRegisterIdMultiple(op_fpr_compare.regA, translationTable); + op_fpr_compare.regB = replaceRegisterIdMultiple(op_fpr_compare.regB, translationTable); + op_fpr_compare.regR = replaceRegisterIdMultiple(op_fpr_compare.regR, translationTable); + } + else if (type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + { + // no registers read or written (except for the implicit eflags) + } + else + { + cemu_assert_unimplemented(); + } +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h new file mode 100644 index 00000000..4df2a666 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h @@ -0,0 +1,826 @@ +#pragma once + +using IMLRegID = uint16; // 16 bit ID +using IMLPhysReg = sint32; // arbitrary value that is up to the architecture backend, usually this will be the register index. A value of -1 is reserved and means not assigned + +// format of IMLReg: +// 0-15 (16 bit) IMLRegID +// 19-23 (5 bit) Offset In elements, for SIMD registers +// 24-27 (4 bit) IMLRegFormat RegFormat +// 28-31 (4 bit) IMLRegFormat BaseFormat + +enum class IMLRegFormat : uint8 +{ + INVALID_FORMAT, + I64, + I32, + I16, + I8, + // I1 ? 
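+ // e.g. for the layout described above: IMLReg(IMLRegFormat::I64, IMLRegFormat::I32, 0, 5) places BaseFormat in bits 28-31, RegFormat in bits 24-27 and the id (5) in bits 0-15; note that the constructor below does not currently store the view offset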
+ F64, + F32, + TYPE_COUNT, +}; + +class IMLReg +{ +public: + IMLReg() + { + m_raw = 0; // 0 is invalid + } + + IMLReg(IMLRegFormat baseRegFormat, IMLRegFormat regFormat, uint8 viewOffset, IMLRegID regId) + { + m_raw = 0; + m_raw |= ((uint8)baseRegFormat << 28); + m_raw |= ((uint8)regFormat << 24); + m_raw |= (uint32)regId; + } + + IMLReg(IMLReg&& baseReg, IMLRegFormat viewFormat, uint8 viewOffset, IMLRegID regId) + { + DEBUG_BREAK; + //m_raw = 0; + //m_raw |= ((uint8)baseRegFormat << 28); + //m_raw |= ((uint8)viewFormat << 24); + //m_raw |= (uint32)regId; + } + + IMLReg(const IMLReg& other) : m_raw(other.m_raw) {} + + IMLRegFormat GetBaseFormat() const + { + return (IMLRegFormat)((m_raw >> 28) & 0xF); + } + + IMLRegFormat GetRegFormat() const + { + return (IMLRegFormat)((m_raw >> 24) & 0xF); + } + + IMLRegID GetRegID() const + { + cemu_assert_debug(GetBaseFormat() != IMLRegFormat::INVALID_FORMAT); + cemu_assert_debug(GetRegFormat() != IMLRegFormat::INVALID_FORMAT); + return (IMLRegID)(m_raw & 0xFFFF); + } + + void SetRegID(IMLRegID regId) + { + cemu_assert_debug(regId <= 0xFFFF); + m_raw &= ~0xFFFF; + m_raw |= (uint32)regId; + } + + bool IsInvalid() const + { + return GetBaseFormat() == IMLRegFormat::INVALID_FORMAT; + } + + bool IsValid() const + { + return GetBaseFormat() != IMLRegFormat::INVALID_FORMAT; + } + + bool IsValidAndSameRegID(IMLRegID regId) const + { + return IsValid() && GetRegID() == regId; + } + + // compare all fields + bool operator==(const IMLReg& other) const + { + return m_raw == other.m_raw; + } + +private: + uint32 m_raw; +}; + +static const IMLReg IMLREG_INVALID(IMLRegFormat::INVALID_FORMAT, IMLRegFormat::INVALID_FORMAT, 0, 0); +static const IMLRegID IMLRegID_INVALID(0xFFFF); + +using IMLName = uint32; + +enum +{ + PPCREC_IML_OP_ASSIGN, // '=' operator + PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap + PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply) + PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result + PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result + PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide) + PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide) + + // binary operation + PPCREC_IML_OP_OR, // '|' operator + PPCREC_IML_OP_AND, // '&' operator + PPCREC_IML_OP_XOR, // '^' operator + PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator + PPCREC_IML_OP_LEFT_SHIFT, // shift left operator + PPCREC_IML_OP_RIGHT_SHIFT_U, // right shift operator (unsigned) + PPCREC_IML_OP_RIGHT_SHIFT_S, // right shift operator (signed) + // ppc + PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits) + PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) + PPCREC_IML_OP_CNTLZW, + // FPU + PPCREC_IML_OP_FPR_ASSIGN, + PPCREC_IML_OP_FPR_LOAD_ONE, // load constant 1.0 into register + PPCREC_IML_OP_FPR_ADD, + PPCREC_IML_OP_FPR_SUB, + PPCREC_IML_OP_FPR_MULTIPLY, + PPCREC_IML_OP_FPR_DIVIDE, + PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, // expand f32 to f64 in-place + PPCREC_IML_OP_FPR_NEGATE, + PPCREC_IML_OP_FPR_ABS, // abs(fpr) + PPCREC_IML_OP_FPR_NEGATIVE_ABS, // -abs(fpr) + PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register) + PPCREC_IML_OP_FPR_FCTIWZ, + PPCREC_IML_OP_FPR_SELECT, // selectively copy bottom value from operand B or C based on value in operand A + // Conversion (FPR_R_R) + 
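// Illustrative sketch (standalone, not part of this header): the numeric effect of
// PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM described above, i.e. a double
// that afterwards only carries float precision. How the backend implements this is
// not shown here; the function below is just the scalar equivalent.
#include <cassert>

static double RoundToSinglePrecision(double value)
{
	// narrow to float, then widen back: a 64bit double limited to 32bit float precision
	return static_cast<double>(static_cast<float>(value));
}

int main()
{
	double x = 1.0000000001; // holds more precision than a float can represent
	double r = RoundToSinglePrecision(x);
	assert(r != x);   // the extra precision is gone
	assert(r == 1.0); // nearest representable float, widened back to double
	return 0;
}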
PPCREC_IML_OP_FPR_INT_TO_FLOAT, // convert integer value in gpr to floating point value in fpr + PPCREC_IML_OP_FPR_FLOAT_TO_INT, // convert floating point value in fpr to integer value in gpr + + // Bitcast (FPR_R_R) + PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT, + + // R_R_R + R_R_S32 + PPCREC_IML_OP_ADD, // also R_R_R_CARRY + PPCREC_IML_OP_SUB, + + // R_R only + PPCREC_IML_OP_NOT, + PPCREC_IML_OP_NEG, + PPCREC_IML_OP_ASSIGN_S16_TO_S32, + PPCREC_IML_OP_ASSIGN_S8_TO_S32, + + // R_R_R_carry + PPCREC_IML_OP_ADD_WITH_CARRY, // similar to ADD but also adds carry bit (0 or 1) + + // X86 extension + PPCREC_IML_OP_X86_CMP, // R_R and R_S32 + + PPCREC_IML_OP_INVALID +}; + +#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN) + +enum +{ + PPCREC_IML_MACRO_B_TO_REG, // branch to PPC address in register (used for BCCTR, BCLR) + + PPCREC_IML_MACRO_BL, // call to different function (can be within same function) + PPCREC_IML_MACRO_B_FAR, // branch to different function + PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount + PPCREC_IML_MACRO_HLE, // HLE function call + PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpeter + // debugging + PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak +}; + +enum class IMLCondition : uint8 +{ + EQ, + NEQ, + SIGNED_GT, + SIGNED_LT, + UNSIGNED_GT, + UNSIGNED_LT, + + // floating point conditions + UNORDERED_GT, // a > b, false if either is NaN + UNORDERED_LT, // a < b, false if either is NaN + UNORDERED_EQ, // a == b, false if either is NaN + UNORDERED_U, // unordered (true if either operand is NaN) + + ORDERED_GT, + ORDERED_LT, + ORDERED_EQ, + ORDERED_U +}; + +enum +{ + PPCREC_IML_TYPE_NONE, + PPCREC_IML_TYPE_NO_OP, // no-op instruction + PPCREC_IML_TYPE_R_R, // r* = (op) *r (can also be r* (op) *r) + PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r* + PPCREC_IML_TYPE_R_R_R_CARRY, // r* = r* (op) r* (reads and/or updates carry) + PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32* + PPCREC_IML_TYPE_R_R_S32_CARRY, // r* = r* (op) s32* (reads and/or updates carry) + PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*] + PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*] + PPCREC_IML_TYPE_STORE, // [r*+s32*] = r* + PPCREC_IML_TYPE_STORE_INDEXED, // [r*+r*] = r* + PPCREC_IML_TYPE_R_NAME, // r* = name + PPCREC_IML_TYPE_NAME_R, // name* = r* + PPCREC_IML_TYPE_R_S32, // r* (op) imm + PPCREC_IML_TYPE_MACRO, + PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles < 0 + + // conditions and branches + PPCREC_IML_TYPE_COMPARE, // r* = r* CMP[cond] r* + PPCREC_IML_TYPE_COMPARE_S32, // r* = r* CMP[cond] imm + PPCREC_IML_TYPE_JUMP, // jump always + PPCREC_IML_TYPE_CONDITIONAL_JUMP, // jump conditionally based on boolean value in register + + // atomic + PPCREC_IML_TYPE_ATOMIC_CMP_STORE, + + // function call + PPCREC_IML_TYPE_CALL_IMM, // call to fixed immediate address + + // FPR + PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode) + PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode) + PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode) + PPCREC_IML_TYPE_FPR_STORE_INDEXED, // (bitdepth) [r*+r*] = r* (single or paired single mode) + PPCREC_IML_TYPE_FPR_R_R, + PPCREC_IML_TYPE_FPR_R_R_R, + PPCREC_IML_TYPE_FPR_R_R_R_R, + PPCREC_IML_TYPE_FPR_R, + + PPCREC_IML_TYPE_FPR_COMPARE, // r* = r* CMP[cond] r* + + // X86 specific + PPCREC_IML_TYPE_X86_EFLAGS_JCC, +}; + +enum // IMLName +{ + PPCREC_NAME_NONE, + PPCREC_NAME_TEMPORARY = 
1000, + PPCREC_NAME_R0 = 2000, + PPCREC_NAME_SPR0 = 3000, + PPCREC_NAME_FPR_HALF = 4800, // Counts PS0 and PS1 separately. E.g. fp3.ps1 is at offset 3 * 2 + 1 + PPCREC_NAME_TEMPORARY_FPR0 = 5000, // 0 to 7 + PPCREC_NAME_XER_CA = 6000, // carry bit from XER + PPCREC_NAME_XER_OV = 6001, // overflow bit from XER + PPCREC_NAME_XER_SO = 6002, // summary overflow bit from XER + PPCREC_NAME_CR = 7000, // CR register bits (31 to 0) + PPCREC_NAME_CR_LAST = PPCREC_NAME_CR+31, + PPCREC_NAME_CPU_MEMRES_EA = 8000, + PPCREC_NAME_CPU_MEMRES_VAL = 8001 +}; + +#define PPC_REC_INVALID_REGISTER 0xFF // deprecated. Use IMLREG_INVALID instead + +enum +{ + // fpr load + PPCREC_FPR_LD_MODE_SINGLE, + PPCREC_FPR_LD_MODE_DOUBLE, + + // fpr store + PPCREC_FPR_ST_MODE_SINGLE, + PPCREC_FPR_ST_MODE_DOUBLE, + + PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0 +}; + +struct IMLUsedRegisters +{ + IMLUsedRegisters() {}; + + bool IsWrittenByRegId(IMLRegID regId) const + { + if (writtenGPR1.IsValid() && writtenGPR1.GetRegID() == regId) + return true; + if (writtenGPR2.IsValid() && writtenGPR2.GetRegID() == regId) + return true; + return false; + } + + bool IsBaseGPRWritten(IMLReg imlReg) const + { + cemu_assert_debug(imlReg.IsValid()); + auto regId = imlReg.GetRegID(); + return IsWrittenByRegId(regId); + } + + template + void ForEachWrittenGPR(Fn F) const + { + if (writtenGPR1.IsValid()) + F(writtenGPR1); + if (writtenGPR2.IsValid()) + F(writtenGPR2); + } + + template + void ForEachReadGPR(Fn F) const + { + if (readGPR1.IsValid()) + F(readGPR1); + if (readGPR2.IsValid()) + F(readGPR2); + if (readGPR3.IsValid()) + F(readGPR3); + if (readGPR4.IsValid()) + F(readGPR4); + } + + template + void ForEachAccessedGPR(Fn F) const + { + // GPRs + if (readGPR1.IsValid()) + F(readGPR1, false); + if (readGPR2.IsValid()) + F(readGPR2, false); + if (readGPR3.IsValid()) + F(readGPR3, false); + if (readGPR4.IsValid()) + F(readGPR4, false); + if (writtenGPR1.IsValid()) + F(writtenGPR1, true); + if (writtenGPR2.IsValid()) + F(writtenGPR2, true); + } + + IMLReg readGPR1; + IMLReg readGPR2; + IMLReg readGPR3; + IMLReg readGPR4; + IMLReg writtenGPR1; + IMLReg writtenGPR2; +}; + +struct IMLInstruction +{ + IMLInstruction() {} + IMLInstruction(const IMLInstruction& other) + { + memcpy(this, &other, sizeof(IMLInstruction)); + } + + uint8 type; + uint8 operation; + union + { + struct + { + uint8 _padding[7]; + }padding; + struct + { + IMLReg regR; + IMLReg regA; + }op_r_r; + struct + { + IMLReg regR; + IMLReg regA; + IMLReg regB; + }op_r_r_r; + struct + { + IMLReg regR; + IMLReg regA; + IMLReg regB; + IMLReg regCarry; + }op_r_r_r_carry; + struct + { + IMLReg regR; + IMLReg regA; + sint32 immS32; + }op_r_r_s32; + struct + { + IMLReg regR; + IMLReg regA; + IMLReg regCarry; + sint32 immS32; + }op_r_r_s32_carry; + struct + { + IMLReg regR; + IMLName name; + }op_r_name; // alias op_name_r + struct + { + IMLReg regR; + sint32 immS32; + }op_r_immS32; + struct + { + uint32 param; + uint32 param2; + uint16 paramU16; + IMLReg paramReg; + }op_macro; + struct + { + IMLReg registerData; + IMLReg registerMem; + IMLReg registerMem2; + uint8 copyWidth; + struct + { + bool swapEndian : 1; + bool signExtend : 1; + bool notExpanded : 1; // for floats + }flags2; + uint8 mode; // transfer mode + sint32 immS32; + }op_storeLoad; + struct + { + uintptr_t callAddress; + IMLReg regParam0; + IMLReg regParam1; + IMLReg regParam2; + IMLReg regReturn; + }op_call_imm; + struct + { + IMLReg regR; + IMLReg regA; + }op_fpr_r_r; + struct + { + IMLReg regR; + IMLReg regA; 
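// Illustrative sketch (standalone, not part of this header): the PPCREC_NAME_FPR_HALF
// numbering described above, where PS0 and PS1 are counted separately so fp3.ps1 sits
// at offset 3 * 2 + 1. The constant and helper name are invented for the example.
#include <cassert>

constexpr int kNameFprHalfBase = 4800; // mirrors PPCREC_NAME_FPR_HALF above

constexpr int NameForFprHalf(int fprIndex, int psIndex) // psIndex: 0 = PS0, 1 = PS1
{
	return kNameFprHalfBase + fprIndex * 2 + psIndex;
}

int main()
{
	static_assert(NameForFprHalf(3, 1) == 4800 + 7); // fp3.ps1, as in the comment above
	static_assert(NameForFprHalf(0, 0) == 4800);     // fp0.ps0
	return 0;
}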
+ IMLReg regB; + }op_fpr_r_r_r; + struct + { + IMLReg regR; + IMLReg regA; + IMLReg regB; + IMLReg regC; + }op_fpr_r_r_r_r; + struct + { + IMLReg regR; + }op_fpr_r; + struct + { + IMLReg regR; // stores the boolean result of the comparison + IMLReg regA; + IMLReg regB; + IMLCondition cond; + }op_fpr_compare; + struct + { + IMLReg regR; // stores the boolean result of the comparison + IMLReg regA; + IMLReg regB; + IMLCondition cond; + }op_compare; + struct + { + IMLReg regR; // stores the boolean result of the comparison + IMLReg regA; + sint32 immS32; + IMLCondition cond; + }op_compare_s32; + struct + { + IMLReg registerBool; + bool mustBeTrue; + }op_conditional_jump; + struct + { + IMLReg regEA; + IMLReg regCompareValue; + IMLReg regWriteValue; + IMLReg regBoolOut; + }op_atomic_compare_store; + // conditional operations (emitted if supported by target platform) + struct + { + // r_s32 + IMLReg regR; + sint32 immS32; + // condition + uint8 crRegisterIndex; + uint8 crBitIndex; + bool bitMustBeSet; + }op_conditional_r_s32; + // X86 specific + struct + { + IMLCondition cond; + bool invertedCondition; + }op_x86_eflags_jcc; + }; + + bool IsSuffixInstruction() const + { + if (type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_BL || + type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_B_FAR || + type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_B_TO_REG || + type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_LEAVE || + type == PPCREC_IML_TYPE_MACRO && operation == PPCREC_IML_MACRO_HLE || + type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK || + type == PPCREC_IML_TYPE_JUMP || + type == PPCREC_IML_TYPE_CONDITIONAL_JUMP || + type == PPCREC_IML_TYPE_X86_EFLAGS_JCC) + return true; + return false; + } + + // instruction setters + void make_no_op() + { + type = PPCREC_IML_TYPE_NO_OP; + operation = 0; + } + + void make_r_name(IMLReg regR, IMLName name) + { + cemu_assert_debug(regR.GetBaseFormat() == regR.GetRegFormat()); // for name load/store instructions the register must match the base format + type = PPCREC_IML_TYPE_R_NAME; + operation = PPCREC_IML_OP_ASSIGN; + op_r_name.regR = regR; + op_r_name.name = name; + } + + void make_name_r(IMLName name, IMLReg regR) + { + cemu_assert_debug(regR.GetBaseFormat() == regR.GetRegFormat()); // for name load/store instructions the register must match the base format + type = PPCREC_IML_TYPE_NAME_R; + operation = PPCREC_IML_OP_ASSIGN; + op_r_name.regR = regR; + op_r_name.name = name; + } + + void make_debugbreak(uint32 currentPPCAddress = 0) + { + make_macro(PPCREC_IML_MACRO_DEBUGBREAK, 0, currentPPCAddress, 0, IMLREG_INVALID); + } + + void make_macro(uint32 macroId, uint32 param, uint32 param2, uint16 paramU16, IMLReg regParam) + { + this->type = PPCREC_IML_TYPE_MACRO; + this->operation = macroId; + this->op_macro.param = param; + this->op_macro.param2 = param2; + this->op_macro.paramU16 = paramU16; + this->op_macro.paramReg = regParam; + } + + void make_cjump_cycle_check() + { + this->type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; + this->operation = 0; + } + + void make_r_r(uint32 operation, IMLReg regR, IMLReg regA) + { + this->type = PPCREC_IML_TYPE_R_R; + this->operation = operation; + this->op_r_r.regR = regR; + this->op_r_r.regA = regA; + } + + void make_r_s32(uint32 operation, IMLReg regR, sint32 immS32) + { + this->type = PPCREC_IML_TYPE_R_S32; + this->operation = operation; + this->op_r_immS32.regR = regR; + this->op_r_immS32.immS32 = immS32; + } + + void make_r_r_r(uint32 operation, IMLReg regR, IMLReg regA, IMLReg 
regB) + { + this->type = PPCREC_IML_TYPE_R_R_R; + this->operation = operation; + this->op_r_r_r.regR = regR; + this->op_r_r_r.regA = regA; + this->op_r_r_r.regB = regB; + } + + void make_r_r_r_carry(uint32 operation, IMLReg regR, IMLReg regA, IMLReg regB, IMLReg regCarry) + { + this->type = PPCREC_IML_TYPE_R_R_R_CARRY; + this->operation = operation; + this->op_r_r_r_carry.regR = regR; + this->op_r_r_r_carry.regA = regA; + this->op_r_r_r_carry.regB = regB; + this->op_r_r_r_carry.regCarry = regCarry; + } + + void make_r_r_s32(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32) + { + this->type = PPCREC_IML_TYPE_R_R_S32; + this->operation = operation; + this->op_r_r_s32.regR = regR; + this->op_r_r_s32.regA = regA; + this->op_r_r_s32.immS32 = immS32; + } + + void make_r_r_s32_carry(uint32 operation, IMLReg regR, IMLReg regA, sint32 immS32, IMLReg regCarry) + { + this->type = PPCREC_IML_TYPE_R_R_S32_CARRY; + this->operation = operation; + this->op_r_r_s32_carry.regR = regR; + this->op_r_r_s32_carry.regA = regA; + this->op_r_r_s32_carry.immS32 = immS32; + this->op_r_r_s32_carry.regCarry = regCarry; + } + + void make_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond) + { + this->type = PPCREC_IML_TYPE_COMPARE; + this->operation = PPCREC_IML_OP_INVALID; + this->op_compare.regR = regR; + this->op_compare.regA = regA; + this->op_compare.regB = regB; + this->op_compare.cond = cond; + } + + void make_compare_s32(IMLReg regA, sint32 immS32, IMLReg regR, IMLCondition cond) + { + this->type = PPCREC_IML_TYPE_COMPARE_S32; + this->operation = PPCREC_IML_OP_INVALID; + this->op_compare_s32.regR = regR; + this->op_compare_s32.regA = regA; + this->op_compare_s32.immS32 = immS32; + this->op_compare_s32.cond = cond; + } + + void make_conditional_jump(IMLReg regBool, bool mustBeTrue) + { + this->type = PPCREC_IML_TYPE_CONDITIONAL_JUMP; + this->operation = PPCREC_IML_OP_INVALID; + this->op_conditional_jump.registerBool = regBool; + this->op_conditional_jump.mustBeTrue = mustBeTrue; + } + + void make_jump() + { + this->type = PPCREC_IML_TYPE_JUMP; + this->operation = PPCREC_IML_OP_INVALID; + } + + // load from memory + void make_r_memory(IMLReg regD, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_LOAD; + this->operation = 0; + this->op_storeLoad.registerData = regD; + this->op_storeLoad.registerMem = regMem; + this->op_storeLoad.immS32 = immS32; + this->op_storeLoad.copyWidth = copyWidth; + this->op_storeLoad.flags2.swapEndian = switchEndian; + this->op_storeLoad.flags2.signExtend = signExtend; + } + + // store to memory + void make_memory_r(IMLReg regS, IMLReg regMem, sint32 immS32, uint32 copyWidth, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_STORE; + this->operation = 0; + this->op_storeLoad.registerData = regS; + this->op_storeLoad.registerMem = regMem; + this->op_storeLoad.immS32 = immS32; + this->op_storeLoad.copyWidth = copyWidth; + this->op_storeLoad.flags2.swapEndian = switchEndian; + this->op_storeLoad.flags2.signExtend = false; + } + + void make_atomic_cmp_store(IMLReg regEA, IMLReg regCompareValue, IMLReg regWriteValue, IMLReg regSuccessOutput) + { + this->type = PPCREC_IML_TYPE_ATOMIC_CMP_STORE; + this->operation = 0; + this->op_atomic_compare_store.regEA = regEA; + this->op_atomic_compare_store.regCompareValue = regCompareValue; + this->op_atomic_compare_store.regWriteValue = regWriteValue; + this->op_atomic_compare_store.regBoolOut = regSuccessOutput; + } + + void make_call_imm(uintptr_t 
callAddress, IMLReg param0, IMLReg param1, IMLReg param2, IMLReg regReturn) + { + this->type = PPCREC_IML_TYPE_CALL_IMM; + this->operation = 0; + this->op_call_imm.callAddress = callAddress; + this->op_call_imm.regParam0 = param0; + this->op_call_imm.regParam1 = param1; + this->op_call_imm.regParam2 = param2; + this->op_call_imm.regReturn = regReturn; + } + + // FPR + + // load from memory + void make_fpr_r_memory(IMLReg registerDestination, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_FPR_LOAD; + this->operation = 0; + this->op_storeLoad.registerData = registerDestination; + this->op_storeLoad.registerMem = registerMemory; + this->op_storeLoad.immS32 = immS32; + this->op_storeLoad.mode = mode; + this->op_storeLoad.flags2.swapEndian = switchEndian; + } + + void make_fpr_r_memory_indexed(IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 mode, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED; + this->operation = 0; + this->op_storeLoad.registerData = registerDestination; + this->op_storeLoad.registerMem = registerMemory1; + this->op_storeLoad.registerMem2 = registerMemory2; + this->op_storeLoad.immS32 = 0; + this->op_storeLoad.mode = mode; + this->op_storeLoad.flags2.swapEndian = switchEndian; + } + + // store to memory + void make_fpr_memory_r(IMLReg registerSource, IMLReg registerMemory, sint32 immS32, uint32 mode, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_FPR_STORE; + this->operation = 0; + this->op_storeLoad.registerData = registerSource; + this->op_storeLoad.registerMem = registerMemory; + this->op_storeLoad.immS32 = immS32; + this->op_storeLoad.mode = mode; + this->op_storeLoad.flags2.swapEndian = switchEndian; + } + + void make_fpr_memory_r_indexed(IMLReg registerSource, IMLReg registerMemory1, IMLReg registerMemory2, sint32 immS32, uint32 mode, bool switchEndian) + { + this->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED; + this->operation = 0; + this->op_storeLoad.registerData = registerSource; + this->op_storeLoad.registerMem = registerMemory1; + this->op_storeLoad.registerMem2 = registerMemory2; + this->op_storeLoad.immS32 = immS32; + this->op_storeLoad.mode = mode; + this->op_storeLoad.flags2.swapEndian = switchEndian; + } + + void make_fpr_compare(IMLReg regA, IMLReg regB, IMLReg regR, IMLCondition cond) + { + this->type = PPCREC_IML_TYPE_FPR_COMPARE; + this->operation = -999; + this->op_fpr_compare.regR = regR; + this->op_fpr_compare.regA = regA; + this->op_fpr_compare.regB = regB; + this->op_fpr_compare.cond = cond; + } + + void make_fpr_r(sint32 operation, IMLReg registerResult) + { + // OP (fpr) + this->type = PPCREC_IML_TYPE_FPR_R; + this->operation = operation; + this->op_fpr_r.regR = registerResult; + } + + void make_fpr_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand, sint32 crRegister=PPC_REC_INVALID_REGISTER) + { + // fpr OP fpr + this->type = PPCREC_IML_TYPE_FPR_R_R; + this->operation = operation; + this->op_fpr_r_r.regR = registerResult; + this->op_fpr_r_r.regA = registerOperand; + } + + void make_fpr_r_r_r(sint32 operation, IMLReg registerResult, IMLReg registerOperand1, IMLReg registerOperand2, sint32 crRegister=PPC_REC_INVALID_REGISTER) + { + // fpr = OP (fpr,fpr) + this->type = PPCREC_IML_TYPE_FPR_R_R_R; + this->operation = operation; + this->op_fpr_r_r_r.regR = registerResult; + this->op_fpr_r_r_r.regA = registerOperand1; + this->op_fpr_r_r_r.regB = registerOperand2; + } + + void make_fpr_r_r_r_r(sint32 operation, IMLReg 
registerResult, IMLReg registerOperandA, IMLReg registerOperandB, IMLReg registerOperandC, sint32 crRegister=PPC_REC_INVALID_REGISTER) + { + // fpr = OP (fpr,fpr,fpr) + this->type = PPCREC_IML_TYPE_FPR_R_R_R_R; + this->operation = operation; + this->op_fpr_r_r_r_r.regR = registerResult; + this->op_fpr_r_r_r_r.regA = registerOperandA; + this->op_fpr_r_r_r_r.regB = registerOperandB; + this->op_fpr_r_r_r_r.regC = registerOperandC; + } + + /* X86 specific */ + void make_x86_eflags_jcc(IMLCondition cond, bool invertedCondition) + { + this->type = PPCREC_IML_TYPE_X86_EFLAGS_JCC; + this->operation = -999; + this->op_x86_eflags_jcc.cond = cond; + this->op_x86_eflags_jcc.invertedCondition = invertedCondition; + } + + void CheckRegisterUsage(IMLUsedRegisters* registersUsed) const; + bool HasSideEffects() const; // returns true if the instruction has side effects beyond just reading and writing registers. Dead code elimination uses this to know if an instruction can be dropped when the regular register outputs are not used + + void RewriteGPR(const std::unordered_map& translationTable); +}; + +// architecture specific constants +namespace IMLArchX86 +{ + static constexpr int PHYSREG_GPR_BASE = 0; + static constexpr int PHYSREG_FPR_BASE = 16; +}; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp new file mode 100644 index 00000000..7671a163 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLOptimizer.cpp @@ -0,0 +1,719 @@ +#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" +#include "Cafe/HW/Espresso/Recompiler/IML/IML.h" +#include "Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h" + +#include "../PPCRecompiler.h" +#include "../PPCRecompilerIml.h" +#include "../BackendX64/BackendX64.h" + +#include "Common/FileStream.h" + +#include +#include + +IMLReg _FPRRegFromID(IMLRegID regId) +{ + return IMLReg(IMLRegFormat::F64, IMLRegFormat::F64, 0, regId); +} + +void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 imlIndexLoad, IMLReg fprReg) +{ + IMLRegID fprIndex = fprReg.GetRegID(); + + IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad; + if (imlInstructionLoad->op_storeLoad.flags2.notExpanded) + return; + boost::container::static_vector trackedMoves; // only track up to 4 copies + IMLUsedRegisters registersUsed; + sint32 scanRangeEnd = std::min(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances) + bool foundMatch = false; + sint32 lastStore = -1; + for (sint32 i = imlIndexLoad + 1; i < scanRangeEnd; i++) + { + IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; + if (imlInstruction->IsSuffixInstruction()) + break; + // check if FPR is stored + if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE) || + (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE)) + { + if (imlInstruction->op_storeLoad.registerData.GetRegID() == fprIndex) + { + if (foundMatch == false) + { + // flag the load-single instruction as "don't expand" (leave single value as-is) + imlInstructionLoad->op_storeLoad.flags2.notExpanded = true; + } + // also set the flag for the store instruction + IMLInstruction* imlInstructionStore = imlInstruction; + 
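// Illustrative sketch (standalone, not part of this file): the shape of the forward scan
// used by this pass - a single-precision load is paired with later single-precision
// stores of the same register and both sides are flagged as notExpanded, so the value
// stays a raw float instead of being expanded to double and truncated again. Toy types
// only; the real code additionally tracks register copies and aborts on other accesses.
#include <cassert>
#include <vector>

struct ToyInstr
{
	enum Kind { LOAD_SINGLE, STORE_SINGLE, OTHER } kind;
	int reg = -1;            // register loaded or stored by this instruction
	bool notExpanded = false;
};

static void FlagDirectFloatCopies(std::vector<ToyInstr>& code, size_t loadIndex)
{
	ToyInstr& load = code[loadIndex];
	for (size_t i = loadIndex + 1; i < code.size(); i++)
	{
		ToyInstr& instr = code[i];
		if (instr.kind == ToyInstr::STORE_SINGLE && instr.reg == load.reg)
		{
			load.notExpanded = true;  // leave the loaded value as a raw float
			instr.notExpanded = true; // and store it back without re-truncation
			continue;
		}
		if (instr.reg == load.reg)
			break; // the register is touched by something else, stop scanning
	}
}

int main()
{
	std::vector<ToyInstr> code = {
		{ToyInstr::LOAD_SINGLE, 5},
		{ToyInstr::OTHER, 9},
		{ToyInstr::STORE_SINGLE, 5},
	};
	FlagDirectFloatCopies(code, 0);
	assert(code[0].notExpanded && code[2].notExpanded);
	return 0;
}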
imlInstructionStore->op_storeLoad.flags2.notExpanded = true; + + foundMatch = true; + lastStore = i + 1; + + continue; + } + } + // if the FPR is copied then keep track of it. We can expand the copies instead of the original + if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlInstruction->operation == PPCREC_IML_OP_FPR_ASSIGN && imlInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex) + { + if (imlInstruction->op_fpr_r_r.regR.GetRegID() == fprIndex) + { + // unexpected no-op + break; + } + if (trackedMoves.size() >= trackedMoves.capacity()) + { + // we cant track any more moves, expand here + lastStore = i; + break; + } + trackedMoves.push_back(i); + continue; + } + // check if FPR is overwritten + imlInstruction->CheckRegisterUsage(®istersUsed); + if (registersUsed.writtenGPR1.IsValidAndSameRegID(fprIndex) || registersUsed.writtenGPR2.IsValidAndSameRegID(fprIndex)) + break; + if (registersUsed.readGPR1.IsValidAndSameRegID(fprIndex)) + break; + if (registersUsed.readGPR2.IsValidAndSameRegID(fprIndex)) + break; + if (registersUsed.readGPR3.IsValidAndSameRegID(fprIndex)) + break; + if (registersUsed.readGPR4.IsValidAndSameRegID(fprIndex)) + break; + } + + if (foundMatch) + { + // insert expand instructions for each target register of a move + sint32 positionBias = 0; + for (auto& trackedMove : trackedMoves) + { + sint32 realPosition = trackedMove + positionBias; + IMLInstruction* imlMoveInstruction = imlSegment->imlList.data() + realPosition; + if (realPosition >= lastStore) + break; // expand is inserted before this move + else + lastStore++; + + cemu_assert_debug(imlMoveInstruction->type == PPCREC_IML_TYPE_FPR_R_R && imlMoveInstruction->op_fpr_r_r.regA.GetRegID() == fprIndex); + cemu_assert_debug(imlMoveInstruction->op_fpr_r_r.regA.GetRegFormat() == IMLRegFormat::F64); + auto dstReg = imlMoveInstruction->op_fpr_r_r.regR; + IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, realPosition+1); // one after the move + newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, dstReg); + positionBias++; + } + // insert expand instruction after store + IMLInstruction* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore); + newExpand->make_fpr_r(PPCREC_IML_OP_FPR_EXPAND_F32_TO_F64, _FPRRegFromID(fprIndex)); + } +} + +/* +* Scans for patterns: +* +* +* +* For these patterns the store and load is modified to work with un-extended values (float remains as float, no double conversion) +* The float->double extension is then executed later +* Advantages: +* Keeps denormals and other special float values intact +* Slightly improves performance +*/ +void IMLOptimizer_OptimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext) +{ + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + { + for (sint32 i = 0; i < segIt->imlList.size(); i++) + { + IMLInstruction* imlInstruction = segIt->imlList.data() + i; + if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE) + { + PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); + } + else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE) + { + PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); + } + } + } +} + +void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, sint32 
imlIndexLoad, IMLReg gprReg) +{ + cemu_assert_debug(gprReg.GetBaseFormat() == IMLRegFormat::I64); // todo - proper handling required for non-standard sizes + cemu_assert_debug(gprReg.GetRegFormat() == IMLRegFormat::I32); + + IMLRegID gprIndex = gprReg.GetRegID(); + IMLInstruction* imlInstructionLoad = imlSegment->imlList.data() + imlIndexLoad; + if ( imlInstructionLoad->op_storeLoad.flags2.swapEndian == false ) + return; + bool foundMatch = false; + IMLUsedRegisters registersUsed; + sint32 scanRangeEnd = std::min(imlIndexLoad + 25, imlSegment->imlList.size()); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances) + sint32 i = imlIndexLoad + 1; + for (; i < scanRangeEnd; i++) + { + IMLInstruction* imlInstruction = imlSegment->imlList.data() + i; + if (imlInstruction->IsSuffixInstruction()) + break; + // check if GPR is stored + if ((imlInstruction->type == PPCREC_IML_TYPE_STORE && imlInstruction->op_storeLoad.copyWidth == 32 ) ) + { + if (imlInstruction->op_storeLoad.registerMem.GetRegID() == gprIndex) + break; + if (imlInstruction->op_storeLoad.registerData.GetRegID() == gprIndex) + { + IMLInstruction* imlInstructionStore = imlInstruction; + if (foundMatch == false) + { + // switch the endian swap flag for the load instruction + imlInstructionLoad->op_storeLoad.flags2.swapEndian = !imlInstructionLoad->op_storeLoad.flags2.swapEndian; + foundMatch = true; + } + // switch the endian swap flag for the store instruction + imlInstructionStore->op_storeLoad.flags2.swapEndian = !imlInstructionStore->op_storeLoad.flags2.swapEndian; + // keep scanning + continue; + } + } + // check if GPR is accessed + imlInstruction->CheckRegisterUsage(®istersUsed); + if (registersUsed.readGPR1.IsValidAndSameRegID(gprIndex) || + registersUsed.readGPR2.IsValidAndSameRegID(gprIndex) || + registersUsed.readGPR3.IsValidAndSameRegID(gprIndex)) + { + break; + } + if (registersUsed.IsBaseGPRWritten(gprReg)) + return; // GPR overwritten, we don't need to byte swap anymore + } + if (foundMatch) + { + PPCRecompiler_insertInstruction(imlSegment, i)->make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, gprReg, gprReg); + } +} + +/* +* Scans for patterns: +* +* +* +* For these patterns the store and load is modified to work with non-swapped values +* The big_endian->little_endian conversion is then executed later +* Advantages: +* Slightly improves performance +*/ +void IMLOptimizer_OptimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext) +{ + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + { + for (sint32 i = 0; i < segIt->imlList.size(); i++) + { + IMLInstruction* imlInstruction = segIt->imlList.data() + i; + if (imlInstruction->type == PPCREC_IML_TYPE_LOAD && imlInstruction->op_storeLoad.copyWidth == 32 && imlInstruction->op_storeLoad.flags2.swapEndian ) + { + PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, segIt, i, imlInstruction->op_storeLoad.registerData); + } + } + } +} + +IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg); + +sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrReg) +{ + if (gqrReg.IsInvalid()) + return -1; + sint32 namedReg = PPCRecompilerImlGen_GetRegName(ppcImlGenContext, gqrReg); + if (namedReg >= (PPCREC_NAME_SPR0 + SPR_UGQR0) && namedReg <= (PPCREC_NAME_SPR0 + SPR_UGQR7)) + { + return namedReg - (PPCREC_NAME_SPR0 + SPR_UGQR0); + } + else + { + cemu_assert_suspicious(); + } + return -1; +} + +bool 
PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, uint32& gqrValue) +{ + // the default configuration is: + // UGQR0 = 0x00000000 + // UGQR2 = 0x00040004 + // UGQR3 = 0x00050005 + // UGQR4 = 0x00060006 + // UGQR5 = 0x00070007 + // but games are free to modify UGQR2 to UGQR7 it seems. + // no game modifies UGQR0 so it's safe enough to optimize for the default value + // Ideally we would do some kind of runtime tracking and second recompilation to create fast paths for PSQ_L/PSQ_ST but thats todo + if (gqrIndex == 0) + gqrValue = 0x00000000; + else + return false; + return true; +} + +// analyses register dependencies across the entire function +// per segment this will generate information about which registers need to be preserved and which ones don't (e.g. are overwritten) +class IMLOptimizerRegIOAnalysis +{ + public: + // constructor with segment pointer list as span + IMLOptimizerRegIOAnalysis(std::span segmentList, uint32 maxRegId) : m_segmentList(segmentList), m_maxRegId(maxRegId) + { + m_segRegisterInOutList.resize(segmentList.size()); + } + + struct IMLSegmentRegisterInOut + { + // todo - since our register ID range is usually pretty small (<64) we could use integer bitmasks to accelerate this? There is a helper class used in RA code already + std::unordered_set regWritten; // registers which are modified in this segment + std::unordered_set regImported; // registers which are read in this segment before they are written (importing value from previous segments) + std::unordered_set regForward; // registers which are not read or written in this segment, but are imported into a later segment (propagated info) + }; + + // calculate which registers are imported (read-before-written) and forwarded (read-before-written by a later segment) per segment + // then in a second step propagate the dependencies across linked segments + void ComputeDepedencies() + { + std::vector& segRegisterInOutList = m_segRegisterInOutList; + IMLSegmentRegisterInOut* segIO = segRegisterInOutList.data(); + uint32 index = 0; + for(auto& seg : m_segmentList) + { + seg->momentaryIndex = index; + index++; + for(auto& instr : seg->imlList) + { + IMLUsedRegisters registerUsage; + instr.CheckRegisterUsage(®isterUsage); + // registers are considered imported if they are read before being written in this seg + registerUsage.ForEachReadGPR([&](IMLReg gprReg) { + IMLRegID gprId = gprReg.GetRegID(); + if (!segIO->regWritten.contains(gprId)) + { + segIO->regImported.insert(gprId); + } + }); + registerUsage.ForEachWrittenGPR([&](IMLReg gprReg) { + IMLRegID gprId = gprReg.GetRegID(); + segIO->regWritten.insert(gprId); + }); + } + segIO++; + } + // for every exit segment, import all registers + for(auto& seg : m_segmentList) + { + if (!seg->nextSegmentIsUncertain) + continue; + if(seg->deadCodeEliminationHintSeg) + continue; + IMLSegmentRegisterInOut& segIO = segRegisterInOutList[seg->momentaryIndex]; + for(uint32 i=0; i<=m_maxRegId; i++) + { + segIO.regImported.insert((IMLRegID)i); + } + } + // broadcast dependencies across segment chains + std::unordered_set segIdsWhichNeedUpdate; + for (uint32 i = 0; i < m_segmentList.size(); i++) + { + segIdsWhichNeedUpdate.insert(i); + } + while(!segIdsWhichNeedUpdate.empty()) + { + auto firstIt = segIdsWhichNeedUpdate.begin(); + uint32 segId = *firstIt; + segIdsWhichNeedUpdate.erase(firstIt); + // forward regImported and regForward to earlier segments into their regForward, unless the register is written + auto& curSeg = m_segmentList[segId]; + 
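// Illustrative sketch (standalone, not part of this file): the fixed-point propagation
// performed by this loop - registers a segment imports (or forwards) are pushed into
// each predecessor's regForward set unless the predecessor writes them, and changed
// predecessors are re-queued. Toy types only; the real pass also walks the
// dead-code-elimination hint links.
#include <cassert>
#include <set>
#include <vector>

struct ToySeg
{
	std::set<int> regWritten;  // registers written in this segment
	std::set<int> regImported; // read before written in this segment
	std::set<int> regForward;  // needed by later segments, passing through
	std::vector<int> prevSegs; // indices of predecessor segments
};

static void PropagateImports(std::vector<ToySeg>& segs)
{
	std::set<int> worklist;
	for (int i = 0; i < (int)segs.size(); i++)
		worklist.insert(i);
	while (!worklist.empty())
	{
		int segId = *worklist.begin();
		worklist.erase(worklist.begin());
		ToySeg& cur = segs[segId];
		for (int prevId : cur.prevSegs)
		{
			ToySeg& prev = segs[prevId];
			bool changed = false;
			for (const std::set<int>* srcSet : {&cur.regImported, &cur.regForward})
				for (int regId : *srcSet)
					if (!prev.regWritten.count(regId))
						changed |= prev.regForward.insert(regId).second;
			if (changed)
				worklist.insert(prevId); // predecessor changed, revisit its predecessors too
		}
	}
}

int main()
{
	// seg0 -> seg1 -> seg2; seg2 imports r7, seg1 neither reads nor writes it, seg0 writes it
	std::vector<ToySeg> segs(3);
	segs[1].prevSegs = {0};
	segs[2].prevSegs = {1};
	segs[2].regImported = {7};
	segs[0].regWritten = {7};
	PropagateImports(segs);
	assert(segs[1].regForward.count(7) == 1); // r7 must flow through seg1
	assert(segs[0].regForward.count(7) == 0); // seg0 writes r7, nothing to forward further
	return 0;
}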
IMLSegmentRegisterInOut& curSegIO = segRegisterInOutList[segId]; + for(auto& prevSeg : curSeg->list_prevSegments) + { + IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex]; + bool prevSegChanged = false; + for(auto& regId : curSegIO.regImported) + { + if (!prevSegIO.regWritten.contains(regId)) + prevSegChanged |= prevSegIO.regForward.insert(regId).second; + } + for(auto& regId : curSegIO.regForward) + { + if (!prevSegIO.regWritten.contains(regId)) + prevSegChanged |= prevSegIO.regForward.insert(regId).second; + } + if(prevSegChanged) + segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex); + } + // same for hint links + for(auto& prevSeg : curSeg->list_deadCodeHintBy) + { + IMLSegmentRegisterInOut& prevSegIO = segRegisterInOutList[prevSeg->momentaryIndex]; + bool prevSegChanged = false; + for(auto& regId : curSegIO.regImported) + { + if (!prevSegIO.regWritten.contains(regId)) + prevSegChanged |= prevSegIO.regForward.insert(regId).second; + } + for(auto& regId : curSegIO.regForward) + { + if (!prevSegIO.regWritten.contains(regId)) + prevSegChanged |= prevSegIO.regForward.insert(regId).second; + } + if(prevSegChanged) + segIdsWhichNeedUpdate.insert(prevSeg->momentaryIndex); + } + } + } + + std::unordered_set GetRegistersNeededAtEndOfSegment(IMLSegment& seg) + { + std::unordered_set regsNeeded; + if(seg.nextSegmentIsUncertain) + { + if(seg.deadCodeEliminationHintSeg) + { + auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex]; + regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end()); + regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end()); + } + else + { + // add all regs + for(uint32 i = 0; i <= m_maxRegId; i++) + regsNeeded.insert(i); + } + return regsNeeded; + } + if(seg.nextSegmentBranchTaken) + { + auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex]; + regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end()); + regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end()); + } + if(seg.nextSegmentBranchNotTaken) + { + auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex]; + regsNeeded.insert(nextSegIO.regImported.begin(), nextSegIO.regImported.end()); + regsNeeded.insert(nextSegIO.regForward.begin(), nextSegIO.regForward.end()); + } + return regsNeeded; + } + + bool IsRegisterNeededAtEndOfSegment(IMLSegment& seg, IMLRegID regId) + { + if(seg.nextSegmentIsUncertain) + { + if(!seg.deadCodeEliminationHintSeg) + return true; + auto& nextSegIO = m_segRegisterInOutList[seg.deadCodeEliminationHintSeg->momentaryIndex]; + if(nextSegIO.regImported.contains(regId)) + return true; + if(nextSegIO.regForward.contains(regId)) + return true; + return false; + } + if(seg.nextSegmentBranchTaken) + { + auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchTaken->momentaryIndex]; + if(nextSegIO.regImported.contains(regId)) + return true; + if(nextSegIO.regForward.contains(regId)) + return true; + } + if(seg.nextSegmentBranchNotTaken) + { + auto& nextSegIO = m_segRegisterInOutList[seg.nextSegmentBranchNotTaken->momentaryIndex]; + if(nextSegIO.regImported.contains(regId)) + return true; + if(nextSegIO.regForward.contains(regId)) + return true; + } + return false; + } + + private: + std::span m_segmentList; + uint32 m_maxRegId; + + std::vector m_segRegisterInOutList; + +}; + +// scan backwards starting from index and return the index of the first found instruction which writes to the given 
register (by id) +sint32 IMLUtil_FindInstructionWhichWritesRegister(IMLSegment& seg, sint32 startIndex, IMLReg reg, sint32 maxScanDistance = -1) +{ + sint32 endIndex = std::max(startIndex - maxScanDistance, 0); + for (sint32 i = startIndex; i >= endIndex; i--) + { + IMLInstruction& imlInstruction = seg.imlList[i]; + IMLUsedRegisters registersUsed; + imlInstruction.CheckRegisterUsage(®istersUsed); + if (registersUsed.IsBaseGPRWritten(reg)) + return i; + } + return -1; +} + +// returns true if the instruction can safely be moved while keeping ordering constraints and data dependencies intact +// initialIndex is inclusive, targetIndex is exclusive +bool IMLUtil_CanMoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex) +{ + boost::container::static_vector regsWritten; + boost::container::static_vector regsRead; + // get list of read and written registers + IMLUsedRegisters registersUsed; + seg.imlList[initialIndex].CheckRegisterUsage(®istersUsed); + registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) { + if (isWritten) + regsWritten.push_back(reg.GetRegID()); + else + regsRead.push_back(reg.GetRegID()); + }); + // check all the instructions inbetween + if(initialIndex < targetIndex) + { + sint32 scanStartIndex = initialIndex+1; // +1 to skip the moving instruction itself + sint32 scanEndIndex = targetIndex; + for (sint32 i = scanStartIndex; i < scanEndIndex; i++) + { + IMLUsedRegisters registersUsed; + seg.imlList[i].CheckRegisterUsage(®istersUsed); + // in order to be able to move an instruction past another instruction, any of the read registers must not be modified (written) + // and any of it's written registers must not be read + bool canMove = true; + registersUsed.ForEachAccessedGPR([&](IMLReg reg, bool isWritten) { + IMLRegID regId = reg.GetRegID(); + if (!isWritten) + canMove = canMove && std::find(regsWritten.begin(), regsWritten.end(), regId) == regsWritten.end(); + else + canMove = canMove && std::find(regsRead.begin(), regsRead.end(), regId) == regsRead.end(); + }); + if(!canMove) + return false; + } + } + else + { + cemu_assert_unimplemented(); // backwards scan is todo + return false; + } + return true; +} + +sint32 IMLUtil_CountRegisterReadsInRange(IMLSegment& seg, sint32 scanStartIndex, sint32 scanEndIndex, IMLRegID regId) +{ + cemu_assert_debug(scanStartIndex <= scanEndIndex); + cemu_assert_debug(scanEndIndex < seg.imlList.size()); + sint32 count = 0; + for (sint32 i = scanStartIndex; i <= scanEndIndex; i++) + { + IMLUsedRegisters registersUsed; + seg.imlList[i].CheckRegisterUsage(®istersUsed); + registersUsed.ForEachReadGPR([&](IMLReg reg) { + if (reg.GetRegID() == regId) + count++; + }); + } + return count; +} + +// move instruction from one index to another +// instruction will be inserted before the instruction at targetIndex +// returns the new instruction index of the moved instruction +sint32 IMLUtil_MoveInstructionTo(IMLSegment& seg, sint32 initialIndex, sint32 targetIndex) +{ + cemu_assert_debug(initialIndex != targetIndex); + IMLInstruction temp = seg.imlList[initialIndex]; + if (initialIndex < targetIndex) + { + cemu_assert_debug(targetIndex > 0); + targetIndex--; + for(size_t i=initialIndex; i regsNeeded = regIoAnalysis.GetRegistersNeededAtEndOfSegment(seg); + + // start with suffix instruction + if(seg.HasSuffixInstruction()) + { + IMLInstruction& imlInstruction = seg.imlList[seg.GetSuffixInstructionIndex()]; + IMLUsedRegisters registersUsed; + imlInstruction.CheckRegisterUsage(®istersUsed); + 
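// Illustrative sketch (standalone, not part of this file): the backwards liveness walk
// this dead-code pass performs - starting from the registers needed at the end of the
// segment, an instruction whose writes are all unneeded and which has no side effects
// becomes a no-op; otherwise its writes are removed from and its reads added to the
// needed set. Toy types only.
#include <cassert>
#include <set>
#include <vector>

struct ToyInstr
{
	std::vector<int> reads;
	std::vector<int> writes;
	bool hasSideEffects = false;
	bool isNoOp = false;
};

static void RemoveDeadCode(std::vector<ToyInstr>& code, std::set<int> regsNeeded)
{
	for (int i = (int)code.size() - 1; i >= 0; i--)
	{
		ToyInstr& instr = code[i];
		bool onlyWritesRedundant = true;
		for (int r : instr.writes)
			if (regsNeeded.count(r))
				onlyWritesRedundant = false;
		for (int r : instr.writes)
			regsNeeded.erase(r);
		for (int r : instr.reads)
			regsNeeded.insert(r);
		if (!instr.hasSideEffects && onlyWritesRedundant)
			instr.isNoOp = true;
	}
}

int main()
{
	std::vector<ToyInstr> code = {
		{{0}, {1}}, // r1 = f(r0)
		{{0}, {3}}, // r3 = f(r0), r3 is never needed again
		{{1}, {2}}, // r2 = f(r1)
	};
	RemoveDeadCode(code, {2}); // only r2 is needed at the end
	assert(!code[0].isNoOp && !code[2].isNoOp);
	assert(code[1].isNoOp);
	return 0;
}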
registersUsed.ForEachWrittenGPR([&](IMLReg reg) { + regsNeeded.erase(reg.GetRegID()); + }); + registersUsed.ForEachReadGPR([&](IMLReg reg) { + regsNeeded.insert(reg.GetRegID()); + }); + } + // iterate instructions backwards + for (sint32 i = seg.imlList.size() - (seg.HasSuffixInstruction() ? 2:1); i >= 0; i--) + { + IMLInstruction& imlInstruction = seg.imlList[i]; + IMLUsedRegisters registersUsed; + imlInstruction.CheckRegisterUsage(®istersUsed); + // register read -> remove from overwritten list + // register written -> add to overwritten list + + // check if this instruction only writes registers which will never be read + bool onlyWritesRedundantRegisters = true; + registersUsed.ForEachWrittenGPR([&](IMLReg reg) { + if (regsNeeded.contains(reg.GetRegID())) + onlyWritesRedundantRegisters = false; + }); + // check if any of the written registers are read after this point + registersUsed.ForEachWrittenGPR([&](IMLReg reg) { + regsNeeded.erase(reg.GetRegID()); + }); + registersUsed.ForEachReadGPR([&](IMLReg reg) { + regsNeeded.insert(reg.GetRegID()); + }); + if(!imlInstruction.HasSideEffects() && onlyWritesRedundantRegisters) + { + imlInstruction.make_no_op(); + } + } +} + +void IMLOptimizerX86_SubstituteCJumpForEflagsJump(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg) +{ + // convert and optimize bool condition jumps to eflags condition jumps + // - Moves eflag setter (e.g. cmp) closer to eflags consumer (conditional jump) if necessary. If not possible but required then exit early + // - Since we only rely on eflags, the boolean register can be optimized out if DCE considers it unused + // - Further detect and optimize patterns like DEC + CMP + JCC into fused ops (todo) + + // check if this segment ends with a conditional jump + if(!seg.HasSuffixInstruction()) + return; + sint32 cjmpInstIndex = seg.GetSuffixInstructionIndex(); + if(cjmpInstIndex < 0) + return; + IMLInstruction& cjumpInstr = seg.imlList[cjmpInstIndex]; + if( cjumpInstr.type != PPCREC_IML_TYPE_CONDITIONAL_JUMP ) + return; + IMLReg regCondBool = cjumpInstr.op_conditional_jump.registerBool; + bool invertedCondition = !cjumpInstr.op_conditional_jump.mustBeTrue; + // find the instruction which sets the bool + sint32 cmpInstrIndex = IMLUtil_FindInstructionWhichWritesRegister(seg, cjmpInstIndex-1, regCondBool, 20); + if(cmpInstrIndex < 0) + return; + // check if its an instruction combo which can be optimized (currently only cmp + cjump) and get the condition + IMLInstruction& condSetterInstr = seg.imlList[cmpInstrIndex]; + IMLCondition cond; + if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE) + cond = condSetterInstr.op_compare.cond; + else if(condSetterInstr.type == PPCREC_IML_TYPE_COMPARE_S32) + cond = condSetterInstr.op_compare_s32.cond; + else + return; + // check if instructions inbetween modify eflags + sint32 indexEflagsSafeStart = -1; // index of the first instruction which does not modify eflags up to cjump + for(sint32 i = cjmpInstIndex-1; i > cmpInstrIndex; i--) + { + if(IMLOptimizerX86_ModifiesEFlags(seg.imlList[i])) + { + indexEflagsSafeStart = i+1; + break; + } + } + if(indexEflagsSafeStart >= 0) + { + cemu_assert(indexEflagsSafeStart > 0); + // there are eflags-modifying instructions inbetween the bool setter and cjump + // try to move the eflags setter close enough to the cjump (to indexEflagsSafeStart) + bool canMove = IMLUtil_CanMoveInstructionTo(seg, cmpInstrIndex, indexEflagsSafeStart); + if(!canMove) + { + return; + } + else + { + cmpInstrIndex = IMLUtil_MoveInstructionTo(seg, cmpInstrIndex, 
indexEflagsSafeStart); + } + } + // we can turn the jump into an eflags jump + cjumpInstr.make_x86_eflags_jcc(cond, invertedCondition); + + if (IMLUtil_CountRegisterReadsInRange(seg, cmpInstrIndex, cjmpInstIndex, regCondBool.GetRegID()) > 1 || regIoAnalysis.IsRegisterNeededAtEndOfSegment(seg, regCondBool.GetRegID())) + return; // bool register is used beyond the CMP, we can't drop it + + auto& cmpInstr = seg.imlList[cmpInstrIndex]; + cemu_assert_debug(cmpInstr.type == PPCREC_IML_TYPE_COMPARE || cmpInstr.type == PPCREC_IML_TYPE_COMPARE_S32); + if(cmpInstr.type == PPCREC_IML_TYPE_COMPARE) + { + IMLReg regA = cmpInstr.op_compare.regA; + IMLReg regB = cmpInstr.op_compare.regB; + seg.imlList[cmpInstrIndex].make_r_r(PPCREC_IML_OP_X86_CMP, regA, regB); + } + else + { + IMLReg regA = cmpInstr.op_compare_s32.regA; + sint32 val = cmpInstr.op_compare_s32.immS32; + seg.imlList[cmpInstrIndex].make_r_s32(PPCREC_IML_OP_X86_CMP, regA, val); + } + +} + +void IMLOptimizer_StandardOptimizationPassForSegment(IMLOptimizerRegIOAnalysis& regIoAnalysis, IMLSegment& seg) +{ + IMLOptimizer_RemoveDeadCodeFromSegment(regIoAnalysis, seg); + +#ifdef ARCH_X86_64 + // x86 specific optimizations + IMLOptimizerX86_SubstituteCJumpForEflagsJump(regIoAnalysis, seg); // this pass should be applied late since it creates invisible eflags dependencies (which would break further register dependency analysis) +#endif +} + +void IMLOptimizer_StandardOptimizationPass(ppcImlGenContext_t& ppcImlGenContext) +{ + IMLOptimizerRegIOAnalysis regIoAnalysis(ppcImlGenContext.segmentList2, ppcImlGenContext.GetMaxRegId()); + regIoAnalysis.ComputeDepedencies(); + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + { + IMLOptimizer_StandardOptimizationPassForSegment(regIoAnalysis, *segIt); + } +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp new file mode 100644 index 00000000..935e61ac --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.cpp @@ -0,0 +1,2204 @@ +#include "IML.h" + +#include "../PPCRecompiler.h" +#include "../PPCRecompilerIml.h" +#include "IMLRegisterAllocator.h" +#include "IMLRegisterAllocatorRanges.h" + +#include "../BackendX64/BackendX64.h" +#ifdef __aarch64__ +#include "../BackendAArch64/BackendAArch64.h" +#endif + +#include +#include + +#include "Common/cpu_features.h" + +#define DEBUG_RA_EXTRA_VALIDATION 0 // if set to non-zero, additional expensive validation checks will be performed +#define DEBUG_RA_INSTRUCTION_GEN 0 + +struct IMLRARegAbstractLiveness // preliminary liveness info. One entry per register and segment +{ + IMLRARegAbstractLiveness(IMLRegFormat regBaseFormat, sint32 usageStart, sint32 usageEnd) + : regBaseFormat(regBaseFormat), usageStart(usageStart), usageEnd(usageEnd) {}; + + void TrackInstruction(sint32 index) + { + usageStart = std::min(usageStart, index); + usageEnd = std::max(usageEnd, index + 1); // exclusive index + } + + sint32 usageStart; + sint32 usageEnd; + bool isProcessed{false}; + IMLRegFormat regBaseFormat; +}; + +struct IMLRegisterAllocatorContext +{ + IMLRegisterAllocatorParameters* raParam; + ppcImlGenContext_t* deprGenContext; // deprecated. 
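// Illustrative sketch (standalone, not part of this file): the half-open usage interval
// kept by IMLRARegAbstractLiveness above - usageEnd is exclusive, so touching instruction
// index i extends the range to [min(usageStart, i), max(usageEnd, i + 1)). Toy struct only.
#include <algorithm>
#include <cassert>

struct ToyAbstractRange
{
	int usageStart;
	int usageEnd; // exclusive

	void TrackInstruction(int index)
	{
		usageStart = std::min(usageStart, index);
		usageEnd = std::max(usageEnd, index + 1);
	}
};

int main()
{
	ToyAbstractRange range{10, 11}; // first seen at instruction 10
	range.TrackInstruction(14);
	range.TrackInstruction(8);
	assert(range.usageStart == 8 && range.usageEnd == 15); // now covers instructions 8..14
	return 0;
}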
Try to decouple IMLRA from other parts of IML/PPCRec + + std::unordered_map regIdToBaseFormat; + // first pass + std::vector> perSegmentAbstractRanges; + + // helper methods + inline std::unordered_map& GetSegmentAbstractRangeMap(IMLSegment* imlSegment) + { + return perSegmentAbstractRanges[imlSegment->momentaryIndex]; + } + + inline IMLRegFormat GetBaseFormatByRegId(IMLRegID regId) const + { + auto it = regIdToBaseFormat.find(regId); + cemu_assert_debug(it != regIdToBaseFormat.cend()); + return it->second; + } +}; + +struct IMLFixedRegisters +{ + struct Entry + { + Entry(IMLReg reg, IMLPhysRegisterSet physRegSet) + : reg(reg), physRegSet(physRegSet) {} + + IMLReg reg; + IMLPhysRegisterSet physRegSet; + }; + boost::container::small_vector listInput; // fixed register requirements for instruction input edge + boost::container::small_vector listOutput; // fixed register requirements for instruction output edge +}; + +static void SetupCallingConvention(const IMLInstruction* instruction, IMLFixedRegisters& fixedRegs, const IMLPhysReg intParamToPhysReg[3], const IMLPhysReg floatParamToPhysReg[3], const IMLPhysReg intReturnPhysReg, const IMLPhysReg floatReturnPhysReg, IMLPhysRegisterSet volatileRegisters) +{ + sint32 numIntParams = 0, numFloatParams = 0; + + auto AddParameterMapping = [&](IMLReg reg) { + if (!reg.IsValid()) + return; + if (reg.GetBaseFormat() == IMLRegFormat::I64) + { + IMLPhysRegisterSet ps; + ps.SetAvailable(intParamToPhysReg[numIntParams]); + fixedRegs.listInput.emplace_back(reg, ps); + numIntParams++; + } + else if (reg.GetBaseFormat() == IMLRegFormat::F64) + { + IMLPhysRegisterSet ps; + ps.SetAvailable(floatParamToPhysReg[numFloatParams]); + fixedRegs.listInput.emplace_back(reg, ps); + numFloatParams++; + } + else + { + cemu_assert_suspicious(); + } + }; + AddParameterMapping(instruction->op_call_imm.regParam0); + AddParameterMapping(instruction->op_call_imm.regParam1); + AddParameterMapping(instruction->op_call_imm.regParam2); + // return value + if (instruction->op_call_imm.regReturn.IsValid()) + { + IMLRegFormat returnFormat = instruction->op_call_imm.regReturn.GetBaseFormat(); + bool isIntegerFormat = returnFormat == IMLRegFormat::I64 || returnFormat == IMLRegFormat::I32 || returnFormat == IMLRegFormat::I16 || returnFormat == IMLRegFormat::I8; + IMLPhysRegisterSet ps; + if (isIntegerFormat) + { + ps.SetAvailable(intReturnPhysReg); + volatileRegisters.SetReserved(intReturnPhysReg); + } + else + { + ps.SetAvailable(floatReturnPhysReg); + volatileRegisters.SetReserved(floatReturnPhysReg); + } + fixedRegs.listOutput.emplace_back(instruction->op_call_imm.regReturn, ps); + } + // block volatile registers from being used on the output edge, this makes the register allocator store them during the call + fixedRegs.listOutput.emplace_back(IMLREG_INVALID, volatileRegisters); +} + +#if defined(__aarch64__) +// aarch64 +static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs) +{ + fixedRegs.listInput.clear(); + fixedRegs.listOutput.clear(); + + // The purpose of GetInstructionFixedRegisters() is to constraint virtual registers to specific physical registers for instructions which need it + // on x86 this is used for instructions like SHL , CL where the CL register is hardwired. 
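// Illustrative sketch (standalone, not part of this file): the parameter mapping idea
// behind SetupCallingConvention above - each valid call parameter is pinned to the next
// physical register of its class (integer or float). Toy types, and the physical
// register indices below are arbitrary; the real code also pins the return value and
// reserves the volatile register set on the call's output edge.
#include <cassert>
#include <utility>
#include <vector>

enum class ToyRegClass
{
	Int,
	Float
};

struct ToyFixedReq
{
	int virtualReg;
	int physReg;
};

static std::vector<ToyFixedReq> MapCallParams(const std::vector<std::pair<int, ToyRegClass>>& params,
											  const int intParamPhysRegs[3], const int floatParamPhysRegs[3])
{
	std::vector<ToyFixedReq> out;
	int numInt = 0, numFloat = 0;
	for (const auto& [vreg, cls] : params)
	{
		if (cls == ToyRegClass::Int)
			out.push_back({vreg, intParamPhysRegs[numInt++]});
		else
			out.push_back({vreg, floatParamPhysRegs[numFloat++]});
	}
	return out;
}

int main()
{
	const int intRegs[3] = {101, 102, 103};   // stand-ins for the integer parameter registers
	const int floatRegs[3] = {201, 202, 203}; // stand-ins for the float parameter registers
	auto fixed = MapCallParams({{7, ToyRegClass::Int}, {8, ToyRegClass::Float}, {9, ToyRegClass::Int}}, intRegs, floatRegs);
	assert(fixed[0].physReg == 101); // first integer parameter
	assert(fixed[1].physReg == 201); // first float parameter
	assert(fixed[2].physReg == 102); // second integer parameter
	return 0;
}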
On aarch it's probably only necessary for setting up the calling convention + if (instruction->type == PPCREC_IML_TYPE_CALL_IMM) + { + const IMLPhysReg intParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_GPR_BASE + 1, IMLArchAArch64::PHYSREG_GPR_BASE + 2}; + const IMLPhysReg floatParamToPhysReg[3] = {IMLArchAArch64::PHYSREG_FPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 1, IMLArchAArch64::PHYSREG_FPR_BASE + 2}; + IMLPhysRegisterSet volatileRegs; + for (int i = 0; i <= 17; i++) // x0 to x17 are volatile + volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_GPR_BASE + i); + // v0-v7 & v16-v31 are volatile. For v8-v15 only the high 64 bits are volatile. + for (int i = 0; i <= 7; i++) + volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i); + for (int i = 16; i <= 31; i++) + volatileRegs.SetAvailable(IMLArchAArch64::PHYSREG_FPR_BASE + i); + SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchAArch64::PHYSREG_GPR_BASE + 0, IMLArchAArch64::PHYSREG_FPR_BASE + 0, volatileRegs); + } +} +#else +// x86-64 +static void GetInstructionFixedRegisters(IMLInstruction* instruction, IMLFixedRegisters& fixedRegs) +{ + fixedRegs.listInput.clear(); + fixedRegs.listOutput.clear(); + + if (instruction->type == PPCREC_IML_TYPE_R_R_R) + { + if (instruction->operation == PPCREC_IML_OP_LEFT_SHIFT || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_S || instruction->operation == PPCREC_IML_OP_RIGHT_SHIFT_U) + { + if(!g_CPUFeatures.x86.bmi2) + { + IMLPhysRegisterSet ps; + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_ECX); + fixedRegs.listInput.emplace_back(instruction->op_r_r_r.regB, ps); + } + } + } + else if (instruction->type == PPCREC_IML_TYPE_ATOMIC_CMP_STORE) + { + IMLPhysRegisterSet ps; + ps.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX); + fixedRegs.listInput.emplace_back(IMLREG_INVALID, ps); // none of the inputs may use EAX + fixedRegs.listOutput.emplace_back(instruction->op_atomic_compare_store.regBoolOut, ps); // but we output to EAX + } + else if (instruction->type == PPCREC_IML_TYPE_CALL_IMM) + { + const IMLPhysReg intParamToPhysReg[3] = {IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8}; + const IMLPhysReg floatParamToPhysReg[3] = {IMLArchX86::PHYSREG_FPR_BASE + 0, IMLArchX86::PHYSREG_FPR_BASE + 1, IMLArchX86::PHYSREG_FPR_BASE + 2}; + IMLPhysRegisterSet volatileRegs; + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX); + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX); + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX); + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8); + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9); + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10); + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11); + // YMM0-YMM5 are volatile + for (int i = 0; i <= 5; i++) + volatileRegs.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + i); + // for YMM6-YMM15 only the upper 128 bits are volatile which we dont use + SetupCallingConvention(instruction, fixedRegs, intParamToPhysReg, floatParamToPhysReg, IMLArchX86::PHYSREG_GPR_BASE + X86_REG_EAX, IMLArchX86::PHYSREG_FPR_BASE + 0, volatileRegs); + } +} +#endif + +uint32 IMLRA_GetNextIterationIndex() +{ + static uint32 recRACurrentIterationIndex = 0; + recRACurrentIterationIndex++; + return recRACurrentIterationIndex; +} + 
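// Illustrative sketch (standalone, not part of this file): a fixed register requirement
// is just a physical register set restricted to a single register, e.g. the shift amount
// pinned to ECX above when BMI2 is unavailable. The bitmask class below is a toy
// stand-in, not Cemu's IMLPhysRegisterSet.
#include <cassert>
#include <cstdint>

class ToyPhysRegSet
{
public:
	void SetAvailable(int physReg) { m_mask |= (1ull << physReg); }
	bool IsAvailable(int physReg) const { return (m_mask >> physReg) & 1; }
	bool HasExactlyOneAvailable() const { return m_mask != 0 && (m_mask & (m_mask - 1)) == 0; }

private:
	uint64_t m_mask = 0;
};

int main()
{
	constexpr int kRegECX = 1; // arbitrary physical register index for the example
	ToyPhysRegSet shiftAmountConstraint;
	shiftAmountConstraint.SetAvailable(kRegECX); // only ECX may hold this operand
	assert(shiftAmountConstraint.IsAvailable(kRegECX));
	assert(!shiftAmountConstraint.IsAvailable(0));
	assert(shiftAmountConstraint.HasExactlyOneAvailable());
	return 0;
}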
+bool _detectLoop(IMLSegment* currentSegment, sint32 depth, uint32 iterationIndex, IMLSegment* imlSegmentLoopBase) +{ + if (currentSegment == imlSegmentLoopBase) + return true; + if (currentSegment->raInfo.lastIterationIndex == iterationIndex) + return currentSegment->raInfo.isPartOfProcessedLoop; + if (depth >= 9) + return false; + currentSegment->raInfo.lastIterationIndex = iterationIndex; + currentSegment->raInfo.isPartOfProcessedLoop = false; + + if (currentSegment->nextSegmentIsUncertain) + return false; + if (currentSegment->nextSegmentBranchNotTaken) + { + if (currentSegment->nextSegmentBranchNotTaken->momentaryIndex > currentSegment->momentaryIndex) + { + currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase); + } + } + if (currentSegment->nextSegmentBranchTaken) + { + if (currentSegment->nextSegmentBranchTaken->momentaryIndex > currentSegment->momentaryIndex) + { + currentSegment->raInfo.isPartOfProcessedLoop |= _detectLoop(currentSegment->nextSegmentBranchTaken, depth + 1, iterationIndex, imlSegmentLoopBase); + } + } + if (currentSegment->raInfo.isPartOfProcessedLoop) + currentSegment->loopDepth++; + return currentSegment->raInfo.isPartOfProcessedLoop; +} + +void IMLRA_DetectLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegmentLoopBase) +{ + uint32 iterationIndex = IMLRA_GetNextIterationIndex(); + imlSegmentLoopBase->raInfo.lastIterationIndex = iterationIndex; + if (_detectLoop(imlSegmentLoopBase->nextSegmentBranchTaken, 0, iterationIndex, imlSegmentLoopBase)) + { + imlSegmentLoopBase->loopDepth++; + } +} + +void IMLRA_IdentifyLoop(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +{ + if (imlSegment->nextSegmentIsUncertain) + return; + // check if this segment has a branch that links to itself (tight loop) + if (imlSegment->nextSegmentBranchTaken == imlSegment) + { + // segment loops over itself + imlSegment->loopDepth++; + return; + } + // check if this segment has a branch that goes backwards (potential complex loop) + if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->momentaryIndex < imlSegment->momentaryIndex) + { + IMLRA_DetectLoop(ppcImlGenContext, imlSegment); + } +} + +#define SUBRANGE_LIST_SIZE (128) + +sint32 IMLRA_CountDistanceUntilNextUse(raLivenessRange* subrange, raInstructionEdge startPosition) +{ + for (sint32 i = 0; i < subrange->list_accessLocations.size(); i++) + { + if (subrange->list_accessLocations[i].pos >= startPosition) + { + auto& it = subrange->list_accessLocations[i]; + cemu_assert_debug(it.IsRead() != it.IsWrite()); // an access location can be either read or write + cemu_assert_debug(!startPosition.ConnectsToPreviousSegment() && !startPosition.ConnectsToNextSegment()); + return it.pos.GetRaw() - startPosition.GetRaw(); + } + } + cemu_assert_debug(subrange->imlSegment->imlList.size() < 10000); + return 10001 * 2; +} + +// returns -1 if there is no fixed register requirement on or after startPosition +sint32 IMLRA_CountDistanceUntilFixedRegUsageInRange(IMLSegment* imlSegment, raLivenessRange* range, raInstructionEdge startPosition, sint32 physRegister, bool& hasFixedAccess) +{ + hasFixedAccess = false; + cemu_assert_debug(startPosition.IsInstructionIndex()); + for (auto& fixedReqEntry : range->list_fixedRegRequirements) + { + if (fixedReqEntry.pos < startPosition) + continue; + if (fixedReqEntry.allowedReg.IsAvailable(physRegister)) + { + hasFixedAccess = true; + return fixedReqEntry.pos.GetRaw() - 
startPosition.GetRaw(); + } + } + cemu_assert_debug(range->interval.end.IsInstructionIndex()); + return range->interval.end.GetRaw() - startPosition.GetRaw(); +} + +sint32 IMLRA_CountDistanceUntilFixedRegUsage(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 maxDistance, IMLRegID ourRegId, sint32 physRegister) +{ + cemu_assert_debug(startPosition.IsInstructionIndex()); + raInstructionEdge lastPos2; + lastPos2.Set(imlSegment->imlList.size(), false); + + raInstructionEdge endPos; + endPos = startPosition + maxDistance; + if (endPos > lastPos2) + endPos = lastPos2; + IMLFixedRegisters fixedRegs; + if (startPosition.IsOnOutputEdge()) + GetInstructionFixedRegisters(imlSegment->imlList.data() + startPosition.GetInstructionIndex(), fixedRegs); + for (raInstructionEdge currentPos = startPosition; currentPos <= endPos; ++currentPos) + { + if (currentPos.IsOnInputEdge()) + { + GetInstructionFixedRegisters(imlSegment->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs); + } + auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput; + for (auto& fixedRegLoc : fixedRegAccess) + { + if (fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId) + { + cemu_assert_debug(fixedRegLoc.reg.IsInvalid() || fixedRegLoc.physRegSet.HasExactlyOneAvailable()); // this whole function only makes sense when there is only one fixed register, otherwise there are extra permutations to consider. Except for IMLREG_INVALID which is used to indicate reserved registers + if (fixedRegLoc.physRegSet.IsAvailable(physRegister)) + return currentPos.GetRaw() - startPosition.GetRaw(); + } + } + } + return endPos.GetRaw() - startPosition.GetRaw(); +} + +// count how many instructions there are until physRegister is used by any subrange or reserved for any fixed register requirement (returns 0 if register is in use at startIndex) +sint32 PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(IMLSegment* imlSegment, raInstructionEdge startPosition, sint32 physRegister) +{ + cemu_assert_debug(startPosition.IsInstructionIndex()); + sint32 minDistance = (sint32)imlSegment->imlList.size() * 2 - startPosition.GetRaw(); + // next + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + while (subrangeItr) + { + if (subrangeItr->GetPhysicalRegister() != physRegister) + { + subrangeItr = subrangeItr->link_allSegmentRanges.next; + continue; + } + if (subrangeItr->interval.ContainsEdge(startPosition)) + return 0; + if (subrangeItr->interval.end < startPosition) + { + subrangeItr = subrangeItr->link_allSegmentRanges.next; + continue; + } + cemu_assert_debug(startPosition <= subrangeItr->interval.start); + sint32 currentDist = subrangeItr->interval.start.GetRaw() - startPosition.GetRaw(); + minDistance = std::min(minDistance, currentDist); + subrangeItr = subrangeItr->link_allSegmentRanges.next; + } + return minDistance; +} + +struct IMLRALivenessTimeline +{ + IMLRALivenessTimeline() + { + } + + // manually add an active range + void AddActiveRange(raLivenessRange* subrange) + { + activeRanges.emplace_back(subrange); + } + + void ExpireRanges(raInstructionEdge expireUpTo) + { + expiredRanges.clear(); + size_t count = activeRanges.size(); + for (size_t f = 0; f < count; f++) + { + raLivenessRange* liverange = activeRanges[f]; + if (liverange->interval.end < expireUpTo) // this was <= but since end is not inclusive we need to use < + { +#ifdef CEMU_DEBUG_ASSERT + if (!expireUpTo.ConnectsToNextSegment() && (liverange->subrangeBranchTaken || 
liverange->subrangeBranchNotTaken)) + assert_dbg(); // infinite subranges should not expire +#endif + expiredRanges.emplace_back(liverange); + // remove entry + activeRanges[f] = activeRanges[count - 1]; + f--; + count--; + } + } + if (count != activeRanges.size()) + activeRanges.resize(count); + } + + std::span GetExpiredRanges() + { + return {expiredRanges.data(), expiredRanges.size()}; + } + + std::span GetActiveRanges() + { + return {activeRanges.data(), activeRanges.size()}; + } + + raLivenessRange* GetActiveRangeByVirtualRegId(IMLRegID regId) + { + for (auto& it : activeRanges) + if (it->virtualRegister == regId) + return it; + return nullptr; + } + + raLivenessRange* GetActiveRangeByPhysicalReg(sint32 physReg) + { + cemu_assert_debug(physReg >= 0); + for (auto& it : activeRanges) + if (it->physicalRegister == physReg) + return it; + return nullptr; + } + + boost::container::small_vector activeRanges; + + private: + boost::container::small_vector expiredRanges; +}; + +// mark occupied registers by any overlapping range as unavailable in physRegSet +void PPCRecRA_MaskOverlappingPhysRegForGlobalRange(raLivenessRange* range2, IMLPhysRegisterSet& physRegSet) +{ + auto clusterRanges = range2->GetAllSubrangesInCluster(); + for (auto& subrange : clusterRanges) + { + IMLSegment* imlSegment = subrange->imlSegment; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + while (subrangeItr) + { + if (subrange == subrangeItr) + { + // next + subrangeItr = subrangeItr->link_allSegmentRanges.next; + continue; + } + if (subrange->interval.IsOverlapping(subrangeItr->interval)) + { + if (subrangeItr->GetPhysicalRegister() >= 0) + physRegSet.SetReserved(subrangeItr->GetPhysicalRegister()); + } + // next + subrangeItr = subrangeItr->link_allSegmentRanges.next; + } + } +} + +bool _livenessRangeStartCompare(raLivenessRange* lhs, raLivenessRange* rhs) +{ + return lhs->interval.start < rhs->interval.start; +} + +void _sortSegmentAllSubrangesLinkedList(IMLSegment* imlSegment) +{ + raLivenessRange* subrangeList[4096 + 1]; + sint32 count = 0; + // disassemble linked list + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + while (subrangeItr) + { + cemu_assert(count < 4096); + subrangeList[count] = subrangeItr; + count++; + // next + subrangeItr = subrangeItr->link_allSegmentRanges.next; + } + if (count == 0) + { + imlSegment->raInfo.linkedList_allSubranges = nullptr; + return; + } + // sort + std::sort(subrangeList, subrangeList + count, _livenessRangeStartCompare); + // reassemble linked list + subrangeList[count] = nullptr; + imlSegment->raInfo.linkedList_allSubranges = subrangeList[0]; + subrangeList[0]->link_allSegmentRanges.prev = nullptr; + subrangeList[0]->link_allSegmentRanges.next = subrangeList[1]; + for (sint32 i = 1; i < count; i++) + { + subrangeList[i]->link_allSegmentRanges.prev = subrangeList[i - 1]; + subrangeList[i]->link_allSegmentRanges.next = subrangeList[i + 1]; + } + // validate list +#if DEBUG_RA_EXTRA_VALIDATION + sint32 count2 = 0; + subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raInstructionEdge currentStartPosition; + currentStartPosition.SetRaw(RA_INTER_RANGE_START); + while (subrangeItr) + { + count2++; + if (subrangeItr->interval2.start < currentStartPosition) + assert_dbg(); + currentStartPosition = subrangeItr->interval2.start; + // next + subrangeItr = subrangeItr->link_allSegmentRanges.next; + } + if (count != count2) + assert_dbg(); +#endif +} + +std::unordered_map& IMLRA_GetSubrangeMap(IMLSegment* imlSegment) 
+{ + return imlSegment->raInfo.linkedList_perVirtualRegister; +} + +raLivenessRange* IMLRA_GetSubrange(IMLSegment* imlSegment, IMLRegID regId) +{ + auto it = imlSegment->raInfo.linkedList_perVirtualRegister.find(regId); + if (it == imlSegment->raInfo.linkedList_perVirtualRegister.end()) + return nullptr; + return it->second; +} + +struct raFixedRegRequirementWithVGPR +{ + raFixedRegRequirementWithVGPR(raInstructionEdge pos, IMLPhysRegisterSet allowedReg, IMLRegID regId) + : pos(pos), allowedReg(allowedReg), regId(regId) {} + + raInstructionEdge pos; + IMLPhysRegisterSet allowedReg; + IMLRegID regId; +}; + +std::vector IMLRA_BuildSegmentInstructionFixedRegList(IMLSegment* imlSegment) +{ + std::vector frrList; + size_t index = 0; + while (index < imlSegment->imlList.size()) + { + IMLFixedRegisters fixedRegs; + GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs); + raInstructionEdge pos; + pos.Set(index, true); + for (auto& fixedRegAccess : fixedRegs.listInput) + { + frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID); + } + pos = pos + 1; + for (auto& fixedRegAccess : fixedRegs.listOutput) + { + frrList.emplace_back(pos, fixedRegAccess.physRegSet, fixedRegAccess.reg.IsValid() ? fixedRegAccess.reg.GetRegID() : IMLRegID_INVALID); + } + index++; + } + return frrList; +} + +boost::container::small_vector IMLRA_GetRangeWithFixedRegReservationOverlappingPos(IMLSegment* imlSegment, raInstructionEdge pos, IMLPhysReg physReg) +{ + boost::container::small_vector rangeList; + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + { + if (!currentRange->interval.ContainsEdge(pos)) + continue; + IMLPhysRegisterSet allowedRegs; + if (!currentRange->GetAllowedRegistersEx(allowedRegs)) + continue; + if (allowedRegs.IsAvailable(physReg)) + rangeList.emplace_back(currentRange); + } + return rangeList; +} + +void IMLRA_HandleFixedRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +{ + // first pass - iterate over all ranges with fixed register requirements and split them if they cross the segment border + // todo - this pass currently creates suboptimal results by splitting all ranges that cross the segment border if they have any fixed register requirement. 
This can be avoided in some cases + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange;) + { + IMLPhysRegisterSet allowedRegs; + if(currentRange->list_fixedRegRequirements.empty()) + { + currentRange = currentRange->link_allSegmentRanges.next; + continue; // since we run this pass for every segment we dont need to do global checks here for clusters which may not even have fixed register requirements + } + if (!currentRange->GetAllowedRegistersEx(allowedRegs)) + { + currentRange = currentRange->link_allSegmentRanges.next; + continue; + } + if (currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment()) + { + raLivenessRange* nextRange = currentRange->link_allSegmentRanges.next; + IMLRA_ExplodeRangeCluster(ppcImlGenContext, currentRange); + currentRange = nextRange; + continue; + } + currentRange = currentRange->link_allSegmentRanges.next; + } + // second pass - look for ranges with conflicting fixed register requirements and split these too (locally) + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + { + IMLPhysRegisterSet allowedRegs; + if (currentRange->list_fixedRegRequirements.empty()) + continue; // we dont need to check whole clusters because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment + if (!currentRange->GetAllowedRegistersEx(allowedRegs)) + continue; + if (allowedRegs.HasAnyAvailable()) + continue; + cemu_assert_unimplemented(); + } + // third pass - assign fixed registers, split ranges if needed + std::vector frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment); + std::unordered_map lastVGPR; + for (size_t i = 0; i < frr.size(); i++) + { + raFixedRegRequirementWithVGPR& entry = frr[i]; + // we currently only handle fixed register requirements with a single register + // with one exception: When regId is IMLRegID_INVALID then the entry acts as a list of reserved registers + cemu_assert_debug(entry.regId == IMLRegID_INVALID || entry.allowedReg.HasExactlyOneAvailable()); + for (IMLPhysReg physReg = entry.allowedReg.GetFirstAvailableReg(); physReg >= 0; physReg = entry.allowedReg.GetNextAvailableReg(physReg + 1)) + { + // check if the assigned vGPR has changed + bool vgprHasChanged = false; + auto it = lastVGPR.find(physReg); + if (it != lastVGPR.end()) + vgprHasChanged = it->second != entry.regId; + else + vgprHasChanged = true; + lastVGPR[physReg] = entry.regId; + + if (!vgprHasChanged) + continue; + + boost::container::small_vector overlappingRanges = IMLRA_GetRangeWithFixedRegReservationOverlappingPos(imlSegment, entry.pos, physReg); + if (entry.regId != IMLRegID_INVALID) + cemu_assert_debug(!overlappingRanges.empty()); // there should always be at least one range that overlaps corresponding to the fixed register requirement, except for IMLRegID_INVALID which is used to indicate reserved registers + + for (auto& range : overlappingRanges) + { + if (range->interval.start < entry.pos) + { + IMLRA_SplitRange(ppcImlGenContext, range, entry.pos, true); + } + } + } + } + // finally iterate ranges and assign fixed registers + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + { + IMLPhysRegisterSet allowedRegs; + if (currentRange->list_fixedRegRequirements.empty()) + continue; // we dont need to check whole clusters 
because the pass above guarantees that there are no ranges with fixed register requirements that extend outside of this segment + if (!currentRange->GetAllowedRegistersEx(allowedRegs)) + { + cemu_assert_debug(currentRange->list_fixedRegRequirements.empty()); + continue; + } + cemu_assert_debug(allowedRegs.HasExactlyOneAvailable()); + currentRange->SetPhysicalRegister(allowedRegs.GetFirstAvailableReg()); + } + // DEBUG - check for collisions and make sure all ranges with fixed register requirements got their physical register assigned +#if DEBUG_RA_EXTRA_VALIDATION + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + { + IMLPhysRegisterSet allowedRegs; + if (!currentRange->HasPhysicalRegister()) + continue; + for (raLivenessRange* currentRange2 = imlSegment->raInfo.linkedList_allSubranges; currentRange2; currentRange2 = currentRange2->link_allSegmentRanges.next) + { + if (currentRange == currentRange2) + continue; + if (currentRange->interval2.IsOverlapping(currentRange2->interval2)) + { + cemu_assert_debug(currentRange->GetPhysicalRegister() != currentRange2->GetPhysicalRegister()); + } + } + } +#endif +} + +// we should not split ranges on instructions with tied registers (i.e. where a register encoded as a single parameter is both input and output) +// otherwise the RA algorithm has to assign both ranges the same physical register (not supported yet) and the point of splitting to fit another range is nullified +void IMLRA_MakeSafeSplitPosition(IMLSegment* imlSegment, raInstructionEdge& pos) +{ + // we ignore the instruction for now and just always make it a safe split position + cemu_assert_debug(pos.IsInstructionIndex()); + if (pos.IsOnOutputEdge()) + pos = pos - 1; +} + +// convenience wrapper for IMLRA_MakeSafeSplitPosition +void IMLRA_MakeSafeSplitDistance(IMLSegment* imlSegment, raInstructionEdge startPos, sint32& distance) +{ + cemu_assert_debug(startPos.IsInstructionIndex()); + cemu_assert_debug(distance >= 0); + raInstructionEdge endPos = startPos + distance; + IMLRA_MakeSafeSplitPosition(imlSegment, endPos); + if (endPos < startPos) + { + distance = 0; + return; + } + distance = endPos.GetRaw() - startPos.GetRaw(); +} + +static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx); + +class RASpillStrategy +{ + public: + virtual void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) = 0; + + sint32 GetCost() + { + return strategyCost; + } + + protected: + void ResetCost() + { + strategyCost = INT_MAX; + } + + sint32 strategyCost; +}; + +class RASpillStrategy_LocalRangeHoleCutting : public RASpillStrategy +{ + public: + void Reset() + { + localRangeHoleCutting.distance = -1; + localRangeHoleCutting.largestHoleSubrange = nullptr; + ResetCost(); + } + + void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs) + { + raInstructionEdge currentRangeStart = currentRange->interval.start; + sint32 requiredSize2 = currentRange->interval.GetPreciseDistance(); + cemu_assert_debug(localRangeHoleCutting.distance == -1); + cemu_assert_debug(strategyCost == INT_MAX); + if (!currentRangeStart.ConnectsToPreviousSegment()) + { + cemu_assert_debug(currentRangeStart.GetRaw() >= 0); + for (auto candidate : timeline.activeRanges) + { + if (candidate->interval.ExtendsIntoNextSegment()) + continue; + // new checks (Oct 2024): + if (candidate == currentRange) + continue; + if 
(candidate->GetPhysicalRegister() < 0) + continue; + if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) + continue; + + sint32 distance2 = IMLRA_CountDistanceUntilNextUse(candidate, currentRangeStart); + IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance2); + if (distance2 < 2) + continue; + cemu_assert_debug(currentRangeStart.IsInstructionIndex()); + distance2 = std::min(distance2, imlSegment->imlList.size() * 2 - currentRangeStart.GetRaw()); // limit distance to end of segment + // calculate split cost of candidate + sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(candidate, currentRangeStart + distance2); + // calculate additional split cost of currentRange if hole is not large enough + if (distance2 < requiredSize2) + { + cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance2); + // we also slightly increase cost in relation to the remaining length (in order to make the algorithm prefer larger holes) + cost += (requiredSize2 - distance2) / 10; + } + // compare cost with previous candidates + if (cost < strategyCost) + { + strategyCost = cost; + localRangeHoleCutting.distance = distance2; + localRangeHoleCutting.largestHoleSubrange = candidate; + } + } + } + } + + void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override + { + cemu_assert_debug(strategyCost != INT_MAX); + sint32 requiredSize2 = currentRange->interval.GetPreciseDistance(); + raInstructionEdge currentRangeStart = currentRange->interval.start; + + raInstructionEdge holeStartPosition = currentRangeStart; + raInstructionEdge holeEndPosition = currentRangeStart + localRangeHoleCutting.distance; + raLivenessRange* collisionRange = localRangeHoleCutting.largestHoleSubrange; + + if (collisionRange->interval.start < holeStartPosition) + { + collisionRange = IMLRA_SplitRange(nullptr, collisionRange, holeStartPosition, true); + cemu_assert_debug(!collisionRange || collisionRange->interval.start >= holeStartPosition); // verify if splitting worked at all, tail must be on or after the split point + cemu_assert_debug(!collisionRange || collisionRange->interval.start >= holeEndPosition); // also verify that the trimmed hole is actually big enough + } + else + { + cemu_assert_unimplemented(); // we still need to trim? 
+ } + // we may also have to cut the current range to fit partially into the hole + if (requiredSize2 > localRangeHoleCutting.distance) + { + raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + localRangeHoleCutting.distance, true); + if (tailRange) + { + cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers + tailRange->UnsetPhysicalRegister(); + } + } + // verify that the hole is large enough + if (collisionRange) + { + cemu_assert_debug(!collisionRange->interval.IsOverlapping(currentRange->interval)); + } + } + + private: + struct + { + sint32 distance; + raLivenessRange* largestHoleSubrange; + } localRangeHoleCutting; +}; + +class RASpillStrategy_AvailableRegisterHole : public RASpillStrategy +{ + // split current range (this is generally only a good choice when the current range is long but has few usages) + public: + void Reset() + { + ResetCost(); + availableRegisterHole.distance = -1; + availableRegisterHole.physRegister = -1; + } + + void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& localAvailableRegsMask, const IMLPhysRegisterSet& allowedRegs) + { + sint32 requiredSize2 = currentRange->interval.GetPreciseDistance(); + + raInstructionEdge currentRangeStart = currentRange->interval.start; + cemu_assert_debug(strategyCost == INT_MAX); + availableRegisterHole.distance = -1; + availableRegisterHole.physRegister = -1; + if (currentRangeStart.GetRaw() >= 0) + { + if (localAvailableRegsMask.HasAnyAvailable()) + { + sint32 physRegItr = -1; + while (true) + { + physRegItr = localAvailableRegsMask.GetNextAvailableReg(physRegItr + 1); + if (physRegItr < 0) + break; + if (!allowedRegs.IsAvailable(physRegItr)) + continue; + // get size of potential hole for this register + sint32 distance = PPCRecRA_countDistanceUntilNextLocalPhysRegisterUse(imlSegment, currentRangeStart, physRegItr); + + // some instructions may require the same register for another range, check the distance here + sint32 distUntilFixedReg = IMLRA_CountDistanceUntilFixedRegUsage(imlSegment, currentRangeStart, distance, currentRange->GetVirtualRegister(), physRegItr); + if (distUntilFixedReg < distance) + distance = distUntilFixedReg; + + IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance); + if (distance < 2) + continue; + // calculate additional cost due to split + cemu_assert_debug(distance < requiredSize2); // should always be true otherwise previous step would have selected this register? 
+ sint32 cost = IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance); + // add small additional cost for the remaining range (prefer larger holes) + cost += ((requiredSize2 - distance) / 2) / 10; + if (cost < strategyCost) + { + strategyCost = cost; + availableRegisterHole.distance = distance; + availableRegisterHole.physRegister = physRegItr; + } + } + } + } + } + + void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override + { + cemu_assert_debug(strategyCost != INT_MAX); + raInstructionEdge currentRangeStart = currentRange->interval.start; + // use available register + raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + availableRegisterHole.distance, true); + if (tailRange) + { + cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers + tailRange->UnsetPhysicalRegister(); + } + } + + private: + struct + { + sint32 physRegister; + sint32 distance; // size of hole + } availableRegisterHole; +}; + +class RASpillStrategy_ExplodeRange : public RASpillStrategy +{ + public: + void Reset() + { + ResetCost(); + explodeRange.range = nullptr; + explodeRange.distance = -1; + } + + void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs) + { + raInstructionEdge currentRangeStart = currentRange->interval.start; + if (currentRangeStart.ConnectsToPreviousSegment()) + currentRangeStart.Set(0, true); + sint32 requiredSize2 = currentRange->interval.GetPreciseDistance(); + cemu_assert_debug(strategyCost == INT_MAX); + explodeRange.range = nullptr; + explodeRange.distance = -1; + for (auto candidate : timeline.activeRanges) + { + if (!candidate->interval.ExtendsIntoNextSegment()) + continue; + // new checks (Oct 2024): + if (candidate == currentRange) + continue; + if (candidate->GetPhysicalRegister() < 0) + continue; + if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) + continue; + + sint32 distance = IMLRA_CountDistanceUntilNextUse(candidate, currentRangeStart); + IMLRA_MakeSafeSplitDistance(imlSegment, currentRangeStart, distance); + if (distance < 2) + continue; + sint32 cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate); + // if the hole is not large enough, add cost of splitting current subrange + if (distance < requiredSize2) + { + cost += IMLRA_CalculateAdditionalCostAfterSplit(currentRange, currentRangeStart + distance); + // add small additional cost for the remaining range (prefer larger holes) + cost += ((requiredSize2 - distance) / 2) / 10; + } + // compare with current best candidate for this strategy + if (cost < strategyCost) + { + strategyCost = cost; + explodeRange.distance = distance; + explodeRange.range = candidate; + } + } + } + + void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override + { + raInstructionEdge currentRangeStart = currentRange->interval.start; + if (currentRangeStart.ConnectsToPreviousSegment()) + currentRangeStart.Set(0, true); + sint32 requiredSize2 = currentRange->interval.GetPreciseDistance(); + // explode range + IMLRA_ExplodeRangeCluster(nullptr, explodeRange.range); + // split current subrange if necessary + if (requiredSize2 > explodeRange.distance) + { + raLivenessRange* tailRange = IMLRA_SplitRange(nullptr, currentRange, currentRangeStart + explodeRange.distance, true); + if (tailRange) + { + 
cemu_assert_debug(tailRange->list_fixedRegRequirements.empty()); // we are not allowed to unassign fixed registers + tailRange->UnsetPhysicalRegister(); + } + } + } + + private: + struct + { + raLivenessRange* range; + sint32 distance; // size of hole + // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange + } explodeRange; +}; + +class RASpillStrategy_ExplodeRangeInter : public RASpillStrategy +{ + public: + void Reset() + { + ResetCost(); + explodeRange.range = nullptr; + explodeRange.distance = -1; + } + + void Evaluate(IMLSegment* imlSegment, raLivenessRange* currentRange, const IMLRALivenessTimeline& timeline, const IMLPhysRegisterSet& allowedRegs) + { + // explode the range with the least cost + cemu_assert_debug(strategyCost == INT_MAX); + cemu_assert_debug(explodeRange.range == nullptr && explodeRange.distance == -1); + for (auto candidate : timeline.activeRanges) + { + if (!candidate->interval.ExtendsIntoNextSegment()) + continue; + // only select candidates that clash with current subrange + if (candidate->GetPhysicalRegister() < 0 && candidate != currentRange) + continue; + // and also filter any that dont meet fixed register requirements + if (!allowedRegs.IsAvailable(candidate->GetPhysicalRegister())) + continue; + sint32 cost; + cost = IMLRA_CalculateAdditionalCostOfRangeExplode(candidate); + // compare with current best candidate for this strategy + if (cost < strategyCost) + { + strategyCost = cost; + explodeRange.distance = INT_MAX; + explodeRange.range = candidate; + } + } + // add current range as a candidate too + sint32 ownCost; + ownCost = IMLRA_CalculateAdditionalCostOfRangeExplode(currentRange); + if (ownCost < strategyCost) + { + strategyCost = ownCost; + explodeRange.distance = INT_MAX; + explodeRange.range = currentRange; + } + } + + void Apply(ppcImlGenContext_t* ctx, IMLSegment* imlSegment, raLivenessRange* currentRange) override + { + cemu_assert_debug(strategyCost != INT_MAX); + IMLRA_ExplodeRangeCluster(ctx, explodeRange.range); + } + + private: + struct + { + raLivenessRange* range; + sint32 distance; // size of hole + // note: If we explode a range, we still have to check the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange + }explodeRange; +}; + +// filter any registers from candidatePhysRegSet which cannot be used by currentRange due to fixed register requirements within the range that it occupies +void IMLRA_FilterReservedFixedRegisterRequirementsForSegment(IMLRegisterAllocatorContext& ctx, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet) +{ + IMLSegment* seg = currentRange->imlSegment; + if (seg->imlList.empty()) + return; // there can be no fixed register requirements if there are no instructions + + raInstructionEdge firstPos = currentRange->interval.start; + if (currentRange->interval.start.ConnectsToPreviousSegment()) + firstPos.SetRaw(0); + else if (currentRange->interval.start.ConnectsToNextSegment()) + firstPos.Set(seg->imlList.size() - 1, false); + + raInstructionEdge lastPos = currentRange->interval.end; + if (currentRange->interval.end.ConnectsToPreviousSegment()) + lastPos.SetRaw(0); + else if (currentRange->interval.end.ConnectsToNextSegment()) + lastPos.Set(seg->imlList.size() - 1, false); + cemu_assert_debug(firstPos <= lastPos); + + IMLRegID ourRegId = currentRange->GetVirtualRegister(); + + IMLFixedRegisters fixedRegs; + if 
(firstPos.IsOnOutputEdge()) + GetInstructionFixedRegisters(seg->imlList.data() + firstPos.GetInstructionIndex(), fixedRegs); + for (raInstructionEdge currentPos = firstPos; currentPos <= lastPos; ++currentPos) + { + if (currentPos.IsOnInputEdge()) + { + GetInstructionFixedRegisters(seg->imlList.data() + currentPos.GetInstructionIndex(), fixedRegs); + } + auto& fixedRegAccess = currentPos.IsOnInputEdge() ? fixedRegs.listInput : fixedRegs.listOutput; + for (auto& fixedRegLoc : fixedRegAccess) + { + if (fixedRegLoc.reg.IsInvalid() || fixedRegLoc.reg.GetRegID() != ourRegId) + candidatePhysRegSet.RemoveRegisters(fixedRegLoc.physRegSet); + } + } +} + +// filter out any registers along the range cluster +void IMLRA_FilterReservedFixedRegisterRequirementsForCluster(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, raLivenessRange* currentRange, IMLPhysRegisterSet& candidatePhysRegSet) +{ + cemu_assert_debug(currentRange->imlSegment == imlSegment); + if (currentRange->interval.ExtendsPreviousSegment() || currentRange->interval.ExtendsIntoNextSegment()) + { + auto clusterRanges = currentRange->GetAllSubrangesInCluster(); + for (auto& rangeIt : clusterRanges) + { + IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, rangeIt, candidatePhysRegSet); + if (!candidatePhysRegSet.HasAnyAvailable()) + break; + } + return; + } + IMLRA_FilterReservedFixedRegisterRequirementsForSegment(ctx, currentRange, candidatePhysRegSet); +} + +bool IMLRA_AssignSegmentRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment) +{ + // sort subranges ascending by start index + _sortSegmentAllSubrangesLinkedList(imlSegment); + + IMLRALivenessTimeline livenessTimeline; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + raInstructionEdge lastInstructionEdge; + lastInstructionEdge.SetRaw(RA_INTER_RANGE_END); + + struct + { + RASpillStrategy_LocalRangeHoleCutting localRangeHoleCutting; + RASpillStrategy_AvailableRegisterHole availableRegisterHole; + RASpillStrategy_ExplodeRange explodeRange; + // for ranges that connect to follow up segments: + RASpillStrategy_ExplodeRangeInter explodeRangeInter; + } strategy; + + while (subrangeItr) + { + raInstructionEdge currentRangeStart = subrangeItr->interval.start; // used to be currentIndex before refactor + PPCRecRA_debugValidateSubrange(subrangeItr); + + livenessTimeline.ExpireRanges((currentRangeStart > lastInstructionEdge) ? 
lastInstructionEdge : currentRangeStart); // expire up to currentIndex (inclusive), but exclude infinite ranges + + // if subrange already has register assigned then add it to the active list and continue + if (subrangeItr->GetPhysicalRegister() >= 0) + { + // verify if register is actually available +#if DEBUG_RA_EXTRA_VALIDATION + for (auto& liverangeItr : livenessTimeline.activeRanges) + { + // check for register mismatch + cemu_assert_debug(liverangeItr->GetPhysicalRegister() != subrangeItr->GetPhysicalRegister()); + } +#endif + livenessTimeline.AddActiveRange(subrangeItr); + subrangeItr = subrangeItr->link_allSegmentRanges.next; + continue; + } + // ranges with fixed register requirements should already have a phys register assigned + if (!subrangeItr->list_fixedRegRequirements.empty()) + { + cemu_assert_debug(subrangeItr->HasPhysicalRegister()); + } + // find free register for current subrangeItr and segment + IMLRegFormat regBaseFormat = ctx.GetBaseFormatByRegId(subrangeItr->GetVirtualRegister()); + IMLPhysRegisterSet candidatePhysRegSet = ctx.raParam->GetPhysRegPool(regBaseFormat); + cemu_assert_debug(candidatePhysRegSet.HasAnyAvailable()); // no valid pool provided for this register type + + IMLPhysRegisterSet allowedRegs = subrangeItr->GetAllowedRegisters(candidatePhysRegSet); + cemu_assert_debug(allowedRegs.HasAnyAvailable()); // if zero regs are available, then this range needs to be split to avoid mismatching register requirements (do this in the initial pass to keep the code here simpler) + candidatePhysRegSet &= allowedRegs; + + for (auto& liverangeItr : livenessTimeline.activeRanges) + { + cemu_assert_debug(liverangeItr->GetPhysicalRegister() >= 0); + candidatePhysRegSet.SetReserved(liverangeItr->GetPhysicalRegister()); + } + // check intersections with other ranges and determine allowed registers + IMLPhysRegisterSet localAvailableRegsMask = candidatePhysRegSet; // mask of registers that are currently not used (does not include range checks in other segments) + if (candidatePhysRegSet.HasAnyAvailable()) + { + // check for overlaps on a global scale (subrangeItr can be part of a larger range cluster across multiple segments) + PPCRecRA_MaskOverlappingPhysRegForGlobalRange(subrangeItr, candidatePhysRegSet); + } + // some target instructions may enforce specific registers (e.g. 
common on X86 where something like SHL reg, CL forces CL as the count register) + // we determine the list of allowed registers here + // this really only works if we assume single-register requirements (otherwise it's better not to filter out early and instead allow register corrections later but we don't support this yet) + if (candidatePhysRegSet.HasAnyAvailable()) + { + IMLRA_FilterReservedFixedRegisterRequirementsForCluster(ctx, imlSegment, subrangeItr, candidatePhysRegSet); + } + if (candidatePhysRegSet.HasAnyAvailable()) + { + // use free register + subrangeItr->SetPhysicalRegisterForCluster(candidatePhysRegSet.GetFirstAvailableReg()); + livenessTimeline.AddActiveRange(subrangeItr); + subrangeItr = subrangeItr->link_allSegmentRanges.next; // next + continue; + } + // there is no free register for the entire range + // evaluate different strategies of splitting ranges to free up another register or shorten the current range + strategy.localRangeHoleCutting.Reset(); + strategy.availableRegisterHole.Reset(); + strategy.explodeRange.Reset(); + // can't assign register + // there might be registers available, we just can't use them due to range conflicts + RASpillStrategy* selectedStrategy = nullptr; + auto SelectStrategyIfBetter = [&selectedStrategy](RASpillStrategy& newStrategy) { + if (newStrategy.GetCost() == INT_MAX) + return; + if (selectedStrategy == nullptr || newStrategy.GetCost() < selectedStrategy->GetCost()) + selectedStrategy = &newStrategy; + }; + + if (!subrangeItr->interval.ExtendsIntoNextSegment()) + { + // range ends in current segment, use local strategies + // evaluate strategy: Cut hole into local subrange + strategy.localRangeHoleCutting.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs); + SelectStrategyIfBetter(strategy.localRangeHoleCutting); + // evaluate strategy: Split current range to fit in available holes + // todo - are checks required to avoid splitting on the suffix instruction? 
+ strategy.availableRegisterHole.Evaluate(imlSegment, subrangeItr, livenessTimeline, localAvailableRegsMask, allowedRegs); + SelectStrategyIfBetter(strategy.availableRegisterHole); + // evaluate strategy: Explode inter-segment ranges + strategy.explodeRange.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs); + SelectStrategyIfBetter(strategy.explodeRange); + } + else // if subrangeItr->interval.ExtendsIntoNextSegment() + { + strategy.explodeRangeInter.Reset(); + strategy.explodeRangeInter.Evaluate(imlSegment, subrangeItr, livenessTimeline, allowedRegs); + SelectStrategyIfBetter(strategy.explodeRangeInter); + } + // choose strategy + if (selectedStrategy) + { + selectedStrategy->Apply(ppcImlGenContext, imlSegment, subrangeItr); + } + else + { + // none of the evaluated strategies can be applied, this should only happen if the segment extends into the next segment(s) for which we have no good strategy + cemu_assert_debug(subrangeItr->interval.ExtendsPreviousSegment()); + // alternative strategy if we have no other choice: explode current range + IMLRA_ExplodeRangeCluster(ppcImlGenContext, subrangeItr); + } + return false; + } + return true; +} + +void IMLRA_AssignRegisters(IMLRegisterAllocatorContext& ctx, ppcImlGenContext_t* ppcImlGenContext) +{ + // start with frequently executed segments first + sint32 maxLoopDepth = 0; + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + { + maxLoopDepth = std::max(maxLoopDepth, segIt->loopDepth); + } + // assign fixed registers first + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + IMLRA_HandleFixedRegisters(ppcImlGenContext, segIt); +#if DEBUG_RA_EXTRA_VALIDATION + // fixed registers are currently handled per-segment, but here we validate that they are assigned correctly on a global scope as well + for (IMLSegment* imlSegment : ppcImlGenContext->segmentList2) + { + for (raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; currentRange; currentRange = currentRange->link_allSegmentRanges.next) + { + IMLPhysRegisterSet allowedRegs; + if (!currentRange->GetAllowedRegistersEx(allowedRegs)) + { + cemu_assert_debug(currentRange->list_fixedRegRequirements.empty()); + continue; + } + cemu_assert_debug(currentRange->HasPhysicalRegister() && allowedRegs.IsAvailable(currentRange->GetPhysicalRegister())); + } + } +#endif + + while (true) + { + bool done = false; + for (sint32 d = maxLoopDepth; d >= 0; d--) + { + for (IMLSegment* segIt : ppcImlGenContext->segmentList2) + { + if (segIt->loopDepth != d) + continue; + done = IMLRA_AssignSegmentRegisters(ctx, ppcImlGenContext, segIt); + if (done == false) + break; + } + if (done == false) + break; + } + if (done) + break; + } +} + +void IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) +{ + // insert empty segments after every non-taken branch if the linked segment has more than one input + // this gives the register allocator more room to create efficient spill code + size_t segmentIndex = 0; + while (segmentIndex < ppcImlGenContext->segmentList2.size()) + { + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[segmentIndex]; + if (imlSegment->nextSegmentIsUncertain) + { + segmentIndex++; + continue; + } + if (imlSegment->nextSegmentBranchTaken == nullptr || imlSegment->nextSegmentBranchNotTaken == nullptr) + { + segmentIndex++; + continue; + } + if (imlSegment->nextSegmentBranchNotTaken->list_prevSegments.size() <= 1) + { + segmentIndex++; + continue; + } + if (imlSegment->nextSegmentBranchNotTaken->isEnterable) + { + segmentIndex++; + 
continue; + } + PPCRecompilerIml_insertSegments(ppcImlGenContext, segmentIndex + 1, 1); + IMLSegment* imlSegmentP0 = ppcImlGenContext->segmentList2[segmentIndex + 0]; + IMLSegment* imlSegmentP1 = ppcImlGenContext->segmentList2[segmentIndex + 1]; + IMLSegment* nextSegment = imlSegment->nextSegmentBranchNotTaken; + IMLSegment_RemoveLink(imlSegmentP0, nextSegment); + IMLSegment_SetLinkBranchNotTaken(imlSegmentP1, nextSegment); + IMLSegment_SetLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); + segmentIndex++; + } + // detect loops + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + { + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; + imlSegment->momentaryIndex = s; + } + for (size_t s = 0; s < ppcImlGenContext->segmentList2.size(); s++) + { + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[s]; + IMLRA_IdentifyLoop(ppcImlGenContext, imlSegment); + } +} + +IMLRARegAbstractLiveness* _GetAbstractRange(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId) +{ + auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + auto it = segMap.find(regId); + return it != segMap.end() ? &it->second : nullptr; +} + +// scan instructions and establish register usage range for segment +void IMLRA_CalculateSegmentMinMaxAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) +{ + size_t instructionIndex = 0; + IMLUsedRegisters gprTracking; + auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + while (instructionIndex < imlSegment->imlList.size()) + { + imlSegment->imlList[instructionIndex].CheckRegisterUsage(&gprTracking); + gprTracking.ForEachAccessedGPR([&](IMLReg gprReg, bool isWritten) { + IMLRegID gprId = gprReg.GetRegID(); + auto it = segDistMap.find(gprId); + if (it == segDistMap.end()) + { + segDistMap.try_emplace(gprId, gprReg.GetBaseFormat(), (sint32)instructionIndex, (sint32)instructionIndex + 1); + ctx.regIdToBaseFormat.try_emplace(gprId, gprReg.GetBaseFormat()); + } + else + { + it->second.TrackInstruction(instructionIndex); +#ifdef CEMU_DEBUG_ASSERT + cemu_assert_debug(ctx.regIdToBaseFormat[gprId] == gprReg.GetBaseFormat()); // the base type per register always has to be the same +#endif + } + }); + instructionIndex++; + } +} + +void IMLRA_CalculateLivenessRanges(IMLRegisterAllocatorContext& ctx) +{ + // for each register calculate min/max index of usage range within each segment + size_t dbgIndex = 0; + for (IMLSegment* segIt : ctx.deprGenContext->segmentList2) + { + cemu_assert_debug(segIt->momentaryIndex == dbgIndex); + IMLRA_CalculateSegmentMinMaxAbstractRanges(ctx, segIt); + dbgIndex++; + } +} + +raLivenessRange* PPCRecRA_convertToMappedRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID vGPR, IMLName name) +{ + IMLRARegAbstractLiveness* abstractRange = _GetAbstractRange(ctx, imlSegment, vGPR); + if (!abstractRange) + return nullptr; + if (abstractRange->isProcessed) + { + // return already existing segment + raLivenessRange* existingRange = IMLRA_GetSubrange(imlSegment, vGPR); + cemu_assert_debug(existingRange); + return existingRange; + } + abstractRange->isProcessed = true; + // create subrange + cemu_assert_debug(IMLRA_GetSubrange(imlSegment, vGPR) == nullptr); + cemu_assert_debug( + (abstractRange->usageStart == abstractRange->usageEnd && (abstractRange->usageStart == RA_INTER_RANGE_START || abstractRange->usageStart == RA_INTER_RANGE_END)) || + abstractRange->usageStart < abstractRange->usageEnd); // usageEnd is exclusive so it should always be larger + sint32 inclusiveEnd = 
abstractRange->usageEnd; + if (inclusiveEnd != RA_INTER_RANGE_START && inclusiveEnd != RA_INTER_RANGE_END) + inclusiveEnd--; // subtract one, because usageEnd is exclusive, but the end value of the interval passed to createSubrange is inclusive + raInterval interval; + interval.SetInterval(abstractRange->usageStart, true, inclusiveEnd, true); + raLivenessRange* subrange = IMLRA_CreateRange(ctx.deprGenContext, imlSegment, vGPR, name, interval.start, interval.end); + // traverse forward + if (abstractRange->usageEnd == RA_INTER_RANGE_END) + { + if (imlSegment->nextSegmentBranchTaken) + { + IMLRARegAbstractLiveness* branchTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchTaken, vGPR); + if (branchTakenRange && branchTakenRange->usageStart == RA_INTER_RANGE_START) + { + subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchTaken, vGPR, name); + subrange->subrangeBranchTaken->previousRanges.push_back(subrange); + cemu_assert_debug(subrange->subrangeBranchTaken->interval.ExtendsPreviousSegment()); + } + } + if (imlSegment->nextSegmentBranchNotTaken) + { + IMLRARegAbstractLiveness* branchNotTakenRange = _GetAbstractRange(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR); + if (branchNotTakenRange && branchNotTakenRange->usageStart == RA_INTER_RANGE_START) + { + subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ctx, imlSegment->nextSegmentBranchNotTaken, vGPR, name); + subrange->subrangeBranchNotTaken->previousRanges.push_back(subrange); + cemu_assert_debug(subrange->subrangeBranchNotTaken->interval.ExtendsPreviousSegment()); + } + } + } + // traverse backward + if (abstractRange->usageStart == RA_INTER_RANGE_START) + { + for (auto& it : imlSegment->list_prevSegments) + { + IMLRARegAbstractLiveness* prevRange = _GetAbstractRange(ctx, it, vGPR); + if (!prevRange) + continue; + if (prevRange->usageEnd == RA_INTER_RANGE_END) + PPCRecRA_convertToMappedRanges(ctx, it, vGPR, name); + } + } + return subrange; +} + +void IMLRA_UpdateOrAddSubrangeLocation(raLivenessRange* subrange, raInstructionEdge pos) +{ + if (subrange->list_accessLocations.empty()) + { + subrange->list_accessLocations.emplace_back(pos); + return; + } + if(subrange->list_accessLocations.back().pos == pos) + return; + cemu_assert_debug(subrange->list_accessLocations.back().pos < pos); + subrange->list_accessLocations.emplace_back(pos); +} + +// take abstract range data and create LivenessRanges +void IMLRA_ConvertAbstractToLivenessRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) +{ + const std::unordered_map& regToSubrange = IMLRA_GetSubrangeMap(imlSegment); + + auto AddOrUpdateFixedRegRequirement = [&](IMLRegID regId, sint32 instructionIndex, bool isInput, const IMLPhysRegisterSet& physRegSet) { + raLivenessRange* subrange = regToSubrange.find(regId)->second; + cemu_assert_debug(subrange); + raFixedRegRequirement tmp; + tmp.pos.Set(instructionIndex, isInput); + tmp.allowedReg = physRegSet; + if (subrange->list_fixedRegRequirements.empty() || subrange->list_fixedRegRequirements.back().pos != tmp.pos) + subrange->list_fixedRegRequirements.push_back(tmp); + }; + + // convert abstract min-max ranges to liveness range objects + auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + for (auto& it : segMap) + { + if (it.second.isProcessed) + continue; + IMLRegID regId = it.first; + PPCRecRA_convertToMappedRanges(ctx, imlSegment, regId, ctx.raParam->regIdToName.find(regId)->second); + } + // fill created ranges with read/write location indices + // 
note that at this point there is only one range per register per segment + // and the algorithm below relies on this + size_t index = 0; + IMLUsedRegisters gprTracking; + while (index < imlSegment->imlList.size()) + { + imlSegment->imlList[index].CheckRegisterUsage(&gprTracking); + raInstructionEdge pos((sint32)index, true); + gprTracking.ForEachReadGPR([&](IMLReg gprReg) { + IMLRegID gprId = gprReg.GetRegID(); + raLivenessRange* subrange = regToSubrange.find(gprId)->second; + IMLRA_UpdateOrAddSubrangeLocation(subrange, pos); + }); + pos = {(sint32)index, false}; + gprTracking.ForEachWrittenGPR([&](IMLReg gprReg) { + IMLRegID gprId = gprReg.GetRegID(); + raLivenessRange* subrange = regToSubrange.find(gprId)->second; + IMLRA_UpdateOrAddSubrangeLocation(subrange, pos); + }); + // check fixed register requirements + IMLFixedRegisters fixedRegs; + GetInstructionFixedRegisters(&imlSegment->imlList[index], fixedRegs); + for (auto& fixedRegAccess : fixedRegs.listInput) + { + if (fixedRegAccess.reg != IMLREG_INVALID) + AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, true, fixedRegAccess.physRegSet); + } + for (auto& fixedRegAccess : fixedRegs.listOutput) + { + if (fixedRegAccess.reg != IMLREG_INVALID) + AddOrUpdateFixedRegRequirement(fixedRegAccess.reg.GetRegID(), index, false, fixedRegAccess.physRegSet); + } + index++; + } +} + +void IMLRA_extendAbstractRangeToEndOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId) +{ + auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + auto it = segDistMap.find(regId); + if (it == segDistMap.end()) + { + sint32 startIndex; + if (imlSegment->HasSuffixInstruction()) + startIndex = imlSegment->GetSuffixInstructionIndex(); + else + startIndex = RA_INTER_RANGE_END; + segDistMap.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, startIndex, RA_INTER_RANGE_END); + } + else + { + it->second.usageEnd = RA_INTER_RANGE_END; + } +} + +void IMLRA_extendAbstractRangeToBeginningOfSegment(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment, IMLRegID regId) +{ + auto& segDistMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + auto it = segDistMap.find(regId); + if (it == segDistMap.end()) + { + segDistMap.try_emplace((IMLRegID)regId, IMLRegFormat::INVALID_FORMAT, RA_INTER_RANGE_START, RA_INTER_RANGE_START); + } + else + { + it->second.usageStart = RA_INTER_RANGE_START; + } + // propagate backwards + for (auto& it : imlSegment->list_prevSegments) + { + IMLRA_extendAbstractRangeToEndOfSegment(ctx, it, regId); + } +} + +void IMLRA_connectAbstractRanges(IMLRegisterAllocatorContext& ctx, IMLRegID regId, IMLSegment** route, sint32 routeDepth) +{ +#ifdef CEMU_DEBUG_ASSERT + if (routeDepth < 2) + assert_dbg(); +#endif + // extend starting range to end of segment + IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[0], regId); + // extend all the connecting segments in both directions + for (sint32 i = 1; i < (routeDepth - 1); i++) + { + IMLRA_extendAbstractRangeToEndOfSegment(ctx, route[i], regId); + IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[i], regId); + } + // extend the final segment towards the beginning + IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, route[routeDepth - 1], regId); +} + +void _IMLRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRegID regID, sint32 distanceLeft, IMLSegment** route, sint32 routeDepth) +{ + if (routeDepth >= 64) + { + cemuLog_logDebug(LogType::Force, "Recompiler RA route maximum depth exceeded\n"); + return; + } + 
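// record the current segment on the candidate route; if a nearby abstract range for regID is found deeper in the control flow, IMLRA_connectAbstractRanges() extends the abstract ranges across every segment stored in route[0..routeDepth]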
route[routeDepth] = currentSegment; + + IMLRARegAbstractLiveness* range = _GetAbstractRange(ctx, currentSegment, regID); + + if (!range) + { + // measure distance over entire segment + distanceLeft -= (sint32)currentSegment->imlList.size(); + if (distanceLeft > 0) + { + if (currentSegment->nextSegmentBranchNotTaken) + _IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, distanceLeft, route, routeDepth + 1); + if (currentSegment->nextSegmentBranchTaken) + _IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, distanceLeft, route, routeDepth + 1); + } + return; + } + else + { + // measure distance to range + if (range->usageStart == RA_INTER_RANGE_END) + { + if (distanceLeft < (sint32)currentSegment->imlList.size()) + return; // range too far away + } + else if (range->usageStart != RA_INTER_RANGE_START && range->usageStart > distanceLeft) + return; // out of range + // found close range -> connect ranges + IMLRA_connectAbstractRanges(ctx, regID, route, routeDepth + 1); + } +} + +void PPCRecRA_checkAndTryExtendRange(IMLRegisterAllocatorContext& ctx, IMLSegment* currentSegment, IMLRARegAbstractLiveness* range, IMLRegID regID) +{ + cemu_assert_debug(range->usageEnd >= 0); + // count instructions to end of initial segment + sint32 instructionsUntilEndOfSeg; + if (range->usageEnd == RA_INTER_RANGE_END) + instructionsUntilEndOfSeg = 0; + else + instructionsUntilEndOfSeg = (sint32)currentSegment->imlList.size() - range->usageEnd; + cemu_assert_debug(instructionsUntilEndOfSeg >= 0); + sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg; + if (remainingScanDist <= 0) + return; // can't reach end + + IMLSegment* route[64]; + route[0] = currentSegment; + if (currentSegment->nextSegmentBranchNotTaken) + _IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchNotTaken, regID, remainingScanDist, route, 1); + if (currentSegment->nextSegmentBranchTaken) + _IMLRA_checkAndTryExtendRange(ctx, currentSegment->nextSegmentBranchTaken, regID, remainingScanDist, route, 1); +} + +void PPCRecRA_mergeCloseRangesForSegmentV2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) +{ + auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + for (auto& it : segMap) + { + PPCRecRA_checkAndTryExtendRange(ctx, imlSegment, &(it.second), it.first); + } +#ifdef CEMU_DEBUG_ASSERT + if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable) + assert_dbg(); + if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain) + assert_dbg(); +#endif +} + +void PPCRecRA_followFlowAndExtendRanges(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) +{ + std::vector list_segments; + std::vector list_processedSegment; + size_t segmentCount = ctx.deprGenContext->segmentList2.size(); + list_segments.reserve(segmentCount + 1); + list_processedSegment.resize(segmentCount); + + auto markSegProcessed = [&list_processedSegment](IMLSegment* seg) { + list_processedSegment[seg->momentaryIndex] = true; + }; + auto isSegProcessed = [&list_processedSegment](IMLSegment* seg) -> bool { + return list_processedSegment[seg->momentaryIndex]; + }; + markSegProcessed(imlSegment); + + sint32 index = 0; + list_segments.push_back(imlSegment); + while (index < list_segments.size()) + { + IMLSegment* currentSegment = list_segments[index]; + PPCRecRA_mergeCloseRangesForSegmentV2(ctx, currentSegment); + // follow flow + if (currentSegment->nextSegmentBranchNotTaken && 
!isSegProcessed(currentSegment->nextSegmentBranchNotTaken)) + { + markSegProcessed(currentSegment->nextSegmentBranchNotTaken); + list_segments.push_back(currentSegment->nextSegmentBranchNotTaken); + } + if (currentSegment->nextSegmentBranchTaken && !isSegProcessed(currentSegment->nextSegmentBranchTaken)) + { + markSegProcessed(currentSegment->nextSegmentBranchTaken); + list_segments.push_back(currentSegment->nextSegmentBranchTaken); + } + index++; + } +} + +void IMLRA_MergeCloseAbstractRanges(IMLRegisterAllocatorContext& ctx) +{ + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) + { + IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; + if (!imlSegment->list_prevSegments.empty()) + continue; // not an entry/standalone segment + PPCRecRA_followFlowAndExtendRanges(ctx, imlSegment); + } +} + +void IMLRA_ExtendAbstractRangesOutOfLoops(IMLRegisterAllocatorContext& ctx) +{ + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) + { + IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; + auto localLoopDepth = imlSegment->loopDepth; + if (localLoopDepth <= 0) + continue; // not inside a loop + // look for loop exit + bool hasLoopExit = false; + if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth) + { + hasLoopExit = true; + } + if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth) + { + hasLoopExit = true; + } + if (hasLoopExit == false) + continue; + + // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) + auto& segMap = ctx.GetSegmentAbstractRangeMap(imlSegment); + for (auto& it : segMap) + { + if (it.second.usageEnd != RA_INTER_RANGE_END) + continue; + if (imlSegment->nextSegmentBranchTaken) + IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchTaken, it.first); + if (imlSegment->nextSegmentBranchNotTaken) + IMLRA_extendAbstractRangeToBeginningOfSegment(ctx, imlSegment->nextSegmentBranchNotTaken, it.first); + } + } +} + +void IMLRA_ProcessFlowAndCalculateLivenessRanges(IMLRegisterAllocatorContext& ctx) +{ + IMLRA_MergeCloseAbstractRanges(ctx); + // extra pass to move register loads and stores out of loops + IMLRA_ExtendAbstractRangesOutOfLoops(ctx); + // calculate liveness ranges + for (auto& segIt : ctx.deprGenContext->segmentList2) + IMLRA_ConvertAbstractToLivenessRanges(ctx, segIt); +} + +void IMLRA_AnalyzeSubrangeDataDependency(raLivenessRange* subrange) +{ + bool isRead = false; + bool isWritten = false; + bool isOverwritten = false; + for (auto& location : subrange->list_accessLocations) + { + if (location.IsRead()) + { + isRead = true; + } + if (location.IsWrite()) + { + if (isRead == false) + isOverwritten = true; + isWritten = true; + } + } + subrange->_noLoad = isOverwritten; + subrange->hasStore = isWritten; + + if (subrange->interval.ExtendsPreviousSegment()) + subrange->_noLoad = true; +} + +struct subrangeEndingInfo_t +{ + raLivenessRange* subrangeList[SUBRANGE_LIST_SIZE]; + sint32 subrangeCount; + + bool hasUndefinedEndings; +}; + +void _findSubrangeWriteEndings(raLivenessRange* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info) +{ + if (depth >= 30) + { + info->hasUndefinedEndings = true; + return; + } + if (subrange->lastIterationIndex == iterationIndex) + return; // already processed + subrange->lastIterationIndex = iterationIndex; + if (subrange->hasStoreDelayed) + return; // no need to traverse this 
subrange + IMLSegment* imlSegment = subrange->imlSegment; + if (!subrange->interval.ExtendsIntoNextSegment()) + { + // ending segment + if (info->subrangeCount >= SUBRANGE_LIST_SIZE) + { + info->hasUndefinedEndings = true; + return; + } + else + { + info->subrangeList[info->subrangeCount] = subrange; + info->subrangeCount++; + } + return; + } + + // traverse next subranges in flow + if (imlSegment->nextSegmentBranchNotTaken) + { + if (subrange->subrangeBranchNotTaken == nullptr) + { + info->hasUndefinedEndings = true; + } + else + { + _findSubrangeWriteEndings(subrange->subrangeBranchNotTaken, iterationIndex, depth + 1, info); + } + } + if (imlSegment->nextSegmentBranchTaken) + { + if (subrange->subrangeBranchTaken == nullptr) + { + info->hasUndefinedEndings = true; + } + else + { + _findSubrangeWriteEndings(subrange->subrangeBranchTaken, iterationIndex, depth + 1, info); + } + } +} + +static void IMLRA_AnalyzeRangeDataFlow(raLivenessRange* subrange) +{ + if (!subrange->interval.ExtendsIntoNextSegment()) + return; + // analyze data flow across segments (if this segment has writes) + if (subrange->hasStore) + { + subrangeEndingInfo_t writeEndingInfo; + writeEndingInfo.subrangeCount = 0; + writeEndingInfo.hasUndefinedEndings = false; + _findSubrangeWriteEndings(subrange, IMLRA_GetNextIterationIndex(), 0, &writeEndingInfo); + if (writeEndingInfo.hasUndefinedEndings == false) + { + // get cost of delaying store into endings + sint32 delayStoreCost = 0; + bool alreadyStoredInAllEndings = true; + for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) + { + raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i]; + if (subrangeItr->hasStore) + continue; // this ending already stores, no extra cost + alreadyStoredInAllEndings = false; + sint32 storeCost = IMLRA_GetSegmentReadWriteCost(subrangeItr->imlSegment); + delayStoreCost = std::max(storeCost, delayStoreCost); + } + if (alreadyStoredInAllEndings) + { + subrange->hasStore = false; + subrange->hasStoreDelayed = true; + } + else if (delayStoreCost <= IMLRA_GetSegmentReadWriteCost(subrange->imlSegment)) + { + subrange->hasStore = false; + subrange->hasStoreDelayed = true; + for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) + { + raLivenessRange* subrangeItr = writeEndingInfo.subrangeList[i]; + subrangeItr->hasStore = true; + } + } + } + } +} + +void IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext_t* ppcImlGenContext) +{ + // this function is called after _AssignRegisters(), which means that all liveness ranges are already final and must not be modified anymore + // track read/write dependencies per segment + for (auto& seg : ppcImlGenContext->segmentList2) + { + raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges; + while (subrange) + { + IMLRA_AnalyzeSubrangeDataDependency(subrange); + subrange = subrange->link_allSegmentRanges.next; + } + } + // propagate information across segment boundaries + for (auto& seg : ppcImlGenContext->segmentList2) + { + raLivenessRange* subrange = seg->raInfo.linkedList_allSubranges; + while (subrange) + { + IMLRA_AnalyzeRangeDataFlow(subrange); + subrange = subrange->link_allSegmentRanges.next; + } + } +} + +/* Generate move instructions */ + +inline IMLReg _MakeNativeReg(IMLRegFormat baseFormat, IMLRegID regId) +{ + return IMLReg(baseFormat, baseFormat, 0, regId); +} + +// prepass for IMLRA_GenerateSegmentMoveInstructions which updates all virtual registers to their physical counterparts +void IMLRA_RewriteRegisters(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) +{ + 
std::unordered_map virtId2PhysReg; + boost::container::small_vector activeRanges; + raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; + raInstructionEdge currentEdge; + for (size_t i = 0; i < imlSegment->imlList.size(); i++) + { + currentEdge.Set(i, false); // set to instruction index on output edge + // activate ranges which begin before or during this instruction + while (currentRange && currentRange->interval.start <= currentEdge) + { + cemu_assert_debug(virtId2PhysReg.find(currentRange->GetVirtualRegister()) == virtId2PhysReg.end() || virtId2PhysReg[currentRange->GetVirtualRegister()] == currentRange->GetPhysicalRegister()); // check for register conflict + + virtId2PhysReg[currentRange->GetVirtualRegister()] = currentRange->GetPhysicalRegister(); + activeRanges.push_back(currentRange); + currentRange = currentRange->link_allSegmentRanges.next; + } + // rewrite registers + imlSegment->imlList[i].RewriteGPR(virtId2PhysReg); + // deactivate ranges which end during this instruction + auto it = activeRanges.begin(); + while (it != activeRanges.end()) + { + if ((*it)->interval.end <= currentEdge) + { + virtId2PhysReg.erase((*it)->GetVirtualRegister()); + it = activeRanges.erase(it); + } + else + ++it; + } + } +} + +void IMLRA_GenerateSegmentMoveInstructions2(IMLRegisterAllocatorContext& ctx, IMLSegment* imlSegment) +{ + IMLRA_RewriteRegisters(ctx, imlSegment); + +#if DEBUG_RA_INSTRUCTION_GEN + cemuLog_log(LogType::Force, ""); + cemuLog_log(LogType::Force, "[Seg before RA]"); + IMLDebug_DumpSegment(nullptr, imlSegment, true); +#endif + + bool hadSuffixInstruction = imlSegment->HasSuffixInstruction(); + + std::vector rebuiltInstructions; + sint32 numInstructionsWithoutSuffix = (sint32)imlSegment->imlList.size() - (imlSegment->HasSuffixInstruction() ? 1 : 0); + + if (imlSegment->imlList.empty()) + { + // empty segments need special handling (todo - look into merging this with the core logic below eventually) + // store all ranges + raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; + while (currentRange) + { + if (currentRange->hasStore) + rebuiltInstructions.emplace_back().make_name_r(currentRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister())); + currentRange = currentRange->link_allSegmentRanges.next; + } + // load ranges + currentRange = imlSegment->raInfo.linkedList_allSubranges; + while (currentRange) + { + if (!currentRange->_noLoad) + { + cemu_assert_debug(currentRange->interval.ExtendsIntoNextSegment()); + rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName()); + } + currentRange = currentRange->link_allSegmentRanges.next; + } + imlSegment->imlList = std::move(rebuiltInstructions); + return; + } + + // make sure that no range exceeds the suffix instruction input edge except if they need to be loaded for the next segment (todo - for those, set the start point accordingly?) 
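+	// the block below determines the last edge a local range may cover (the input edge of the suffix instruction,
+	// or the output edge of the last instruction when there is no suffix) and clips any range ending beyond it,
+	// leaving ranges that exist only for the next segment (IsNextSegmentOnly) untouched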
+ { + raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; + raInstructionEdge edge; + if (imlSegment->HasSuffixInstruction()) + edge.Set(numInstructionsWithoutSuffix, true); + else + edge.Set(numInstructionsWithoutSuffix - 1, false); + + while (currentRange) + { + if (!currentRange->interval.IsNextSegmentOnly() && currentRange->interval.end > edge) + { + currentRange->interval.SetEnd(edge); + } + currentRange = currentRange->link_allSegmentRanges.next; + } + } + +#if DEBUG_RA_INSTRUCTION_GEN + cemuLog_log(LogType::Force, ""); + cemuLog_log(LogType::Force, "--- Intermediate liveness info ---"); + { + raLivenessRange* dbgRange = imlSegment->raInfo.linkedList_allSubranges; + while (dbgRange) + { + cemuLog_log(LogType::Force, "Range i{}: {}-{}", dbgRange->GetVirtualRegister(), dbgRange->interval2.start.GetDebugString(), dbgRange->interval2.end.GetDebugString()); + dbgRange = dbgRange->link_allSegmentRanges.next; + } + } +#endif + + boost::container::small_vector activeRanges; + // first we add all the ranges that extend from the previous segment, some of these will end immediately at the first instruction so we might need to store them early + raLivenessRange* currentRange = imlSegment->raInfo.linkedList_allSubranges; + // make all ranges active that start on RA_INTER_RANGE_START + while (currentRange && currentRange->interval.start.ConnectsToPreviousSegment()) + { + activeRanges.push_back(currentRange); + currentRange = currentRange->link_allSegmentRanges.next; + } + // store all ranges that end before the first output edge (includes RA_INTER_RANGE_START) + auto it = activeRanges.begin(); + raInstructionEdge firstOutputEdge; + firstOutputEdge.Set(0, false); + while (it != activeRanges.end()) + { + if ((*it)->interval.end < firstOutputEdge) + { + raLivenessRange* storedRange = *it; + if (storedRange->hasStore) + rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister())); + it = activeRanges.erase(it); + continue; + } + ++it; + } + + sint32 numInstructions = (sint32)imlSegment->imlList.size(); + for (sint32 i = 0; i < numInstructions; i++) + { + raInstructionEdge curEdge; + // input edge + curEdge.SetRaw(i * 2 + 1); // +1 to include ranges that start at the output of the instruction + while (currentRange && currentRange->interval.start <= curEdge) + { + if (!currentRange->_noLoad) + { + rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName()); + } + activeRanges.push_back(currentRange); + currentRange = currentRange->link_allSegmentRanges.next; + } + // copy instruction + rebuiltInstructions.push_back(imlSegment->imlList[i]); + // output edge + curEdge.SetRaw(i * 2 + 1 + 1); + // also store ranges that end on the next input edge, we handle this by adding an extra 1 above + auto it = activeRanges.begin(); + while (it != activeRanges.end()) + { + if ((*it)->interval.end <= curEdge) + { + // range expires + // todo - check hasStore + raLivenessRange* storedRange = *it; + if (storedRange->hasStore) + { + cemu_assert_debug(i != numInstructionsWithoutSuffix); // not allowed to emit after suffix + rebuiltInstructions.emplace_back().make_name_r(storedRange->GetName(), _MakeNativeReg(ctx.regIdToBaseFormat[storedRange->GetVirtualRegister()], storedRange->GetPhysicalRegister())); + } + it = activeRanges.erase(it); + continue; + } + ++it; 
+ } + } + // if there is no suffix instruction we currently need to handle the final loads here + cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction()); + if (imlSegment->HasSuffixInstruction()) + { + if (currentRange) + { + cemuLog_logDebug(LogType::Force, "[DEBUG] GenerateSegmentMoveInstructions() hit suffix path with non-null currentRange. Segment: {:08x}", imlSegment->ppcAddress); + } + for (auto& remainingRange : activeRanges) + { + cemu_assert_debug(!remainingRange->hasStore); + } + } + else + { + for (auto& remainingRange : activeRanges) + { + cemu_assert_debug(!remainingRange->hasStore); // this range still needs to be stored + } + while (currentRange) + { + cemu_assert_debug(currentRange->interval.IsNextSegmentOnly()); + cemu_assert_debug(!currentRange->_noLoad); + rebuiltInstructions.emplace_back().make_r_name(_MakeNativeReg(ctx.regIdToBaseFormat[currentRange->GetVirtualRegister()], currentRange->GetPhysicalRegister()), currentRange->GetName()); + currentRange = currentRange->link_allSegmentRanges.next; + } + } + + imlSegment->imlList = std::move(rebuiltInstructions); + cemu_assert_debug(hadSuffixInstruction == imlSegment->HasSuffixInstruction()); + +#if DEBUG_RA_INSTRUCTION_GEN + cemuLog_log(LogType::Force, ""); + cemuLog_log(LogType::Force, "[Seg after RA]"); + IMLDebug_DumpSegment(nullptr, imlSegment, false); +#endif +} + +void IMLRA_GenerateMoveInstructions(IMLRegisterAllocatorContext& ctx) +{ + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) + { + IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; + IMLRA_GenerateSegmentMoveInstructions2(ctx, imlSegment); + } +} + +static void DbgVerifyFixedRegRequirements(IMLSegment* imlSegment) +{ +#if DEBUG_RA_EXTRA_VALIDATION + std::vector frr = IMLRA_BuildSegmentInstructionFixedRegList(imlSegment); + for(auto& fixedReq : frr) + { + for (raLivenessRange* range = imlSegment->raInfo.linkedList_allSubranges; range; range = range->link_allSegmentRanges.next) + { + if (!range->interval2.ContainsEdge(fixedReq.pos)) + continue; + // verify if the requirement is compatible + if(range->GetVirtualRegister() == fixedReq.regId) + { + cemu_assert(range->HasPhysicalRegister()); + cemu_assert(fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register matches, but not assigned the right physical register + } + else + { + cemu_assert(!fixedReq.allowedReg.IsAvailable(range->GetPhysicalRegister())); // virtual register does not match, but using the reserved physical register + } + } + } +#endif +} + +static void DbgVerifyAllRanges(IMLRegisterAllocatorContext& ctx) +{ +#if DEBUG_RA_EXTRA_VALIDATION + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) + { + IMLSegment* imlSegment = ctx.deprGenContext->segmentList2[s]; + raLivenessRange* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; + while (subrangeItr) + { + PPCRecRA_debugValidateSubrange(subrangeItr); + subrangeItr = subrangeItr->link_allSegmentRanges.next; + } + } + // check that no range validates register requirements + for (size_t s = 0; s < ctx.deprGenContext->segmentList2.size(); s++) + { + DbgVerifyFixedRegRequirements(ctx.deprGenContext->segmentList2[s]); + } +#endif +} + +void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam) +{ + IMLRegisterAllocatorContext ctx; + ctx.raParam = &raParam; + ctx.deprGenContext = ppcImlGenContext; + + IMLRA_ReshapeForRegisterAllocation(ppcImlGenContext); + 
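+	// the calls below are the remaining allocator pipeline: refresh segment indices, build the abstract liveness ranges,
+	// connect them across control flow, assign physical registers, decide where stores can be delayed to,
+	// rewrite the IML with the required load/store/move instructions and finally free all liveness ranges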
ppcImlGenContext->UpdateSegmentIndices(); // update momentaryIndex of each segment + ctx.perSegmentAbstractRanges.resize(ppcImlGenContext->segmentList2.size()); + IMLRA_CalculateLivenessRanges(ctx); + IMLRA_ProcessFlowAndCalculateLivenessRanges(ctx); + IMLRA_AssignRegisters(ctx, ppcImlGenContext); + DbgVerifyAllRanges(ctx); + IMLRA_AnalyzeRangeDataFlow(ppcImlGenContext); + IMLRA_GenerateMoveInstructions(ctx); + + IMLRA_DeleteAllRanges(ppcImlGenContext); +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h new file mode 100644 index 00000000..0a54e4cb --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocator.h @@ -0,0 +1,125 @@ +#pragma once + +// container for storing a set of register indices +// specifically optimized towards storing typical range of physical register indices (expected to be below 64) +class IMLPhysRegisterSet +{ +public: + void SetAvailable(uint32 index) + { + cemu_assert_debug(index < 64); + m_regBitmask |= ((uint64)1 << index); + } + + void SetReserved(uint32 index) + { + cemu_assert_debug(index < 64); + m_regBitmask &= ~((uint64)1 << index); + } + + void SetAllAvailable() + { + m_regBitmask = ~0ull; + } + + bool HasAllAvailable() const + { + return m_regBitmask == ~0ull; + } + + bool IsAvailable(uint32 index) const + { + return (m_regBitmask & ((uint64)1 << index)) != 0; + } + + IMLPhysRegisterSet& operator&=(const IMLPhysRegisterSet& other) + { + this->m_regBitmask &= other.m_regBitmask; + return *this; + } + + IMLPhysRegisterSet& operator=(const IMLPhysRegisterSet& other) + { + this->m_regBitmask = other.m_regBitmask; + return *this; + } + + void RemoveRegisters(const IMLPhysRegisterSet& other) + { + this->m_regBitmask &= ~other.m_regBitmask; + } + + bool HasAnyAvailable() const + { + return m_regBitmask != 0; + } + + bool HasExactlyOneAvailable() const + { + return m_regBitmask != 0 && (m_regBitmask & (m_regBitmask - 1)) == 0; + } + + // returns index of first available register. 
Do not call when HasAnyAvailable() == false
+	IMLPhysReg GetFirstAvailableReg()
+	{
+		cemu_assert_debug(m_regBitmask != 0);
+		sint32 regIndex = 0;
+		auto tmp = m_regBitmask;
+		while ((tmp & 0xFF) == 0)
+		{
+			regIndex += 8;
+			tmp >>= 8;
+		}
+		while ((tmp & 0x1) == 0)
+		{
+			regIndex++;
+			tmp >>= 1;
+		}
+		return regIndex;
+	}
+
+	// returns index of next available register (search includes any register index >= startIndex)
+	// returns -1 if there is no more register
+	IMLPhysReg GetNextAvailableReg(sint32 startIndex) const
+	{
+		if (startIndex >= 64)
+			return -1;
+		uint32 regIndex = startIndex;
+		auto tmp = m_regBitmask;
+		tmp >>= regIndex;
+		if (!tmp)
+			return -1;
+		while ((tmp & 0xFF) == 0)
+		{
+			regIndex += 8;
+			tmp >>= 8;
+		}
+		while ((tmp & 0x1) == 0)
+		{
+			regIndex++;
+			tmp >>= 1;
+		}
+		return regIndex;
+	}
+
+	sint32 CountAvailableRegs() const
+	{
+		return std::popcount(m_regBitmask);
+	}
+
+private:
+	uint64 m_regBitmask{ 0 };
+};
+
+struct IMLRegisterAllocatorParameters
+{
+	inline IMLPhysRegisterSet& GetPhysRegPool(IMLRegFormat regFormat)
+	{
+		return perTypePhysPool[stdx::to_underlying(regFormat)];
+	}
+
+	IMLPhysRegisterSet perTypePhysPool[stdx::to_underlying(IMLRegFormat::TYPE_COUNT)];
+	std::unordered_map regIdToName;
+};
+
+void IMLRegisterAllocator_AllocateRegisters(ppcImlGenContext_t* ppcImlGenContext, IMLRegisterAllocatorParameters& raParam);
\ No newline at end of file
diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp
new file mode 100644
index 00000000..583d5905
--- /dev/null
+++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.cpp
@@ -0,0 +1,635 @@
+#include "../PPCRecompiler.h"
+#include "../PPCRecompilerIml.h"
+#include "IMLRegisterAllocatorRanges.h"
+#include "util/helpers/MemoryPool.h"
+
+uint32 IMLRA_GetNextIterationIndex();
+
+IMLRegID raLivenessRange::GetVirtualRegister() const
+{
+	return virtualRegister;
+}
+
+sint32 raLivenessRange::GetPhysicalRegister() const
+{
+	return physicalRegister;
+}
+
+IMLName raLivenessRange::GetName() const
+{
+	return name;
+}
+
+void raLivenessRange::SetPhysicalRegister(IMLPhysReg physicalRegister)
+{
+	this->physicalRegister = physicalRegister;
+}
+
+void raLivenessRange::SetPhysicalRegisterForCluster(IMLPhysReg physicalRegister)
+{
+	auto clusterRanges = GetAllSubrangesInCluster();
+	for(auto& range : clusterRanges)
+		range->physicalRegister = physicalRegister;
+}
+
+boost::container::small_vector raLivenessRange::GetAllSubrangesInCluster()
+{
+	uint32 iterationIndex = IMLRA_GetNextIterationIndex();
+	boost::container::small_vector subranges;
+	subranges.push_back(this);
+	this->lastIterationIndex = iterationIndex;
+	size_t i = 0;
+	while(i < subranges.size())
+	{
+		raLivenessRange* cur = subranges[i];
+		i++;
+		// check successors
+		if(cur->subrangeBranchTaken && cur->subrangeBranchTaken->lastIterationIndex != iterationIndex)
+		{
+			cur->subrangeBranchTaken->lastIterationIndex = iterationIndex;
+			subranges.push_back(cur->subrangeBranchTaken);
+		}
+		if(cur->subrangeBranchNotTaken && cur->subrangeBranchNotTaken->lastIterationIndex != iterationIndex)
+		{
+			cur->subrangeBranchNotTaken->lastIterationIndex = iterationIndex;
+			subranges.push_back(cur->subrangeBranchNotTaken);
+		}
+		// check predecessors
+		for(auto& prev : cur->previousRanges)
+		{
+			if(prev->lastIterationIndex != iterationIndex)
+			{
+				prev->lastIterationIndex = iterationIndex;
+				subranges.push_back(prev);
+			}
+		}
+	}
+	return subranges;
+}
+
+void raLivenessRange::GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex,
IMLPhysRegisterSet& allowedRegs) +{ + range->lastIterationIndex = iterationIndex; + for (auto& it : range->list_fixedRegRequirements) + allowedRegs &= it.allowedReg; + // check successors + if (range->subrangeBranchTaken && range->subrangeBranchTaken->lastIterationIndex != iterationIndex) + GetAllowedRegistersExRecursive(range->subrangeBranchTaken, iterationIndex, allowedRegs); + if (range->subrangeBranchNotTaken && range->subrangeBranchNotTaken->lastIterationIndex != iterationIndex) + GetAllowedRegistersExRecursive(range->subrangeBranchNotTaken, iterationIndex, allowedRegs); + // check predecessors + for (auto& prev : range->previousRanges) + { + if (prev->lastIterationIndex != iterationIndex) + GetAllowedRegistersExRecursive(prev, iterationIndex, allowedRegs); + } +}; + +bool raLivenessRange::GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters) +{ + uint32 iterationIndex = IMLRA_GetNextIterationIndex(); + allowedRegisters.SetAllAvailable(); + GetAllowedRegistersExRecursive(this, iterationIndex, allowedRegisters); + return !allowedRegisters.HasAllAvailable(); +} + +IMLPhysRegisterSet raLivenessRange::GetAllowedRegisters(IMLPhysRegisterSet regPool) +{ + IMLPhysRegisterSet fixedRegRequirements = regPool; + if(interval.ExtendsPreviousSegment() || interval.ExtendsIntoNextSegment()) + { + auto clusterRanges = GetAllSubrangesInCluster(); + for(auto& subrange : clusterRanges) + { + for(auto& fixedRegLoc : subrange->list_fixedRegRequirements) + fixedRegRequirements &= fixedRegLoc.allowedReg; + } + return fixedRegRequirements; + } + for(auto& fixedRegLoc : list_fixedRegRequirements) + fixedRegRequirements &= fixedRegLoc.allowedReg; + return fixedRegRequirements; +} + +void PPCRecRARange_addLink_perVirtualGPR(std::unordered_map& root, raLivenessRange* subrange) +{ + IMLRegID regId = subrange->GetVirtualRegister(); + auto it = root.find(regId); + if (it == root.end()) + { + // new single element + root.try_emplace(regId, subrange); + subrange->link_sameVirtualRegister.prev = nullptr; + subrange->link_sameVirtualRegister.next = nullptr; + } + else + { + // insert in first position + raLivenessRange* priorFirst = it->second; + subrange->link_sameVirtualRegister.next = priorFirst; + it->second = subrange; + subrange->link_sameVirtualRegister.prev = nullptr; + priorFirst->link_sameVirtualRegister.prev = subrange; + } +} + +void PPCRecRARange_addLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange) +{ + subrange->link_allSegmentRanges.next = *root; + if (*root) + (*root)->link_allSegmentRanges.prev = subrange; + subrange->link_allSegmentRanges.prev = nullptr; + *root = subrange; +} + +void PPCRecRARange_removeLink_perVirtualGPR(std::unordered_map& root, raLivenessRange* subrange) +{ +#ifdef CEMU_DEBUG_ASSERT + raLivenessRange* cur = root.find(subrange->GetVirtualRegister())->second; + bool hasRangeFound = false; + while(cur) + { + if(cur == subrange) + { + hasRangeFound = true; + break; + } + cur = cur->link_sameVirtualRegister.next; + } + cemu_assert_debug(hasRangeFound); +#endif + IMLRegID regId = subrange->GetVirtualRegister(); + raLivenessRange* nextRange = subrange->link_sameVirtualRegister.next; + raLivenessRange* prevRange = subrange->link_sameVirtualRegister.prev; + raLivenessRange* newBase = prevRange ? 
prevRange : nextRange; + if (prevRange) + prevRange->link_sameVirtualRegister.next = subrange->link_sameVirtualRegister.next; + if (nextRange) + nextRange->link_sameVirtualRegister.prev = subrange->link_sameVirtualRegister.prev; + + if (!prevRange) + { + if (nextRange) + { + root.find(regId)->second = nextRange; + } + else + { + cemu_assert_debug(root.find(regId)->second == subrange); + root.erase(regId); + } + } +#ifdef CEMU_DEBUG_ASSERT + subrange->link_sameVirtualRegister.prev = (raLivenessRange*)1; + subrange->link_sameVirtualRegister.next = (raLivenessRange*)1; +#endif +} + +void PPCRecRARange_removeLink_allSegmentRanges(raLivenessRange** root, raLivenessRange* subrange) +{ + raLivenessRange* tempPrev = subrange->link_allSegmentRanges.prev; + if (subrange->link_allSegmentRanges.prev) + subrange->link_allSegmentRanges.prev->link_allSegmentRanges.next = subrange->link_allSegmentRanges.next; + else + (*root) = subrange->link_allSegmentRanges.next; + if (subrange->link_allSegmentRanges.next) + subrange->link_allSegmentRanges.next->link_allSegmentRanges.prev = tempPrev; +#ifdef CEMU_DEBUG_ASSERT + subrange->link_allSegmentRanges.prev = (raLivenessRange*)1; + subrange->link_allSegmentRanges.next = (raLivenessRange*)1; +#endif +} + +MemoryPoolPermanentObjects memPool_livenessSubrange(4096); + +// startPosition and endPosition are inclusive +raLivenessRange* IMLRA_CreateRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition) +{ + raLivenessRange* range = memPool_livenessSubrange.acquireObj(); + range->previousRanges.clear(); + range->list_accessLocations.clear(); + range->list_fixedRegRequirements.clear(); + range->imlSegment = imlSegment; + + cemu_assert_debug(startPosition <= endPosition); + range->interval.start = startPosition; + range->interval.end = endPosition; + + // register mapping + range->virtualRegister = virtualRegister; + range->name = name; + range->physicalRegister = -1; + // default values + range->hasStore = false; + range->hasStoreDelayed = false; + range->lastIterationIndex = 0; + range->subrangeBranchNotTaken = nullptr; + range->subrangeBranchTaken = nullptr; + cemu_assert_debug(range->previousRanges.empty()); + range->_noLoad = false; + // add to segment linked lists + PPCRecRARange_addLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, range); + PPCRecRARange_addLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, range); + return range; +} + +void _unlinkSubrange(raLivenessRange* range) +{ + IMLSegment* imlSegment = range->imlSegment; + PPCRecRARange_removeLink_perVirtualGPR(imlSegment->raInfo.linkedList_perVirtualRegister, range); + PPCRecRARange_removeLink_allSegmentRanges(&imlSegment->raInfo.linkedList_allSubranges, range); + // unlink reverse references + if(range->subrangeBranchTaken) + range->subrangeBranchTaken->previousRanges.erase(std::find(range->subrangeBranchTaken->previousRanges.begin(), range->subrangeBranchTaken->previousRanges.end(), range)); + if(range->subrangeBranchNotTaken) + range->subrangeBranchNotTaken->previousRanges.erase(std::find(range->subrangeBranchNotTaken->previousRanges.begin(), range->subrangeBranchNotTaken->previousRanges.end(), range)); + range->subrangeBranchTaken = (raLivenessRange*)(uintptr_t)-1; + range->subrangeBranchNotTaken = (raLivenessRange*)(uintptr_t)-1; + // remove forward references + for(auto& prev : range->previousRanges) + { + if(prev->subrangeBranchTaken == range) + 
prev->subrangeBranchTaken = nullptr; + if(prev->subrangeBranchNotTaken == range) + prev->subrangeBranchNotTaken = nullptr; + } + range->previousRanges.clear(); +} + +void IMLRA_DeleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* range) +{ + _unlinkSubrange(range); + range->list_accessLocations.clear(); + range->list_fixedRegRequirements.clear(); + memPool_livenessSubrange.releaseObj(range); +} + +void IMLRA_DeleteRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* range) +{ + auto clusterRanges = range->GetAllSubrangesInCluster(); + for (auto& subrange : clusterRanges) + IMLRA_DeleteRange(ppcImlGenContext, subrange); +} + +void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext) +{ + for(auto& seg : ppcImlGenContext->segmentList2) + { + raLivenessRange* cur; + while(cur = seg->raInfo.linkedList_allSubranges) + IMLRA_DeleteRange(ppcImlGenContext, cur); + seg->raInfo.linkedList_allSubranges = nullptr; + seg->raInfo.linkedList_perVirtualRegister.clear(); + } +} + +void IMLRA_MergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange) +{ +#ifdef CEMU_DEBUG_ASSERT + PPCRecRA_debugValidateSubrange(subrange); + PPCRecRA_debugValidateSubrange(absorbedSubrange); + if (subrange->imlSegment != absorbedSubrange->imlSegment) + assert_dbg(); + cemu_assert_debug(subrange->interval.end == absorbedSubrange->interval.start); + + if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken) + assert_dbg(); + if (subrange == absorbedSubrange) + assert_dbg(); +#endif + // update references + subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken; + subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken; + absorbedSubrange->subrangeBranchTaken = nullptr; + absorbedSubrange->subrangeBranchNotTaken = nullptr; + if(subrange->subrangeBranchTaken) + *std::find(subrange->subrangeBranchTaken->previousRanges.begin(), subrange->subrangeBranchTaken->previousRanges.end(), absorbedSubrange) = subrange; + if(subrange->subrangeBranchNotTaken) + *std::find(subrange->subrangeBranchNotTaken->previousRanges.begin(), subrange->subrangeBranchNotTaken->previousRanges.end(), absorbedSubrange) = subrange; + + // merge usage locations + for (auto& accessLoc : absorbedSubrange->list_accessLocations) + subrange->list_accessLocations.push_back(accessLoc); + absorbedSubrange->list_accessLocations.clear(); + // merge fixed reg locations +#ifdef CEMU_DEBUG_ASSERT + if(!subrange->list_fixedRegRequirements.empty() && !absorbedSubrange->list_fixedRegRequirements.empty()) + { + cemu_assert_debug(subrange->list_fixedRegRequirements.back().pos < absorbedSubrange->list_fixedRegRequirements.front().pos); + } +#endif + for (auto& fixedReg : absorbedSubrange->list_fixedRegRequirements) + subrange->list_fixedRegRequirements.push_back(fixedReg); + absorbedSubrange->list_fixedRegRequirements.clear(); + + subrange->interval.end = absorbedSubrange->interval.end; + + PPCRecRA_debugValidateSubrange(subrange); + + IMLRA_DeleteRange(ppcImlGenContext, absorbedSubrange); +} + +// remove all inter-segment connections from the range cluster and split it into local ranges. 
Ranges are trimmed and if they have no access location they will be removed +void IMLRA_ExplodeRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange) +{ + cemu_assert_debug(originRange->interval.ExtendsPreviousSegment() || originRange->interval.ExtendsIntoNextSegment()); // only call this on ranges that span multiple segments + auto clusterRanges = originRange->GetAllSubrangesInCluster(); + for (auto& subrange : clusterRanges) + { + if (subrange->list_accessLocations.empty()) + continue; + raInterval interval; + interval.SetInterval(subrange->list_accessLocations.front().pos, subrange->list_accessLocations.back().pos); + raLivenessRange* newSubrange = IMLRA_CreateRange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), interval.start, interval.end); + // copy locations and fixed reg indices + newSubrange->list_accessLocations = subrange->list_accessLocations; + newSubrange->list_fixedRegRequirements = subrange->list_fixedRegRequirements; + if(originRange->HasPhysicalRegister()) + { + cemu_assert_debug(subrange->list_fixedRegRequirements.empty()); // avoid unassigning a register from a range with a fixed register requirement + } + // validate + if(!newSubrange->list_accessLocations.empty()) + { + cemu_assert_debug(newSubrange->list_accessLocations.front().pos >= newSubrange->interval.start); + cemu_assert_debug(newSubrange->list_accessLocations.back().pos <= newSubrange->interval.end); + } + if(!newSubrange->list_fixedRegRequirements.empty()) + { + cemu_assert_debug(newSubrange->list_fixedRegRequirements.front().pos >= newSubrange->interval.start); // fixed register requirements outside of the actual access range probably means there is a mistake in GetInstructionFixedRegisters() + cemu_assert_debug(newSubrange->list_fixedRegRequirements.back().pos <= newSubrange->interval.end); + } + } + // delete the original range cluster + IMLRA_DeleteRangeCluster(ppcImlGenContext, originRange); +} + +#ifdef CEMU_DEBUG_ASSERT +void PPCRecRA_debugValidateSubrange(raLivenessRange* range) +{ + // validate subrange + if (range->subrangeBranchTaken && range->subrangeBranchTaken->imlSegment != range->imlSegment->nextSegmentBranchTaken) + assert_dbg(); + if (range->subrangeBranchNotTaken && range->subrangeBranchNotTaken->imlSegment != range->imlSegment->nextSegmentBranchNotTaken) + assert_dbg(); + + if(range->subrangeBranchTaken || range->subrangeBranchNotTaken) + { + cemu_assert_debug(range->interval.end.ConnectsToNextSegment()); + } + if(!range->previousRanges.empty()) + { + cemu_assert_debug(range->interval.start.ConnectsToPreviousSegment()); + } + // validate locations + if (!range->list_accessLocations.empty()) + { + cemu_assert_debug(range->list_accessLocations.front().pos >= range->interval.start); + cemu_assert_debug(range->list_accessLocations.back().pos <= range->interval.end); + } + // validate fixed reg requirements + if (!range->list_fixedRegRequirements.empty()) + { + cemu_assert_debug(range->list_fixedRegRequirements.front().pos >= range->interval.start); + cemu_assert_debug(range->list_fixedRegRequirements.back().pos <= range->interval.end); + for(sint32 i = 0; i < (sint32)range->list_fixedRegRequirements.size()-1; i++) + cemu_assert_debug(range->list_fixedRegRequirements[i].pos < range->list_fixedRegRequirements[i+1].pos); + } + +} +#else +void PPCRecRA_debugValidateSubrange(raLivenessRange* range) {} +#endif + +// trim start and end of range to match first and last read/write locations +// does not trim start/endpoints which 
extend into the next/previous segment +void IMLRA_TrimRangeToUse(raLivenessRange* range) +{ + if(range->list_accessLocations.empty()) + { + // special case where we trim ranges extending from other segments to a single instruction edge + cemu_assert_debug(!range->interval.start.IsInstructionIndex() || !range->interval.end.IsInstructionIndex()); + if(range->interval.start.IsInstructionIndex()) + range->interval.start = range->interval.end; + if(range->interval.end.IsInstructionIndex()) + range->interval.end = range->interval.start; + return; + } + // trim start and end + raInterval prevInterval = range->interval; + if(range->interval.start.IsInstructionIndex()) + range->interval.start = range->list_accessLocations.front().pos; + if(range->interval.end.IsInstructionIndex()) + range->interval.end = range->list_accessLocations.back().pos; + // extra checks +#ifdef CEMU_DEBUG_ASSERT + cemu_assert_debug(range->interval.start <= range->interval.end); + for(auto& loc : range->list_accessLocations) + { + cemu_assert_debug(range->interval.ContainsEdge(loc.pos)); + } + cemu_assert_debug(prevInterval.ContainsWholeInterval(range->interval)); +#endif +} + +// split range at the given position +// After the split there will be two ranges: +// head -> subrange is shortened to end at splitIndex (exclusive) +// tail -> a new subrange that ranges from splitIndex (inclusive) to the end of the original subrange +// if head has a physical register assigned it will not carry over to tail +// The return value is the tail range +// If trimToUsage is true, the end of the head subrange and the start of the tail subrange will be shrunk to fit the read/write locations within. If there are no locations then the range will be deleted +raLivenessRange* IMLRA_SplitRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToUsage) +{ + cemu_assert_debug(splitPosition.IsInstructionIndex()); + cemu_assert_debug(!subrange->interval.IsNextSegmentOnly() && !subrange->interval.IsPreviousSegmentOnly()); + cemu_assert_debug(subrange->interval.ContainsEdge(splitPosition)); + // determine new intervals + raInterval headInterval, tailInterval; + headInterval.SetInterval(subrange->interval.start, splitPosition-1); + tailInterval.SetInterval(splitPosition, subrange->interval.end); + cemu_assert_debug(headInterval.start <= headInterval.end); + cemu_assert_debug(tailInterval.start <= tailInterval.end); + // create tail + raLivenessRange* tailSubrange = IMLRA_CreateRange(ppcImlGenContext, subrange->imlSegment, subrange->GetVirtualRegister(), subrange->GetName(), tailInterval.start, tailInterval.end); + tailSubrange->SetPhysicalRegister(subrange->GetPhysicalRegister()); + // carry over branch targets and update reverse references + tailSubrange->subrangeBranchTaken = subrange->subrangeBranchTaken; + tailSubrange->subrangeBranchNotTaken = subrange->subrangeBranchNotTaken; + subrange->subrangeBranchTaken = nullptr; + subrange->subrangeBranchNotTaken = nullptr; + if(tailSubrange->subrangeBranchTaken) + *std::find(tailSubrange->subrangeBranchTaken->previousRanges.begin(), tailSubrange->subrangeBranchTaken->previousRanges.end(), subrange) = tailSubrange; + if(tailSubrange->subrangeBranchNotTaken) + *std::find(tailSubrange->subrangeBranchNotTaken->previousRanges.begin(), tailSubrange->subrangeBranchNotTaken->previousRanges.end(), subrange) = tailSubrange; + // we assume that list_locations is ordered by instruction index and contains no duplicate indices, so lets check that here just in 
case
+#ifdef CEMU_DEBUG_ASSERT
+	if(subrange->list_accessLocations.size() > 1)
+	{
+		for(size_t i=0; i < subrange->list_accessLocations.size()-1; i++)
+		{
+			cemu_assert_debug(subrange->list_accessLocations[i].pos < subrange->list_accessLocations[i+1].pos);
+		}
+	}
+#endif
+	// split locations
+	auto it = std::lower_bound(
+		subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), splitPosition,
+		[](const raAccessLocation& accessLoc, raInstructionEdge value) { return accessLoc.pos < value; }
+	);
+	size_t originalCount = subrange->list_accessLocations.size();
+	tailSubrange->list_accessLocations.insert(tailSubrange->list_accessLocations.end(), it, subrange->list_accessLocations.end());
+	subrange->list_accessLocations.erase(it, subrange->list_accessLocations.end());
+	cemu_assert_debug(subrange->list_accessLocations.empty() || subrange->list_accessLocations.back().pos < splitPosition);
+	cemu_assert_debug(tailSubrange->list_accessLocations.empty() || tailSubrange->list_accessLocations.front().pos >= splitPosition);
+	cemu_assert_debug(subrange->list_accessLocations.size() + tailSubrange->list_accessLocations.size() == originalCount);
+	// split fixed reg requirements
+	for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
+	{
+		raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
+		if (tailInterval.ContainsEdge(fixedReg->pos))
+		{
+			tailSubrange->list_fixedRegRequirements.push_back(*fixedReg);
+		}
+	}
+	// remove tail fixed reg requirements from head
+	for (sint32 i = 0; i < subrange->list_fixedRegRequirements.size(); i++)
+	{
+		raFixedRegRequirement* fixedReg = subrange->list_fixedRegRequirements.data() + i;
+		if (!headInterval.ContainsEdge(fixedReg->pos))
+		{
+			subrange->list_fixedRegRequirements.resize(i);
+			break;
+		}
+	}
+	// adjust intervals
+	subrange->interval = headInterval;
+	tailSubrange->interval = tailInterval;
+	// trim to hole
+	if(trimToUsage)
+	{
+		if(subrange->list_accessLocations.empty() && (subrange->interval.start.IsInstructionIndex() && subrange->interval.end.IsInstructionIndex()))
+		{
+			IMLRA_DeleteRange(ppcImlGenContext, subrange);
+			subrange = nullptr;
+		}
+		else
+		{
+			IMLRA_TrimRangeToUse(subrange);
+		}
+		if(tailSubrange->list_accessLocations.empty() && (tailSubrange->interval.start.IsInstructionIndex() && tailSubrange->interval.end.IsInstructionIndex()))
+		{
+			IMLRA_DeleteRange(ppcImlGenContext, tailSubrange);
+			tailSubrange = nullptr;
+		}
+		else
+		{
+			IMLRA_TrimRangeToUse(tailSubrange);
+		}
+	}
+	// validation
+	cemu_assert_debug(!subrange || subrange->interval.start <= subrange->interval.end);
+	cemu_assert_debug(!tailSubrange || tailSubrange->interval.start <= tailSubrange->interval.end);
+	cemu_assert_debug(!tailSubrange || tailSubrange->interval.start >= splitPosition);
+	if (!trimToUsage)
+		cemu_assert_debug(!tailSubrange || tailSubrange->interval.start == splitPosition);
+
+	if(subrange)
+		PPCRecRA_debugValidateSubrange(subrange);
+	if(tailSubrange)
+		PPCRecRA_debugValidateSubrange(tailSubrange);
+	return tailSubrange;
+}
+
+sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment)
+{
+	sint32 v = imlSegment->loopDepth + 1;
+	v *= 5;
+	return v*v; // 25, 100, 225, 400
+}
+
+// calculate additional cost of range that it would have after calling _ExplodeRange() on it
+sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange)
+{
+	auto ranges = subrange->GetAllSubrangesInCluster();
+	sint32 cost = 0;//-PPCRecRARange_estimateTotalCost(ranges);
+	for (auto& subrange : ranges)
+	{
+		if
(subrange->list_accessLocations.empty()) + continue; // this range would be deleted and thus has no cost + sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment); + bool hasAdditionalLoad = subrange->interval.ExtendsPreviousSegment(); + bool hasAdditionalStore = subrange->interval.ExtendsIntoNextSegment(); + if(hasAdditionalLoad && subrange->list_accessLocations.front().IsWrite()) // if written before read then a load isn't necessary + { + cemu_assert_debug(!subrange->list_accessLocations.front().IsRead()); + cost += segmentLoadStoreCost; + } + if(hasAdditionalStore) + { + bool hasWrite = std::find_if(subrange->list_accessLocations.begin(), subrange->list_accessLocations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != subrange->list_accessLocations.end(); + if(!hasWrite) // ranges which don't modify their value do not need to be stored + cost += segmentLoadStoreCost; + } + } + // todo - properly calculating all the data-flow dependency based costs is more complex so this currently is an approximation + return cost; +} + +sint32 IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition) +{ + // validation +#ifdef CEMU_DEBUG_ASSERT + if (subrange->interval.ExtendsIntoNextSegment()) + assert_dbg(); +#endif + cemu_assert_debug(splitPosition.IsInstructionIndex()); + + sint32 cost = 0; + // find split position in location list + if (subrange->list_accessLocations.empty()) + return 0; + if (splitPosition <= subrange->list_accessLocations.front().pos) + return 0; + if (splitPosition > subrange->list_accessLocations.back().pos) + return 0; + + size_t firstTailLocationIndex = 0; + for (size_t i = 0; i < subrange->list_accessLocations.size(); i++) + { + if (subrange->list_accessLocations[i].pos >= splitPosition) + { + firstTailLocationIndex = i; + break; + } + } + std::span headLocations{subrange->list_accessLocations.data(), firstTailLocationIndex}; + std::span tailLocations{subrange->list_accessLocations.data() + firstTailLocationIndex, subrange->list_accessLocations.size() - firstTailLocationIndex}; + cemu_assert_debug(headLocations.empty() || headLocations.back().pos < splitPosition); + cemu_assert_debug(tailLocations.empty() || tailLocations.front().pos >= splitPosition); + + sint32 segmentLoadStoreCost = IMLRA_GetSegmentReadWriteCost(subrange->imlSegment); + + auto CalculateCostFromLocationRange = [segmentLoadStoreCost](std::span locations, bool trackLoadCost = true, bool trackStoreCost = true) -> sint32 + { + if(locations.empty()) + return 0; + sint32 cost = 0; + if(locations.front().IsRead() && trackLoadCost) + cost += segmentLoadStoreCost; // not overwritten, so there is a load cost + bool hasWrite = std::find_if(locations.begin(), locations.end(), [](const raAccessLocation& loc) { return loc.IsWrite(); }) != locations.end(); + if(hasWrite && trackStoreCost) + cost += segmentLoadStoreCost; // modified, so there is a store cost + return cost; + }; + + sint32 baseCost = CalculateCostFromLocationRange(subrange->list_accessLocations); + + bool tailOverwritesValue = !tailLocations.empty() && !tailLocations.front().IsRead() && tailLocations.front().IsWrite(); + + sint32 newCost = CalculateCostFromLocationRange(headLocations) + CalculateCostFromLocationRange(tailLocations, !tailOverwritesValue, true); + cemu_assert_debug(newCost >= baseCost); + cost = newCost - baseCost; + + return cost; +} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h 
b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h new file mode 100644 index 00000000..b0685cc5 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLRegisterAllocatorRanges.h @@ -0,0 +1,364 @@ +#pragma once +#include "IMLRegisterAllocator.h" + +struct raLivenessSubrangeLink +{ + struct raLivenessRange* prev; + struct raLivenessRange* next; +}; + +struct raInstructionEdge +{ + friend struct raInterval; +public: + raInstructionEdge() + { + index = 0; + } + + raInstructionEdge(sint32 instructionIndex, bool isInputEdge) + { + Set(instructionIndex, isInputEdge); + } + + void Set(sint32 instructionIndex, bool isInputEdge) + { + if(instructionIndex == RA_INTER_RANGE_START || instructionIndex == RA_INTER_RANGE_END) + { + index = instructionIndex; + return; + } + index = instructionIndex * 2 + (isInputEdge ? 0 : 1); + cemu_assert_debug(index >= 0 && index < 0x100000*2); // make sure index value is sane + } + + void SetRaw(sint32 index) + { + this->index = index; + cemu_assert_debug(index == RA_INTER_RANGE_START || index == RA_INTER_RANGE_END || (index >= 0 && index < 0x100000*2)); // make sure index value is sane + } + + // sint32 GetRaw() + // { + // this->index = index; + // } + + std::string GetDebugString() + { + if(index == RA_INTER_RANGE_START) + return "RA_START"; + else if(index == RA_INTER_RANGE_END) + return "RA_END"; + std::string str = fmt::format("{}", GetInstructionIndex()); + if(IsOnInputEdge()) + str += "i"; + else if(IsOnOutputEdge()) + str += "o"; + return str; + } + + sint32 GetInstructionIndex() const + { + cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END); + return index >> 1; + } + + // returns instruction index or RA_INTER_RANGE_START/RA_INTER_RANGE_END + sint32 GetInstructionIndexEx() const + { + if(index == RA_INTER_RANGE_START || index == RA_INTER_RANGE_END) + return index; + return index >> 1; + } + + sint32 GetRaw() const + { + return index; + } + + bool IsOnInputEdge() const + { + cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END); + return (index&1) == 0; + } + + bool IsOnOutputEdge() const + { + cemu_assert_debug(index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END); + return (index&1) != 0; + } + + bool ConnectsToPreviousSegment() const + { + return index == RA_INTER_RANGE_START; + } + + bool ConnectsToNextSegment() const + { + return index == RA_INTER_RANGE_END; + } + + bool IsInstructionIndex() const + { + return index != RA_INTER_RANGE_START && index != RA_INTER_RANGE_END; + } + + // comparison operators + bool operator>(const raInstructionEdge& other) const + { + return index > other.index; + } + bool operator<(const raInstructionEdge& other) const + { + return index < other.index; + } + bool operator<=(const raInstructionEdge& other) const + { + return index <= other.index; + } + bool operator>=(const raInstructionEdge& other) const + { + return index >= other.index; + } + bool operator==(const raInstructionEdge& other) const + { + return index == other.index; + } + + raInstructionEdge operator+(sint32 offset) const + { + cemu_assert_debug(IsInstructionIndex()); + cemu_assert_debug(offset >= 0 && offset < RA_INTER_RANGE_END); + raInstructionEdge edge; + edge.index = index + offset; + return edge; + } + + raInstructionEdge operator-(sint32 offset) const + { + cemu_assert_debug(IsInstructionIndex()); + cemu_assert_debug(offset >= 0 && offset < RA_INTER_RANGE_END); + raInstructionEdge edge; + edge.index = index - offset; + return edge; + } + + raInstructionEdge& operator++() + 
{ + cemu_assert_debug(IsInstructionIndex()); + index++; + return *this; + } + +private: + sint32 index; // can also be RA_INTER_RANGE_START or RA_INTER_RANGE_END, otherwise contains instruction index * 2 + +}; + +struct raAccessLocation +{ + raAccessLocation(raInstructionEdge pos) : pos(pos) {} + + bool IsRead() const + { + return pos.IsOnInputEdge(); + } + + bool IsWrite() const + { + return pos.IsOnOutputEdge(); + } + + raInstructionEdge pos; +}; + +struct raInterval +{ + raInterval() + { + + } + + raInterval(raInstructionEdge start, raInstructionEdge end) + { + SetInterval(start, end); + } + + // isStartOnInput = Input+Output edge on first instruction. If false then only output + // isEndOnOutput = Input+Output edge on last instruction. If false then only input + void SetInterval(sint32 start, bool isStartOnInput, sint32 end, bool isEndOnOutput) + { + this->start.Set(start, isStartOnInput); + this->end.Set(end, !isEndOnOutput); + } + + void SetInterval(raInstructionEdge start, raInstructionEdge end) + { + cemu_assert_debug(start <= end); + this->start = start; + this->end = end; + } + + void SetStart(const raInstructionEdge& edge) + { + start = edge; + } + + void SetEnd(const raInstructionEdge& edge) + { + end = edge; + } + + sint32 GetStartIndex() const + { + return start.GetInstructionIndex(); + } + + sint32 GetEndIndex() const + { + return end.GetInstructionIndex(); + } + + bool ExtendsPreviousSegment() const + { + return start.ConnectsToPreviousSegment(); + } + + bool ExtendsIntoNextSegment() const + { + return end.ConnectsToNextSegment(); + } + + bool IsNextSegmentOnly() const + { + return start.ConnectsToNextSegment() && end.ConnectsToNextSegment(); + } + + bool IsPreviousSegmentOnly() const + { + return start.ConnectsToPreviousSegment() && end.ConnectsToPreviousSegment(); + } + + // returns true if range is contained within a single segment + bool IsLocal() const + { + return start.GetRaw() > RA_INTER_RANGE_START && end.GetRaw() < RA_INTER_RANGE_END; + } + + bool ContainsInstructionIndex(sint32 instructionIndex) const + { + cemu_assert_debug(instructionIndex != RA_INTER_RANGE_START && instructionIndex != RA_INTER_RANGE_END); + return instructionIndex >= start.GetInstructionIndexEx() && instructionIndex <= end.GetInstructionIndexEx(); + } + + // similar to ContainsInstructionIndex, but allows RA_INTER_RANGE_START/END as input + bool ContainsInstructionIndexEx(sint32 instructionIndex) const + { + if(instructionIndex == RA_INTER_RANGE_START) + return start.ConnectsToPreviousSegment(); + if(instructionIndex == RA_INTER_RANGE_END) + return end.ConnectsToNextSegment(); + return instructionIndex >= start.GetInstructionIndexEx() && instructionIndex <= end.GetInstructionIndexEx(); + } + + bool ContainsEdge(const raInstructionEdge& edge) const + { + return edge >= start && edge <= end; + } + + bool ContainsWholeInterval(const raInterval& other) const + { + return other.start >= start && other.end <= end; + } + + bool IsOverlapping(const raInterval& other) const + { + return start <= other.end && end >= other.start; + } + + sint32 GetPreciseDistance() + { + cemu_assert_debug(!start.ConnectsToNextSegment()); // how to handle this? 
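+		// worked example for the raw edge encoding (instruction index * 2, +1 for the output edge):
+		// a range from the input edge of instruction 2 (raw 4) to the output edge of instruction 3 (raw 7)
+		// has a precise distance of 7 - 4 + 1 = 4 edges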
+ if(start == end) + return 1; + cemu_assert_debug(!end.ConnectsToPreviousSegment() && !end.ConnectsToNextSegment()); + if(start.ConnectsToPreviousSegment()) + return end.GetRaw() + 1; + + return end.GetRaw() - start.GetRaw() + 1; // +1 because end is inclusive + } + +//private: not making these directly accessible only forces us to create loads of verbose getters and setters + raInstructionEdge start; + raInstructionEdge end; +}; + +struct raFixedRegRequirement +{ + raInstructionEdge pos; + IMLPhysRegisterSet allowedReg; +}; + +struct raLivenessRange +{ + IMLSegment* imlSegment; + raInterval interval; + + // dirty state tracking + bool _noLoad; + bool hasStore; + bool hasStoreDelayed; + // next + raLivenessRange* subrangeBranchTaken; + raLivenessRange* subrangeBranchNotTaken; + // reverse counterpart of BranchTaken/BranchNotTaken + boost::container::small_vector previousRanges; + // processing + uint32 lastIterationIndex; + // instruction read/write locations + std::vector list_accessLocations; + // ordered list of all raInstructionEdge indices which require a fixed register + std::vector list_fixedRegRequirements; + // linked list (subranges with same GPR virtual register) + raLivenessSubrangeLink link_sameVirtualRegister; + // linked list (all subranges for this segment) + raLivenessSubrangeLink link_allSegmentRanges; + // register info + IMLRegID virtualRegister; + IMLName name; + // register allocator result + IMLPhysReg physicalRegister; + + boost::container::small_vector GetAllSubrangesInCluster(); + bool GetAllowedRegistersEx(IMLPhysRegisterSet& allowedRegisters); // if the cluster has fixed register requirements in any instruction this returns the combined register mask. Otherwise returns false in which case allowedRegisters is left undefined + IMLPhysRegisterSet GetAllowedRegisters(IMLPhysRegisterSet regPool); // return regPool with fixed register requirements filtered out + + IMLRegID GetVirtualRegister() const; + sint32 GetPhysicalRegister() const; + bool HasPhysicalRegister() const { return physicalRegister >= 0; } + IMLName GetName() const; + void SetPhysicalRegister(IMLPhysReg physicalRegister); + void SetPhysicalRegisterForCluster(IMLPhysReg physicalRegister); + void UnsetPhysicalRegister() { physicalRegister = -1; } + + private: + void GetAllowedRegistersExRecursive(raLivenessRange* range, uint32 iterationIndex, IMLPhysRegisterSet& allowedRegs); +}; + +raLivenessRange* IMLRA_CreateRange(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* imlSegment, IMLRegID virtualRegister, IMLName name, raInstructionEdge startPosition, raInstructionEdge endPosition); +void IMLRA_DeleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange); +void IMLRA_DeleteAllRanges(ppcImlGenContext_t* ppcImlGenContext); + +void IMLRA_ExplodeRangeCluster(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* originRange); + +void IMLRA_MergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange* subrange, raLivenessRange* absorbedSubrange); + +raLivenessRange* IMLRA_SplitRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange*& subrange, raInstructionEdge splitPosition, bool trimToUsage = false); + +void PPCRecRA_debugValidateSubrange(raLivenessRange* subrange); + +// cost estimation +sint32 IMLRA_GetSegmentReadWriteCost(IMLSegment* imlSegment); +sint32 IMLRA_CalculateAdditionalCostOfRangeExplode(raLivenessRange* subrange); +//sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessRange* subrange, sint32 splitIndex); +sint32 
IMLRA_CalculateAdditionalCostAfterSplit(raLivenessRange* subrange, raInstructionEdge splitPosition); \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp new file mode 100644 index 00000000..f3b6834f --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.cpp @@ -0,0 +1,133 @@ +#include "IMLInstruction.h" +#include "IMLSegment.h" + +void IMLSegment::SetEnterable(uint32 enterAddress) +{ + cemu_assert_debug(!isEnterable || enterPPCAddress == enterAddress); + isEnterable = true; + enterPPCAddress = enterAddress; +} + +bool IMLSegment::HasSuffixInstruction() const +{ + if (imlList.empty()) + return false; + const IMLInstruction& imlInstruction = imlList.back(); + return imlInstruction.IsSuffixInstruction(); +} + +sint32 IMLSegment::GetSuffixInstructionIndex() const +{ + cemu_assert_debug(HasSuffixInstruction()); + return (sint32)(imlList.size() - 1); +} + +IMLInstruction* IMLSegment::GetLastInstruction() +{ + if (imlList.empty()) + return nullptr; + return &imlList.back(); +} + +void IMLSegment::SetLinkBranchNotTaken(IMLSegment* imlSegmentDst) +{ + if (nextSegmentBranchNotTaken) + nextSegmentBranchNotTaken->list_prevSegments.erase(std::find(nextSegmentBranchNotTaken->list_prevSegments.begin(), nextSegmentBranchNotTaken->list_prevSegments.end(), this)); + nextSegmentBranchNotTaken = imlSegmentDst; + if(imlSegmentDst) + imlSegmentDst->list_prevSegments.push_back(this); +} + +void IMLSegment::SetLinkBranchTaken(IMLSegment* imlSegmentDst) +{ + if (nextSegmentBranchTaken) + nextSegmentBranchTaken->list_prevSegments.erase(std::find(nextSegmentBranchTaken->list_prevSegments.begin(), nextSegmentBranchTaken->list_prevSegments.end(), this)); + nextSegmentBranchTaken = imlSegmentDst; + if (imlSegmentDst) + imlSegmentDst->list_prevSegments.push_back(this); +} + +IMLInstruction* IMLSegment::AppendInstruction() +{ + IMLInstruction& inst = imlList.emplace_back(); + memset(&inst, 0, sizeof(IMLInstruction)); + return &inst; +} + +void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) +{ + // make sure segments aren't already linked + if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst) + return; + // add as next segment for source + if (imlSegmentSrc->nextSegmentBranchNotTaken != nullptr) + assert_dbg(); + imlSegmentSrc->nextSegmentBranchNotTaken = imlSegmentDst; + // add as previous segment for destination + imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc); +} + +void IMLSegment_SetLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) +{ + // make sure segments aren't already linked + if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst) + return; + // add as next segment for source + if (imlSegmentSrc->nextSegmentBranchTaken != nullptr) + assert_dbg(); + imlSegmentSrc->nextSegmentBranchTaken = imlSegmentDst; + // add as previous segment for destination + imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc); +} + +void IMLSegment_RemoveLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst) +{ + if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst) + { + imlSegmentSrc->nextSegmentBranchNotTaken = nullptr; + } + else if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst) + { + imlSegmentSrc->nextSegmentBranchTaken = nullptr; + } + else + assert_dbg(); + + bool matchFound = false; + for (sint32 i = 0; i < imlSegmentDst->list_prevSegments.size(); i++) + { + if (imlSegmentDst->list_prevSegments[i] == 
imlSegmentSrc) + { + imlSegmentDst->list_prevSegments.erase(imlSegmentDst->list_prevSegments.begin() + i); + matchFound = true; + break; + } + } + if (matchFound == false) + assert_dbg(); +} + +/* + * Replaces all links to segment orig with linkts to segment new + */ +void IMLSegment_RelinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew) +{ + while (imlSegmentOrig->list_prevSegments.size() != 0) + { + IMLSegment* prevSegment = imlSegmentOrig->list_prevSegments[0]; + if (prevSegment->nextSegmentBranchNotTaken == imlSegmentOrig) + { + IMLSegment_RemoveLink(prevSegment, imlSegmentOrig); + IMLSegment_SetLinkBranchNotTaken(prevSegment, imlSegmentNew); + } + else if (prevSegment->nextSegmentBranchTaken == imlSegmentOrig) + { + IMLSegment_RemoveLink(prevSegment, imlSegmentOrig); + IMLSegment_SetLinkBranchTaken(prevSegment, imlSegmentNew); + } + else + { + assert_dbg(); + } + } +} diff --git a/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h new file mode 100644 index 00000000..10e3dc06 --- /dev/null +++ b/src/Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h @@ -0,0 +1,193 @@ +#pragma once +#include "IMLInstruction.h" + +#include + +// special values to mark the index of ranges that reach across the segment border +#define RA_INTER_RANGE_START (-1) +#define RA_INTER_RANGE_END (0x70000000) + +struct IMLSegmentPoint +{ + friend struct IMLSegmentInterval; + + sint32 index; + struct IMLSegment* imlSegment; // do we really need to track this? SegmentPoints are always accessed via the segment that they are part of + IMLSegmentPoint* next; + IMLSegmentPoint* prev; + + // the index is the instruction index times two. + // this gives us the ability to cover half an instruction with RA ranges + // covering only the first half of an instruction (0-0) means that the register is read, but not preserved + // covering first and the second half means the register is read and preserved + // covering only the second half means the register is written but not read + + sint32 GetInstructionIndex() const + { + return index; + } + + void SetInstructionIndex(sint32 index) + { + this->index = index; + } + + void ShiftIfAfter(sint32 instructionIndex, sint32 shiftCount) + { + if (!IsPreviousSegment() && !IsNextSegment()) + { + if (GetInstructionIndex() >= instructionIndex) + index += shiftCount; + } + } + + void DecrementByOneInstruction() + { + index--; + } + + // the segment point can point beyond the first and last instruction which indicates that it is an infinite range reaching up to the previous or next segment + bool IsPreviousSegment() const { return index == RA_INTER_RANGE_START; } + bool IsNextSegment() const { return index == RA_INTER_RANGE_END; } + + // overload operand > and < + bool operator>(const IMLSegmentPoint& other) const { return index > other.index; } + bool operator<(const IMLSegmentPoint& other) const { return index < other.index; } + bool operator==(const IMLSegmentPoint& other) const { return index == other.index; } + bool operator!=(const IMLSegmentPoint& other) const { return index != other.index; } + + // overload comparison operands for sint32 + bool operator>(const sint32 other) const { return index > other; } + bool operator<(const sint32 other) const { return index < other; } + bool operator<=(const sint32 other) const { return index <= other; } + bool operator>=(const sint32 other) const { return index >= other; } +}; + +struct IMLSegmentInterval +{ + IMLSegmentPoint start; + IMLSegmentPoint end; + + bool 
ContainsInstructionIndex(sint32 offset) const { return start <= offset && end > offset; } + + bool IsRangeOverlapping(const IMLSegmentInterval& other) + { + // todo - compare the raw index + sint32 r1start = this->start.GetInstructionIndex(); + sint32 r1end = this->end.GetInstructionIndex(); + sint32 r2start = other.start.GetInstructionIndex(); + sint32 r2end = other.end.GetInstructionIndex(); + if (r1start < r2end && r1end > r2start) + return true; + if (this->start.IsPreviousSegment() && r1start == r2start) + return true; + if (this->end.IsNextSegment() && r1end == r2end) + return true; + return false; + } + + bool ExtendsIntoPreviousSegment() const + { + return start.IsPreviousSegment(); + } + + bool ExtendsIntoNextSegment() const + { + return end.IsNextSegment(); + } + + bool IsNextSegmentOnly() const + { + if(!start.IsNextSegment()) + return false; + cemu_assert_debug(end.IsNextSegment()); + return true; + } + + bool IsPreviousSegmentOnly() const + { + if (!end.IsPreviousSegment()) + return false; + cemu_assert_debug(start.IsPreviousSegment()); + return true; + } + + sint32 GetDistance() const + { + // todo - assert if either start or end is outside the segment + // we may also want to switch this to raw indices? + return end.GetInstructionIndex() - start.GetInstructionIndex(); + } +}; + +struct PPCSegmentRegisterAllocatorInfo_t +{ + // used during loop detection + bool isPartOfProcessedLoop{}; + sint32 lastIterationIndex{}; + // linked lists + struct raLivenessRange* linkedList_allSubranges{}; + std::unordered_map linkedList_perVirtualRegister; +}; + +struct IMLSegment +{ + sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection) + sint32 loopDepth{}; + uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address) + uint32 x64Offset{}; // x64 code offset of segment start + // list of intermediate instructions in this segment + std::vector imlList; + // segment link + IMLSegment* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch + IMLSegment* nextSegmentBranchTaken{}; + bool nextSegmentIsUncertain{}; + std::vector list_prevSegments{}; + // source for overwrite analysis (if nextSegmentIsUncertain is true) + // sometimes a segment is marked as an exit point, but for the purposes of dead code elimination we know the next segment + IMLSegment* deadCodeEliminationHintSeg{}; + std::vector list_deadCodeHintBy{}; + // enterable segments + bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) + uint32 enterPPCAddress{}; // used if isEnterable is true + // register allocator info + PPCSegmentRegisterAllocatorInfo_t raInfo{}; + // segment state API + void SetEnterable(uint32 enterAddress); + void SetLinkBranchNotTaken(IMLSegment* imlSegmentDst); + void SetLinkBranchTaken(IMLSegment* imlSegmentDst); + + IMLSegment* GetBranchTaken() + { + return nextSegmentBranchTaken; + } + + IMLSegment* GetBranchNotTaken() + { + return nextSegmentBranchNotTaken; + } + + void SetNextSegmentForOverwriteHints(IMLSegment* seg) + { + cemu_assert_debug(!deadCodeEliminationHintSeg); + deadCodeEliminationHintSeg = seg; + if (seg) + seg->list_deadCodeHintBy.push_back(this); + } + + // instruction API + IMLInstruction* AppendInstruction(); + + bool HasSuffixInstruction() const; + sint32 GetSuffixInstructionIndex() const; + IMLInstruction* GetLastInstruction(); + + // segment points + IMLSegmentPoint* segmentPointList{}; +}; + + 
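// --- Illustrative sketch (editor's note, not part of this patch) ---
// A minimal example of how the segment-link API declared above is meant to be used when
// lowering a conditional branch into a diamond-shaped control flow: the member functions
// SetLinkBranchTaken/SetLinkBranchNotTaken also keep the destination's list_prevSegments
// in sync, as implemented in IMLSegment.cpp earlier in this diff. The helper name
// Example_BuildDiamond and the four pre-created segments are assumptions made purely for
// illustration; real segment creation goes through ppcImlGenContext_t::NewSegment().
inline void Example_BuildDiamond(IMLSegment* cond, IMLSegment* taken, IMLSegment* notTaken, IMLSegment* merge)
{
	cond->SetLinkBranchTaken(taken);       // branch edge; registers cond in taken->list_prevSegments
	cond->SetLinkBranchNotTaken(notTaken); // fall-through edge; registers cond in notTaken->list_prevSegments
	taken->SetLinkBranchNotTaken(merge);   // both paths rejoin the merge segment via their fall-through edge
	notTaken->SetLinkBranchNotTaken(merge);
	cemu_assert_debug(cond->GetBranchTaken() == taken && cond->GetBranchNotTaken() == notTaken);
	cemu_assert_debug(merge->list_prevSegments.size() == 2); // merge is now preceded by both arms
}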
+void IMLSegment_SetLinkBranchNotTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); +void IMLSegment_SetLinkBranchTaken(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); +void IMLSegment_RelinkInputSegment(IMLSegment* imlSegmentOrig, IMLSegment* imlSegmentNew); +void IMLSegment_RemoveLink(IMLSegment* imlSegmentSrc, IMLSegment* imlSegmentDst); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h b/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h index 3fc48a93..96b5143e 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCFunctionBoundaryTracker.h @@ -10,7 +10,7 @@ class PPCFunctionBoundaryTracker public: struct PPCRange_t { - PPCRange_t() {}; + PPCRange_t() = default; PPCRange_t(uint32 _startAddress) : startAddress(_startAddress) {}; uint32 startAddress{}; @@ -21,6 +21,16 @@ public: }; public: + ~PPCFunctionBoundaryTracker() + { + while (!map_ranges.empty()) + { + PPCRange_t* range = *map_ranges.begin(); + delete range; + map_ranges.erase(map_ranges.begin()); + } + } + void trackStartPoint(MPTR startAddress) { processRange(startAddress, nullptr, nullptr); @@ -40,10 +50,34 @@ public: return false; } + std::vector GetRanges() + { + std::vector r; + for (auto& it : map_ranges) + r.emplace_back(*it); + return r; + } + + bool ContainsAddress(uint32 addr) const + { + for (auto& it : map_ranges) + { + if (addr >= it->startAddress && addr < it->getEndAddress()) + return true; + } + return false; + } + + const std::set& GetBranchTargets() const + { + return map_branchTargetsAll; + } + private: void addBranchDestination(PPCRange_t* sourceRange, MPTR address) { - map_branchTargets.emplace(address); + map_queuedBranchTargets.emplace(address); + map_branchTargetsAll.emplace(address); } // process flow of instruction @@ -114,7 +148,7 @@ private: Espresso::BOField BO; uint32 BI; bool LK; - Espresso::decodeOp_BCLR(opcode, BO, BI, LK); + Espresso::decodeOp_BCSPR(opcode, BO, BI, LK); if (BO.branchAlways() && !LK) { // unconditional BLR @@ -153,7 +187,7 @@ private: void checkForCollisions() { -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT uint32 endOfPrevious = 0; for (auto itr : map_ranges) { @@ -218,7 +252,7 @@ private: auto rangeItr = map_ranges.begin(); PPCRange_t* previousRange = nullptr; - for (std::set::const_iterator targetItr = map_branchTargets.begin() ; targetItr != map_branchTargets.end(); ) + for (std::set::const_iterator targetItr = map_queuedBranchTargets.begin() ; targetItr != map_queuedBranchTargets.end(); ) { while (rangeItr != map_ranges.end() && ((*rangeItr)->startAddress + (*rangeItr)->length) <= (*targetItr)) { @@ -239,7 +273,7 @@ private: (*targetItr) < ((*rangeItr)->startAddress + (*rangeItr)->length)) { // delete visited targets - targetItr = map_branchTargets.erase(targetItr); + targetItr = map_queuedBranchTargets.erase(targetItr); continue; } @@ -289,5 +323,6 @@ private: }; std::set map_ranges; - std::set map_branchTargets; + std::set map_queuedBranchTargets; + std::set map_branchTargetsAll; }; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp index 61aa11ca..6125c7da 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.cpp @@ -2,19 +2,28 @@ #include "PPCFunctionBoundaryTracker.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" #include "Cafe/OS/RPL/rpl.h" #include 
"util/containers/RangeStore.h" #include "Cafe/OS/libs/coreinit/coreinit_CodeGen.h" #include "config/ActiveSettings.h" #include "config/LaunchSettings.h" - -#include "util/helpers/fspinlock.h" #include "Common/ExceptionHandler/ExceptionHandler.h" +#include "Common/cpu_features.h" +#include "util/helpers/fspinlock.h" #include "util/helpers/helpers.h" - #include "util/MemMapper/MemMapper.h" +#include "IML/IML.h" +#include "IML/IMLRegisterAllocator.h" +#include "BackendX64/BackendX64.h" +#ifdef __aarch64__ +#include "BackendAArch64/BackendAArch64.h" +#endif +#include "util/highresolutiontimer/HighResolutionTimer.h" + +#define PPCREC_FORCE_SYNCHRONOUS_COMPILATION 0 // if 1, then function recompilation will block and execute on the thread that called PPCRecompiler_visitAddressNoBlock +#define PPCREC_LOG_RECOMPILATION_RESULTS 0 + struct PPCInvalidationRange { MPTR startAddress; @@ -38,29 +47,54 @@ void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)(); PPCRecompilerInstanceData_t* ppcRecompilerInstanceData; +#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION +static std::mutex s_singleRecompilationMutex; +#endif + bool ppcRecompilerEnabled = false; +void PPCRecompiler_recompileAtAddress(uint32 address); + // this function does never block and can fail if the recompiler lock cannot be acquired immediately void PPCRecompiler_visitAddressNoBlock(uint32 enterAddress) { +#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION + if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited) + return; + PPCRecompilerState.recompilerSpinlock.lock(); + if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited) + { + PPCRecompilerState.recompilerSpinlock.unlock(); + return; + } + ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] = PPCRecompiler_leaveRecompilerCode_visited; + PPCRecompilerState.recompilerSpinlock.unlock(); + s_singleRecompilationMutex.lock(); + if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] == PPCRecompiler_leaveRecompilerCode_visited) + { + PPCRecompiler_recompileAtAddress(enterAddress); + } + s_singleRecompilationMutex.unlock(); + return; +#endif // quick read-only check without lock if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] != PPCRecompiler_leaveRecompilerCode_unvisited) return; // try to acquire lock - if (!PPCRecompilerState.recompilerSpinlock.tryAcquire()) + if (!PPCRecompilerState.recompilerSpinlock.try_lock()) return; auto funcPtr = ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4]; if (funcPtr != PPCRecompiler_leaveRecompilerCode_unvisited) { // was visited since previous check - PPCRecompilerState.recompilerSpinlock.release(); + PPCRecompilerState.recompilerSpinlock.unlock(); return; } // add to recompilation queue and flag as visited PPCRecompilerState.targetQueue.emplace(enterAddress); ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[enterAddress / 4] = PPCRecompiler_leaveRecompilerCode_visited; - PPCRecompilerState.recompilerSpinlock.release(); + PPCRecompilerState.recompilerSpinlock.unlock(); } void PPCRecompiler_recompileIfUnvisited(uint32 enterAddress) @@ -78,12 +112,12 @@ void PPCRecompiler_enter(PPCInterpreter_t* hCPU, PPCREC_JUMP_ENTRY funcPtr) PPCRecompiler_enterRecompilerCode((uint64)funcPtr, (uint64)hCPU); _controlfp(prevState, _MCW_RC); // debug recompiler exit - useful to find frequently executed functions which couldn't be 
recompiled - #ifndef PUBLIC_RELEASE + #ifdef CEMU_DEBUG_ASSERT if (hCPU->remainingCycles > 0 && GetAsyncKeyState(VK_F4)) { auto t = std::chrono::high_resolution_clock::now(); auto dur = std::chrono::duration_cast(t.time_since_epoch()).count(); - forceLog_printf("Recompiler exit: 0x%08x LR: 0x%08x Timestamp %lld.%04lld", hCPU->instructionPointer, hCPU->spr.LR, dur / 1000LL, (dur % 1000LL)); + cemuLog_log(LogType::Force, "Recompiler exit: 0x{:08x} LR: 0x{:08x} Timestamp {}.{:04}", hCPU->instructionPointer, hCPU->spr.LR, dur / 1000LL, (dur % 1000LL)); } #endif #else @@ -128,15 +162,15 @@ void PPCRecompiler_attemptEnter(PPCInterpreter_t* hCPU, uint32 enterAddress) PPCRecompiler_enter(hCPU, funcPtr); } } +bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext); -PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set& entryAddresses, std::vector>& entryPointsOut) +PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PPCRange_t range, std::set& entryAddresses, std::vector>& entryPointsOut, PPCFunctionBoundaryTracker& boundaryTracker) { if (range.startAddress >= PPC_REC_CODE_AREA_END) { - cemuLog_force("Attempting to recompile function outside of allowed code area"); + cemuLog_log(LogType::Force, "Attempting to recompile function outside of allowed code area"); return nullptr; } - uint32 codeGenRangeStart; uint32 codeGenRangeSize = 0; coreinit::OSGetCodegenVirtAddrRangeInternal(codeGenRangeStart, codeGenRangeSize); @@ -154,29 +188,69 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP PPCRecFunction_t* ppcRecFunc = new PPCRecFunction_t(); ppcRecFunc->ppcAddress = range.startAddress; ppcRecFunc->ppcSize = range.length; + +#if PPCREC_LOG_RECOMPILATION_RESULTS + BenchmarkTimer bt; + bt.Start(); +#endif + // generate intermediate code ppcImlGenContext_t ppcImlGenContext = { 0 }; - bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses); + ppcImlGenContext.debug_entryPPCAddress = range.startAddress; + bool compiledSuccessfully = PPCRecompiler_generateIntermediateCode(ppcImlGenContext, ppcRecFunc, entryAddresses, boundaryTracker); if (compiledSuccessfully == false) { - // todo: Free everything - PPCRecompiler_freeContext(&ppcImlGenContext); delete ppcRecFunc; - return NULL; + return nullptr; } + + uint32 ppcRecLowerAddr = LaunchSettings::GetPPCRecLowerAddr(); + uint32 ppcRecUpperAddr = LaunchSettings::GetPPCRecUpperAddr(); + + if (ppcRecLowerAddr != 0 && ppcRecUpperAddr != 0) + { + if (ppcRecFunc->ppcAddress < ppcRecLowerAddr || ppcRecFunc->ppcAddress > ppcRecUpperAddr) + { + delete ppcRecFunc; + return nullptr; + } + } + + // apply passes + if (!PPCRecompiler_ApplyIMLPasses(ppcImlGenContext)) + { + delete ppcRecFunc; + return nullptr; + } + +#if defined(ARCH_X86_64) // emit x64 code bool x64GenerationSuccess = PPCRecompiler_generateX64Code(ppcRecFunc, &ppcImlGenContext); if (x64GenerationSuccess == false) { - PPCRecompiler_freeContext(&ppcImlGenContext); return nullptr; } +#elif defined(__aarch64__) + bool aarch64GenerationSuccess = PPCRecompiler_generateAArch64Code(ppcRecFunc, &ppcImlGenContext); + if (aarch64GenerationSuccess == false) + { + return nullptr; + } +#endif + if (ActiveSettings::DumpRecompilerFunctionsEnabled()) + { + FileStream* fs = FileStream::createFile2(ActiveSettings::GetUserDataPath(fmt::format("dump/recompiler/ppc_{:08x}.bin", ppcRecFunc->ppcAddress))); + if (fs) + { + fs->writeData(ppcRecFunc->x86Code, 
ppcRecFunc->x86Size); + delete fs; + } + } // collect list of PPC-->x64 entry points entryPointsOut.clear(); - for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++) + for(IMLSegment* imlSegment : ppcImlGenContext.segmentList2) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s]; if (imlSegment->isEnterable == false) continue; @@ -186,24 +260,108 @@ PPCRecFunction_t* PPCRecompiler_recompileFunction(PPCFunctionBoundaryTracker::PP entryPointsOut.emplace_back(ppcEnterOffset, x64Offset); } - PPCRecompiler_freeContext(&ppcImlGenContext); +#if PPCREC_LOG_RECOMPILATION_RESULTS + bt.Stop(); + uint32 codeHash = 0; + for (uint32 i = 0; i < ppcRecFunc->x86Size; i++) + { + codeHash = _rotr(codeHash, 3); + codeHash += ((uint8*)ppcRecFunc->x86Code)[i]; + } + cemuLog_log(LogType::Force, "[Recompiler] PPC 0x{:08x} -> x64: 0x{:x} Took {:.4}ms | Size {:04x} CodeHash {:08x}", (uint32)ppcRecFunc->ppcAddress, (uint64)(uintptr_t)ppcRecFunc->x86Code, bt.GetElapsedMilliseconds(), ppcRecFunc->x86Size, codeHash); +#endif + return ppcRecFunc; } +void PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext_t& ppcImlGenContext) +{ + IMLRegisterAllocatorParameters raParam; + + for (auto& it : ppcImlGenContext.mappedRegs) + raParam.regIdToName.try_emplace(it.second.GetRegID(), it.first); + +#if defined(ARCH_X86_64) + auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RAX); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDX); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RBX); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RBP); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RSI); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RDI); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R8); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R9); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R10); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R11); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_R12); + gprPhysPool.SetAvailable(IMLArchX86::PHYSREG_GPR_BASE + X86_REG_RCX); + + // add XMM registers, except XMM15 which is the temporary register + auto& fprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::F64); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 0); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 1); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 2); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 3); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 4); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 5); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 6); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 7); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 8); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 9); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 10); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 11); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 12); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 13); + fprPhysPool.SetAvailable(IMLArchX86::PHYSREG_FPR_BASE + 14); +#elif defined(__aarch64__) + auto& gprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::I64); + for (auto i = IMLArchAArch64::PHYSREG_GPR_BASE; i < IMLArchAArch64::PHYSREG_GPR_BASE + IMLArchAArch64::PHYSREG_GPR_COUNT; i++) 
+ { + if (i == IMLArchAArch64::PHYSREG_GPR_BASE + 18) + continue; // Skip reserved platform register + gprPhysPool.SetAvailable(i); + } + + auto& fprPhysPool = raParam.GetPhysRegPool(IMLRegFormat::F64); + for (auto i = IMLArchAArch64::PHYSREG_FPR_BASE; i < IMLArchAArch64::PHYSREG_FPR_BASE + IMLArchAArch64::PHYSREG_FPR_COUNT; i++) + fprPhysPool.SetAvailable(i); +#endif + + IMLRegisterAllocator_AllocateRegisters(&ppcImlGenContext, raParam); +} + +bool PPCRecompiler_ApplyIMLPasses(ppcImlGenContext_t& ppcImlGenContext) +{ + // isolate entry points from function flow (enterable segments must not be the target of any other segment) + // this simplifies logic during register allocation + PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext); + + // merge certain float load+store patterns + IMLOptimizer_OptimizeDirectFloatCopies(&ppcImlGenContext); + // delay byte swapping for certain load+store patterns + IMLOptimizer_OptimizeDirectIntegerCopies(&ppcImlGenContext); + + IMLOptimizer_StandardOptimizationPass(ppcImlGenContext); + + PPCRecompiler_NativeRegisterAllocatorPass(ppcImlGenContext); + + return true; +} + bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFunctionBoundaryTracker::PPCRange_t& range, PPCRecFunction_t* ppcRecFunc, std::vector>& entryPoints) { // update jump table - PPCRecompilerState.recompilerSpinlock.acquire(); + PPCRecompilerState.recompilerSpinlock.lock(); // check if the initial entrypoint is still flagged for recompilation // its possible that the range has been invalidated during the time it took to translate the function if (ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[initialEntryPoint / 4] != PPCRecompiler_leaveRecompilerCode_visited) { - PPCRecompilerState.recompilerSpinlock.release(); + PPCRecompilerState.recompilerSpinlock.unlock(); return false; } - // check if the current range got invalidated in the time it took to recompile it + // check if the current range got invalidated during the time it took to recompile it bool isInvalidated = false; for (auto& invRange : PPCRecompilerState.invalidationRanges) { @@ -221,7 +379,7 @@ bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFun PPCRecompilerState.invalidationRanges.clear(); if (isInvalidated) { - PPCRecompilerState.recompilerSpinlock.release(); + PPCRecompilerState.recompilerSpinlock.unlock(); return false; } @@ -249,7 +407,7 @@ bool PPCRecompiler_makeRecompiledFunctionActive(uint32 initialEntryPoint, PPCFun { r.storedRange = rangeStore_ppcRanges.storeRange(ppcRecFunc, r.ppcAddress, r.ppcAddress + r.ppcSize); } - PPCRecompilerState.recompilerSpinlock.release(); + PPCRecompilerState.recompilerSpinlock.unlock(); return true; @@ -272,16 +430,16 @@ void PPCRecompiler_recompileAtAddress(uint32 address) // todo - use info from previously compiled ranges to determine full size of this function (and merge all the entryAddresses) // collect all currently known entry points for this range - PPCRecompilerState.recompilerSpinlock.acquire(); + PPCRecompilerState.recompilerSpinlock.lock(); std::set entryAddresses; entryAddresses.emplace(address); - PPCRecompilerState.recompilerSpinlock.release(); + PPCRecompilerState.recompilerSpinlock.unlock(); std::vector> functionEntryPoints; - auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints); + auto func = PPCRecompiler_recompileFunction(range, entryAddresses, functionEntryPoints, funcBoundaries); if (!func) { @@ -290,11 +448,20 @@ void PPCRecompiler_recompileAtAddress(uint32 
address) bool r = PPCRecompiler_makeRecompiledFunctionActive(address, range, func, functionEntryPoints); } +std::thread s_threadRecompiler; +std::atomic_bool s_recompilerThreadStopSignal{false}; + void PPCRecompiler_thread() { - SetThreadName("PPCRecompiler_thread"); + SetThreadName("PPCRecompiler"); +#if PPCREC_FORCE_SYNCHRONOUS_COMPILATION + return; +#endif + while (true) { + if(s_recompilerThreadStopSignal) + return; std::this_thread::sleep_for(std::chrono::milliseconds(10)); // asynchronous recompilation: // 1) take address from queue @@ -302,10 +469,10 @@ void PPCRecompiler_thread() // 3) if yes -> calculate size, gather all entry points, recompile and update jump table while (true) { - PPCRecompilerState.recompilerSpinlock.acquire(); + PPCRecompilerState.recompilerSpinlock.lock(); if (PPCRecompilerState.targetQueue.empty()) { - PPCRecompilerState.recompilerSpinlock.release(); + PPCRecompilerState.recompilerSpinlock.unlock(); break; } auto enterAddress = PPCRecompilerState.targetQueue.front(); @@ -315,19 +482,26 @@ void PPCRecompiler_thread() if (funcPtr != PPCRecompiler_leaveRecompilerCode_visited) { // only recompile functions if marked as visited - PPCRecompilerState.recompilerSpinlock.release(); + PPCRecompilerState.recompilerSpinlock.unlock(); continue; } - PPCRecompilerState.recompilerSpinlock.release(); + PPCRecompilerState.recompilerSpinlock.unlock(); PPCRecompiler_recompileAtAddress(enterAddress); + if(s_recompilerThreadStopSignal) + return; } } } #define PPC_REC_ALLOC_BLOCK_SIZE (4*1024*1024) // 4MB -std::bitset<(MEMORY_CODEAREA_ADDR + MEMORY_CODEAREA_SIZE) / PPC_REC_ALLOC_BLOCK_SIZE> ppcRecompiler_reservedBlockMask; +constexpr uint32 PPCRecompiler_GetNumAddressSpaceBlocks() +{ + return (MEMORY_CODEAREA_ADDR + MEMORY_CODEAREA_SIZE + PPC_REC_ALLOC_BLOCK_SIZE - 1) / PPC_REC_ALLOC_BLOCK_SIZE; +} + +std::bitset ppcRecompiler_reservedBlockMask; void PPCRecompiler_reserveLookupTableBlock(uint32 offset) { @@ -342,7 +516,7 @@ void PPCRecompiler_reserveLookupTableBlock(uint32 offset) void* p3 = MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[offset/4]), (PPC_REC_ALLOC_BLOCK_SIZE/4)*sizeof(void*), MemMapper::PAGE_PERMISSION::P_RW, true); if( !p1 || !p3 ) { - forceLog_printf("Failed to allocate memory for recompiler (0x%08x)", offset); + cemuLog_log(LogType::Force, "Failed to allocate memory for recompiler (0x{:08x})", offset); cemu_assert(false); return; } @@ -376,7 +550,7 @@ struct ppcRecompilerFuncRange_t bool PPCRecompiler_findFuncRanges(uint32 addr, ppcRecompilerFuncRange_t* rangesOut, size_t* countInOut) { - PPCRecompilerState.recompilerSpinlock.acquire(); + PPCRecompilerState.recompilerSpinlock.lock(); size_t countIn = *countInOut; size_t countOut = 0; @@ -392,7 +566,7 @@ bool PPCRecompiler_findFuncRanges(uint32 addr, ppcRecompilerFuncRange_t* rangesO countOut++; } ); - PPCRecompilerState.recompilerSpinlock.release(); + PPCRecompilerState.recompilerSpinlock.unlock(); *countInOut = countOut; if (countOut > countIn) return false; @@ -420,7 +594,7 @@ void PPCRecompiler_invalidateTableRange(uint32 offset, uint32 size) void PPCRecompiler_deleteFunction(PPCRecFunction_t* func) { // assumes PPCRecompilerState.recompilerSpinlock is already held - cemu_assert_debug(PPCRecompilerState.recompilerSpinlock.isHolding()); + cemu_assert_debug(PPCRecompilerState.recompilerSpinlock.is_locked()); for (auto& r : func->list_ranges) { PPCRecompiler_invalidateTableRange(r.ppcAddress, r.ppcSize); @@ -439,7 +613,7 @@ void PPCRecompiler_invalidateRange(uint32 
startAddr, uint32 endAddr) return; cemu_assert_debug(endAddr >= startAddr); - PPCRecompilerState.recompilerSpinlock.acquire(); + PPCRecompilerState.recompilerSpinlock.lock(); uint32 rStart; uint32 rEnd; @@ -458,43 +632,12 @@ void PPCRecompiler_invalidateRange(uint32 startAddr, uint32 endAddr) PPCRecompiler_deleteFunction(rFunc); } - PPCRecompilerState.recompilerSpinlock.release(); + PPCRecompilerState.recompilerSpinlock.unlock(); } -void PPCRecompiler_init() +#if defined(ARCH_X86_64) +void PPCRecompiler_initPlatform() { - if (ActiveSettings::GetCPUMode() == CPUMode::SinglecoreInterpreter) - { - ppcRecompilerEnabled = false; - return; - } - if (LaunchSettings::ForceInterpreter()) - { - cemuLog_log(LogType::Force, "Recompiler disabled. Command line --force-interpreter was passed"); - return; - } - if (ppcRecompilerInstanceData) - { - MemMapper::FreeReservation(ppcRecompilerInstanceData, sizeof(PPCRecompilerInstanceData_t)); - ppcRecompilerInstanceData = nullptr; - } - debug_printf("Allocating %dMB for recompiler instance data...\n", (sint32)(sizeof(PPCRecompilerInstanceData_t) / 1024 / 1024)); - ppcRecompilerInstanceData = (PPCRecompilerInstanceData_t*)MemMapper::ReserveMemory(nullptr, sizeof(PPCRecompilerInstanceData_t), MemMapper::PAGE_PERMISSION::P_RW); - MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom), sizeof(PPCRecompilerInstanceData_t) - offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom), MemMapper::PAGE_PERMISSION::P_RW, true); - PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions(); - - uint32 codeRegionEnd = RPLLoader_GetMaxCodeOffset(); - codeRegionEnd = (codeRegionEnd + PPC_REC_ALLOC_BLOCK_SIZE - 1) & ~(PPC_REC_ALLOC_BLOCK_SIZE - 1); - - uint32 codeRegionSize = codeRegionEnd - PPC_REC_CODE_AREA_START; - forceLogDebug_printf("Allocating recompiler tables for range 0x%08x-0x%08x", PPC_REC_CODE_AREA_START, codeRegionEnd); - - for (uint32 i = 0; i < codeRegionSize; i += PPC_REC_ALLOC_BLOCK_SIZE) - { - PPCRecompiler_reserveLookupTableBlock(i); - } - - // init x64 recompiler instance data ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[0] = 1ULL << 63ULL; ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom[1] = 0ULL; ppcRecompilerInstanceData->_x64XMM_xorNegateMaskPair[0] = 1ULL << 63ULL; @@ -530,64 +673,80 @@ void PPCRecompiler_init() ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[2] = ~0x80000000; ppcRecompilerInstanceData->_x64XMM_flushDenormalMaskResetSignBits[3] = ~0x80000000; - // setup GQR scale tables - - for (uint32 i = 0; i < 32; i++) - { - float a = 1.0f / (float)(1u << i); - float b = 0; - if (i == 0) - b = 4294967296.0f; - else - b = (float)(1u << (32u - i)); - - float ar = (float)(1u << i); - float br = 0; - if (i == 0) - br = 1.0f / 4294967296.0f; - else - br = 1.0f / (float)(1u << (32u - i)); - - ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 0] = a; - ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[i * 2 + 1] = 1.0f; - ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 0] = b; - ppcRecompilerInstanceData->_psq_ld_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f; - - ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 0] = a; - ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[i * 2 + 1] = a; - ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 0] = b; - ppcRecompilerInstanceData->_psq_ld_scale_ps0_ps1[(i + 32) * 2 + 1] = b; - - ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 0] = ar; - ppcRecompilerInstanceData->_psq_st_scale_ps0_1[i * 2 + 1] = 1.0f; - 
ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 0] = br; - ppcRecompilerInstanceData->_psq_st_scale_ps0_1[(i + 32) * 2 + 1] = 1.0f; - - ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 0] = ar; - ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[i * 2 + 1] = ar; - ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 0] = br; - ppcRecompilerInstanceData->_psq_st_scale_ps0_ps1[(i + 32) * 2 + 1] = br; - } - // mxcsr ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOn = 0x1F80 | 0x8000; ppcRecompilerInstanceData->_x64XMM_mxCsr_ftzOff = 0x1F80; +} +#else +void PPCRecompiler_initPlatform() +{ + +} +#endif - // query processor extensions - int cpuInfo[4]; - cpuid(cpuInfo, 0x80000001); - hasLZCNTSupport = ((cpuInfo[2] >> 5) & 1) != 0; - cpuid(cpuInfo, 0x1); - hasMOVBESupport = ((cpuInfo[2] >> 22) & 1) != 0; - hasAVXSupport = ((cpuInfo[2] >> 28) & 1) != 0; - cpuidex(cpuInfo, 0x7, 0); - hasBMI2Support = ((cpuInfo[1] >> 8) & 1) != 0; +void PPCRecompiler_init() +{ + if (ActiveSettings::GetCPUMode() == CPUMode::SinglecoreInterpreter) + { + ppcRecompilerEnabled = false; + return; + } + if (LaunchSettings::ForceInterpreter() || LaunchSettings::ForceMultiCoreInterpreter()) + { + cemuLog_log(LogType::Force, "Recompiler disabled. Command line --force-interpreter or force-multicore-interpreter was passed"); + return; + } + if (ppcRecompilerInstanceData) + { + MemMapper::FreeReservation(ppcRecompilerInstanceData, sizeof(PPCRecompilerInstanceData_t)); + ppcRecompilerInstanceData = nullptr; + } + debug_printf("Allocating %dMB for recompiler instance data...\n", (sint32)(sizeof(PPCRecompilerInstanceData_t) / 1024 / 1024)); + ppcRecompilerInstanceData = (PPCRecompilerInstanceData_t*)MemMapper::ReserveMemory(nullptr, sizeof(PPCRecompilerInstanceData_t), MemMapper::PAGE_PERMISSION::P_RW); + MemMapper::AllocateMemory(&(ppcRecompilerInstanceData->_x64XMM_xorNegateMaskBottom), sizeof(PPCRecompilerInstanceData_t) - offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom), MemMapper::PAGE_PERMISSION::P_RW, true); +#ifdef ARCH_X86_64 + PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions(); +#elif defined(__aarch64__) + PPCRecompilerAArch64Gen_generateRecompilerInterfaceFunctions(); +#endif + PPCRecompiler_allocateRange(0, 0x1000); // the first entry is used for fallback to interpreter + PPCRecompiler_allocateRange(mmuRange_TRAMPOLINE_AREA.getBase(), mmuRange_TRAMPOLINE_AREA.getSize()); + PPCRecompiler_allocateRange(mmuRange_CODECAVE.getBase(), mmuRange_CODECAVE.getSize()); - forceLog_printf("Recompiler initialized. CPU extensions: %s%s%s", hasLZCNTSupport ? "LZCNT " : "", hasMOVBESupport ? "MOVBE " : "", hasAVXSupport ? 
"AVX " : ""); + PPCRecompiler_initPlatform(); + + cemuLog_log(LogType::Force, "Recompiler initialized"); ppcRecompilerEnabled = true; // launch recompilation thread - std::thread t_recompiler(PPCRecompiler_thread); - t_recompiler.detach(); + s_recompilerThreadStopSignal = false; + s_threadRecompiler = std::thread(PPCRecompiler_thread); +} + +void PPCRecompiler_Shutdown() +{ + // shut down recompiler thread + s_recompilerThreadStopSignal = true; + if(s_threadRecompiler.joinable()) + s_threadRecompiler.join(); + // clean up queues + while(!PPCRecompilerState.targetQueue.empty()) + PPCRecompilerState.targetQueue.pop(); + PPCRecompilerState.invalidationRanges.clear(); + // clean range store + rangeStore_ppcRanges.clear(); + // clean up memory + uint32 numBlocks = PPCRecompiler_GetNumAddressSpaceBlocks(); + for(uint32 i=0; ippcRecompilerFuncTable[offset/4]), (PPC_REC_ALLOC_BLOCK_SIZE/4)*sizeof(void*), true); + MemMapper::FreeMemory(&(ppcRecompilerInstanceData->ppcRecompilerDirectJumpTable[offset/4]), (PPC_REC_ALLOC_BLOCK_SIZE/4)*sizeof(void*), true); + // mark as unmapped + ppcRecompiler_reservedBlockMask[i] = false; + } } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h index ee0454ce..47902630 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompiler.h @@ -1,4 +1,4 @@ -#include +#pragma once #define PPC_REC_CODE_AREA_START (0x00000000) // lower bound of executable memory area. Recompiler expects this address to be 0 #define PPC_REC_CODE_AREA_END (0x10000000) // upper bound of executable memory area @@ -6,336 +6,113 @@ #define PPC_REC_ALIGN_TO_4MB(__v) (((__v)+4*1024*1024-1)&~(4*1024*1024-1)) -#define PPC_REC_MAX_VIRTUAL_GPR (40) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2) +#define PPC_REC_MAX_VIRTUAL_GPR (40 + 32) // enough to store 32 GPRs + a few SPRs + temp registers (usually only 1-2) -typedef struct +struct ppcRecRange_t { uint32 ppcAddress; uint32 ppcSize; - //void* x86Start; - //size_t x86Size; void* storedRange; -}ppcRecRange_t; +}; -typedef struct +struct PPCRecFunction_t { uint32 ppcAddress; uint32 ppcSize; // ppc code size of function void* x86Code; // pointer to x86 code size_t x86Size; std::vector list_ranges; -}PPCRecFunction_t; - -#define PPCREC_IML_OP_FLAG_SIGNEXTEND (1<<0) -#define PPCREC_IML_OP_FLAG_SWITCHENDIAN (1<<1) -#define PPCREC_IML_OP_FLAG_NOT_EXPANDED (1<<2) // set single-precision load instructions to indicate that the value should not be rounded to double-precision -#define PPCREC_IML_OP_FLAG_UNUSED (1<<7) // used to mark instructions that are not used - -typedef struct -{ - uint8 type; - uint8 operation; - uint8 crRegister; // set to 0xFF if not set, not all IML instruction types support cr. 
- uint8 crMode; // only used when crRegister is valid, used to differentiate between various forms of condition flag set/clear behavior - uint32 crIgnoreMask; // bit set for every respective CR bit that doesn't need to be updated - uint32 associatedPPCAddress; // ppc address that is associated with this instruction - union - { - struct - { - uint8 _padding[7]; - }padding; - struct - { - // R (op) A [update cr* in mode *] - uint8 registerResult; - uint8 registerA; - }op_r_r; - struct - { - // R = A (op) B [update cr* in mode *] - uint8 registerResult; - uint8 registerA; - uint8 registerB; - }op_r_r_r; - struct - { - // R = A (op) immS32 [update cr* in mode *] - uint8 registerResult; - uint8 registerA; - sint32 immS32; - }op_r_r_s32; - struct - { - // R/F = NAME or NAME = R/F - uint8 registerIndex; - uint8 copyWidth; - uint32 name; - uint8 flags; - }op_r_name; - struct - { - // R (op) s32 [update cr* in mode *] - uint8 registerIndex; - sint32 immS32; - }op_r_immS32; - struct - { - uint32 address; - uint8 flags; - }op_jumpmark; - struct - { - uint32 param; - uint32 param2; - uint16 paramU16; - }op_macro; - struct - { - uint32 jumpmarkAddress; - bool jumpAccordingToSegment; //PPCRecImlSegment_t* destinationSegment; // if set, this replaces jumpmarkAddress - uint8 condition; // only used when crRegisterIndex is 8 or above (update: Apparently only used to mark jumps without a condition? -> Cleanup) - uint8 crRegisterIndex; - uint8 crBitIndex; - bool bitMustBeSet; - }op_conditionalJump; - struct - { - uint8 registerData; - uint8 registerMem; - uint8 registerMem2; - uint8 registerGQR; - uint8 copyWidth; - //uint8 flags; - struct - { - bool swapEndian : 1; - bool signExtend : 1; - bool notExpanded : 1; // for floats - }flags2; - uint8 mode; // transfer mode (copy width, ps0/ps1 behavior) - sint32 immS32; - }op_storeLoad; - struct - { - struct - { - uint8 registerMem; - sint32 immS32; - }src; - struct - { - uint8 registerMem; - sint32 immS32; - }dst; - uint8 copyWidth; - }op_mem2mem; - struct - { - uint8 registerResult; - uint8 registerOperand; - uint8 flags; - }op_fpr_r_r; - struct - { - uint8 registerResult; - uint8 registerOperandA; - uint8 registerOperandB; - uint8 flags; - }op_fpr_r_r_r; - struct - { - uint8 registerResult; - uint8 registerOperandA; - uint8 registerOperandB; - uint8 registerOperandC; - uint8 flags; - }op_fpr_r_r_r_r; - struct - { - uint8 registerResult; - //uint8 flags; - }op_fpr_r; - struct - { - uint32 ppcAddress; - uint32 x64Offset; - }op_ppcEnter; - struct - { - uint8 crD; // crBitIndex (result) - uint8 crA; // crBitIndex - uint8 crB; // crBitIndex - }op_cr; - // conditional operations (emitted if supported by target platform) - struct - { - // r_s32 - uint8 registerIndex; - sint32 immS32; - // condition - uint8 crRegisterIndex; - uint8 crBitIndex; - bool bitMustBeSet; - }op_conditional_r_s32; - }; -}PPCRecImlInstruction_t; - -typedef struct _PPCRecImlSegment_t PPCRecImlSegment_t; - -typedef struct _ppcRecompilerSegmentPoint_t -{ - sint32 index; - PPCRecImlSegment_t* imlSegment; - _ppcRecompilerSegmentPoint_t* next; - _ppcRecompilerSegmentPoint_t* prev; -}ppcRecompilerSegmentPoint_t; - -struct raLivenessLocation_t -{ - sint32 index; - bool isRead; - bool isWrite; - - raLivenessLocation_t() {}; - - raLivenessLocation_t(sint32 index, bool isRead, bool isWrite) - : index(index), isRead(isRead), isWrite(isWrite) {}; }; -struct raLivenessSubrangeLink_t -{ - struct raLivenessSubrange_t* prev; - struct raLivenessSubrange_t* next; -}; +#include 
"Cafe/HW/Espresso/Recompiler/IML/IMLInstruction.h" +#include "Cafe/HW/Espresso/Recompiler/IML/IMLSegment.h" -struct raLivenessSubrange_t -{ - struct raLivenessRange_t* range; - PPCRecImlSegment_t* imlSegment; - ppcRecompilerSegmentPoint_t start; - ppcRecompilerSegmentPoint_t end; - // dirty state tracking - bool _noLoad; - bool hasStore; - bool hasStoreDelayed; - // next - raLivenessSubrange_t* subrangeBranchTaken; - raLivenessSubrange_t* subrangeBranchNotTaken; - // processing - uint32 lastIterationIndex; - // instruction locations - std::vector list_locations; - // linked list (subranges with same GPR virtual register) - raLivenessSubrangeLink_t link_sameVirtualRegisterGPR; - // linked list (all subranges for this segment) - raLivenessSubrangeLink_t link_segmentSubrangesGPR; -}; - -struct raLivenessRange_t -{ - sint32 virtualRegister; - sint32 physicalRegister; - sint32 name; - std::vector list_subranges; -}; - -struct PPCSegmentRegisterAllocatorInfo_t -{ - // analyzer stage - bool isPartOfProcessedLoop{}; // used during loop detection - sint32 lastIterationIndex{}; - // linked lists - raLivenessSubrange_t* linkedList_allSubranges{}; - raLivenessSubrange_t* linkedList_perVirtualGPR[PPC_REC_MAX_VIRTUAL_GPR]{}; -}; - -struct PPCRecVGPRDistances_t -{ - struct _RegArrayEntry - { - sint32 usageStart{}; - sint32 usageEnd{}; - }reg[PPC_REC_MAX_VIRTUAL_GPR]; - bool isProcessed[PPC_REC_MAX_VIRTUAL_GPR]{}; -}; - -typedef struct _PPCRecImlSegment_t -{ - sint32 momentaryIndex{}; // index in segment list, generally not kept up to date except if needed (necessary for loop detection) - sint32 startOffset{}; // offset to first instruction in iml instruction list - sint32 count{}; // number of instructions in segment - uint32 ppcAddress{}; // ppc address (0xFFFFFFFF if not associated with an address) - uint32 x64Offset{}; // x64 code offset of segment start - uint32 cycleCount{}; // number of PPC cycles required to execute this segment (roughly) - // list of intermediate instructions in this segment - PPCRecImlInstruction_t* imlList{}; - sint32 imlListSize{}; - sint32 imlListCount{}; - // segment link - _PPCRecImlSegment_t* nextSegmentBranchNotTaken{}; // this is also the default for segments where there is no branch - _PPCRecImlSegment_t* nextSegmentBranchTaken{}; - bool nextSegmentIsUncertain{}; - sint32 loopDepth{}; - //sList_t* list_prevSegments; - std::vector<_PPCRecImlSegment_t*> list_prevSegments{}; - // PPC range of segment - uint32 ppcAddrMin{}; - uint32 ppcAddrMax{}; - // enterable segments - bool isEnterable{}; // this segment can be entered from outside the recompiler (no preloaded registers necessary) - uint32 enterPPCAddress{}; // used if isEnterable is true - // jump destination segments - bool isJumpDestination{}; // segment is a destination for one or more (conditional) jumps - uint32 jumpDestinationPPCAddress{}; - // PPC FPR use mask - bool ppcFPRUsed[32]{}; // same as ppcGPRUsed, but for FPR - // CR use mask - uint32 crBitsInput{}; // bits that are expected to be set from the previous segment (read in this segment but not overwritten) - uint32 crBitsRead{}; // all bits that are read in this segment - uint32 crBitsWritten{}; // bits that are written in this segment - // register allocator info - PPCSegmentRegisterAllocatorInfo_t raInfo{}; - PPCRecVGPRDistances_t raDistances{}; - bool raRangeExtendProcessed{}; - // segment points - ppcRecompilerSegmentPoint_t* segmentPointList{}; -}PPCRecImlSegment_t; +struct IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(struct 
ppcImlGenContext_t* ppcImlGenContext); struct ppcImlGenContext_t { - PPCRecFunction_t* functionRef; + class PPCFunctionBoundaryTracker* boundaryTracker; uint32* currentInstruction; uint32 ppcAddressOfCurrentInstruction; + IMLSegment* currentOutputSegment; + struct PPCBasicBlockInfo* currentBasicBlock{}; // fpr mode bool LSQE{ true }; bool PSE{ true }; // cycle counter uint32 cyclesSinceLastBranch; // used to track ppc cycles - // temporary general purpose registers - uint32 mappedRegister[PPC_REC_MAX_VIRTUAL_GPR]; - // temporary floating point registers (single and double precision) - uint32 mappedFPRRegister[256]; - // list of intermediate instructions - PPCRecImlInstruction_t* imlList; - sint32 imlListSize; - sint32 imlListCount; + std::unordered_map mappedRegs; + + uint32 GetMaxRegId() const + { + if (mappedRegs.empty()) + return 0; + return mappedRegs.size()-1; + } + // list of segments - PPCRecImlSegment_t** segmentList; - sint32 segmentListSize; - sint32 segmentListCount; + std::vector segmentList2; // code generation control bool hasFPUInstruction; // if true, PPCEnter macro will create FP_UNAVAIL checks -> Not needed in user mode - // register allocator info - struct - { - std::vector list_ranges; - }raInfo; // analysis info struct { bool modifiesGQR[8]; }tracking; + // debug helpers + uint32 debug_entryPPCAddress{0}; + + ~ppcImlGenContext_t() + { + for (IMLSegment* imlSegment : segmentList2) + delete imlSegment; + segmentList2.clear(); + } + + // append raw instruction + IMLInstruction& emitInst() + { + return *PPCRecompilerImlGen_generateNewEmptyInstruction(this); + } + + IMLSegment* NewSegment() + { + IMLSegment* seg = new IMLSegment(); + segmentList2.emplace_back(seg); + return seg; + } + + size_t GetSegmentIndex(IMLSegment* seg) + { + for (size_t i = 0; i < segmentList2.size(); i++) + { + if (segmentList2[i] == seg) + return i; + } + cemu_assert_error(); + return 0; + } + + IMLSegment* InsertSegment(size_t index) + { + IMLSegment* newSeg = new IMLSegment(); + segmentList2.insert(segmentList2.begin() + index, 1, newSeg); + return newSeg; + } + + std::span InsertSegments(size_t index, size_t count) + { + segmentList2.insert(segmentList2.begin() + index, count, {}); + for (size_t i = index; i < (index + count); i++) + segmentList2[i] = new IMLSegment(); + return { segmentList2.data() + index, count}; + } + + void UpdateSegmentIndices() + { + for (size_t i = 0; i < segmentList2.size(); i++) + segmentList2[i]->momentaryIndex = (sint32)i; + } }; typedef void ATTR_MS_ABI (*PPCREC_JUMP_ENTRY)(); @@ -359,11 +136,6 @@ typedef struct alignas(16) float _x64XMM_constFloatMin[2]; alignas(16) uint32 _x64XMM_flushDenormalMask1[4]; alignas(16) uint32 _x64XMM_flushDenormalMaskResetSignBits[4]; - // PSQ load/store scale tables - double _psq_ld_scale_ps0_ps1[64 * 2]; - double _psq_ld_scale_ps0_1[64 * 2]; - double _psq_st_scale_ps0_ps1[64 * 2]; - double _psq_st_scale_ps0_1[64 * 2]; // MXCSR uint32 _x64XMM_mxCsr_ftzOn; uint32 _x64XMM_mxCsr_ftzOff; @@ -373,6 +145,7 @@ extern PPCRecompilerInstanceData_t* ppcRecompilerInstanceData; extern bool ppcRecompilerEnabled; void PPCRecompiler_init(); +void PPCRecompiler_Shutdown(); void PPCRecompiler_allocateRange(uint32 startAddress, uint32 size); @@ -384,14 +157,6 @@ extern void ATTR_MS_ABI (*PPCRecompiler_leaveRecompilerCode_unvisited)(); #define PPC_REC_INVALID_FUNCTION ((PPCRecFunction_t*)-1) -// CPUID -extern bool hasLZCNTSupport; -extern bool hasMOVBESupport; -extern bool hasBMI2Support; -extern bool hasAVXSupport; - -// todo - move some of the stuff 
above into PPCRecompilerInternal.h - // recompiler interface void PPCRecompiler_recompileIfUnvisited(uint32 enterAddress); diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h index 86af33b2..bfb2aed5 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIml.h @@ -1,293 +1,33 @@ +bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set& entryAddresses, class PPCFunctionBoundaryTracker& boundaryTracker); -#define PPCREC_CR_REG_TEMP 8 // there are only 8 cr registers (0-7) we use the 8th as temporary cr register that is never stored (BDNZ instruction for example) +IMLSegment* PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo); +IMLSegment* PPCIMLGen_CreateNewSegmentAsBranchTarget(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo); -enum -{ - PPCREC_IML_OP_ASSIGN, // '=' operator - PPCREC_IML_OP_ENDIAN_SWAP, // '=' operator with 32bit endian swap - PPCREC_IML_OP_ADD, // '+' operator - PPCREC_IML_OP_SUB, // '-' operator - PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, // complex operation, result = operand + ~operand2 + carry bit, updates carry bit - PPCREC_IML_OP_COMPARE_SIGNED, // arithmetic/signed comparison operator (updates cr) - PPCREC_IML_OP_COMPARE_UNSIGNED, // logical/unsigned comparison operator (updates cr) - PPCREC_IML_OP_MULTIPLY_SIGNED, // '*' operator (signed multiply) - PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, // unsigned 64bit multiply, store only high 32bit-word of result - PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, // signed 64bit multiply, store only high 32bit-word of result - PPCREC_IML_OP_DIVIDE_SIGNED, // '/' operator (signed divide) - PPCREC_IML_OP_DIVIDE_UNSIGNED, // '/' operator (unsigned divide) - PPCREC_IML_OP_ADD_CARRY, // complex operation, result = operand + carry bit, updates carry bit - PPCREC_IML_OP_ADD_CARRY_ME, // complex operation, result = operand + carry bit + (-1), updates carry bit - PPCREC_IML_OP_ADD_UPDATE_CARRY, // '+' operator but also updates carry flag - PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, // '+' operator and also adds carry, updates carry flag - // assign operators with cast - PPCREC_IML_OP_ASSIGN_S16_TO_S32, // copy 16bit and sign extend - PPCREC_IML_OP_ASSIGN_S8_TO_S32, // copy 8bit and sign extend - // binary operation - PPCREC_IML_OP_OR, // '|' operator - PPCREC_IML_OP_ORC, // '|' operator, second operand is complemented first - PPCREC_IML_OP_AND, // '&' operator - PPCREC_IML_OP_XOR, // '^' operator - PPCREC_IML_OP_LEFT_ROTATE, // left rotate operator - PPCREC_IML_OP_LEFT_SHIFT, // shift left operator - PPCREC_IML_OP_RIGHT_SHIFT, // right shift operator (unsigned) - PPCREC_IML_OP_NOT, // complement each bit - PPCREC_IML_OP_NEG, // negate - // ppc - PPCREC_IML_OP_RLWIMI, // RLWIMI instruction (rotate, merge based on mask) - PPCREC_IML_OP_SRAW, // SRAWI/SRAW instruction (algebraic shift right, sets ca flag) - PPCREC_IML_OP_SLW, // SLW (shift based on register by up to 63 bits) - PPCREC_IML_OP_SRW, // SRW (shift based on register by up to 63 bits) - PPCREC_IML_OP_CNTLZW, - PPCREC_IML_OP_SUBFC, // SUBFC and SUBFIC (subtract from and set carry) - PPCREC_IML_OP_DCBZ, // clear 32 bytes aligned to 0x20 - PPCREC_IML_OP_MFCR, // copy cr to gpr - PPCREC_IML_OP_MTCRF, // copy gpr to cr (with mask) - // condition register - PPCREC_IML_OP_CR_CLEAR, // clear cr bit - PPCREC_IML_OP_CR_SET, // set cr bit - 
PPCREC_IML_OP_CR_OR, // OR cr bits - PPCREC_IML_OP_CR_ORC, // OR cr bits, complement second input operand bit first - PPCREC_IML_OP_CR_AND, // AND cr bits - PPCREC_IML_OP_CR_ANDC, // AND cr bits, complement second input operand bit first - // FPU - PPCREC_IML_OP_FPR_ADD_BOTTOM, - PPCREC_IML_OP_FPR_ADD_PAIR, - PPCREC_IML_OP_FPR_SUB_PAIR, - PPCREC_IML_OP_FPR_SUB_BOTTOM, - PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, - PPCREC_IML_OP_FPR_MULTIPLY_PAIR, - PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, - PPCREC_IML_OP_FPR_DIVIDE_PAIR, - PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, - PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, - PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, - PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, // leave bottom of destination untouched - PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, // leave bottom of destination untouched - PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, // leave top of destination untouched - PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, - PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, // expand bottom f32 to f64 in bottom and top half - PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, // calculate reciprocal with Espresso accuracy of source bottom half and write result to destination bottom and top half - PPCREC_IML_OP_FPR_FCMPO_BOTTOM, - PPCREC_IML_OP_FPR_FCMPU_BOTTOM, - PPCREC_IML_OP_FPR_FCMPU_TOP, - PPCREC_IML_OP_FPR_NEGATE_BOTTOM, - PPCREC_IML_OP_FPR_NEGATE_PAIR, - PPCREC_IML_OP_FPR_ABS_BOTTOM, // abs(fp0) - PPCREC_IML_OP_FPR_ABS_PAIR, - PPCREC_IML_OP_FPR_FRES_PAIR, // 1.0/fp approx (Espresso accuracy) - PPCREC_IML_OP_FPR_FRSQRTE_PAIR, // 1.0/sqrt(fp) approx (Espresso accuracy) - PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, // -abs(fp0) - PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, // round 64bit double to 64bit double with 32bit float precision (in bottom half of xmm register) - PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, // round two 64bit doubles to 64bit double with 32bit float precision - PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT, - PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ, - PPCREC_IML_OP_FPR_SELECT_BOTTOM, // selectively copy bottom value from operand B or C based on value in operand A - PPCREC_IML_OP_FPR_SELECT_PAIR, // selectively copy top/bottom from operand B or C based on value in top/bottom of operand A - // PS - PPCREC_IML_OP_FPR_SUM0, - PPCREC_IML_OP_FPR_SUM1, -}; +void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenContext); -#define PPCREC_IML_OP_FPR_COPY_PAIR (PPCREC_IML_OP_ASSIGN) - -enum -{ - PPCREC_IML_MACRO_BLR, // macro for BLR instruction code - PPCREC_IML_MACRO_BLRL, // macro for BLRL instruction code - PPCREC_IML_MACRO_BCTR, // macro for BCTR instruction code - PPCREC_IML_MACRO_BCTRL, // macro for BCTRL instruction code - PPCREC_IML_MACRO_BL, // call to different function (can be within same function) - PPCREC_IML_MACRO_B_FAR, // branch to different function - PPCREC_IML_MACRO_COUNT_CYCLES, // decrease current remaining thread cycles by a certain amount - PPCREC_IML_MACRO_HLE, // HLE function call - PPCREC_IML_MACRO_MFTB, // get TB register value (low or high) - PPCREC_IML_MACRO_LEAVE, // leaves recompiler and switches to interpeter - // debugging - PPCREC_IML_MACRO_DEBUGBREAK, // throws a debugbreak -}; - -enum -{ - PPCREC_JUMP_CONDITION_NONE, - PPCREC_JUMP_CONDITION_E, // equal / zero - PPCREC_JUMP_CONDITION_NE, // not equal / not zero - PPCREC_JUMP_CONDITION_LE, // less or equal - PPCREC_JUMP_CONDITION_L, // less - PPCREC_JUMP_CONDITION_GE, // greater or equal - PPCREC_JUMP_CONDITION_G, // greater - // special case: - 
PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW, // needs special handling - PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW, // not summaryoverflow - -}; - -enum -{ - PPCREC_CR_MODE_COMPARE_SIGNED, - PPCREC_CR_MODE_COMPARE_UNSIGNED, // alias logic compare - // others: PPCREC_CR_MODE_ARITHMETIC, - PPCREC_CR_MODE_ARITHMETIC, // arithmetic use (for use with add/sub instructions without generating extra code) - PPCREC_CR_MODE_LOGICAL, -}; - -enum -{ - PPCREC_IML_TYPE_NONE, - PPCREC_IML_TYPE_NO_OP, // no-op instruction - PPCREC_IML_TYPE_JUMPMARK, // possible jump destination (generated before each ppc instruction) - PPCREC_IML_TYPE_R_R, // r* (op) *r - PPCREC_IML_TYPE_R_R_R, // r* = r* (op) r* - PPCREC_IML_TYPE_R_R_S32, // r* = r* (op) s32* - PPCREC_IML_TYPE_LOAD, // r* = [r*+s32*] - PPCREC_IML_TYPE_LOAD_INDEXED, // r* = [r*+r*] - PPCREC_IML_TYPE_STORE, // [r*+s32*] = r* - PPCREC_IML_TYPE_STORE_INDEXED, // [r*+r*] = r* - PPCREC_IML_TYPE_R_NAME, // r* = name - PPCREC_IML_TYPE_NAME_R, // name* = r* - PPCREC_IML_TYPE_R_S32, // r* (op) imm - PPCREC_IML_TYPE_MACRO, - PPCREC_IML_TYPE_CJUMP, // conditional jump - PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK, // jumps only if remaining thread cycles >= 0 - PPCREC_IML_TYPE_PPC_ENTER, // used to mark locations that should be written to recompilerCallTable - PPCREC_IML_TYPE_CR, // condition register specific operations (one or more operands) - // conditional - PPCREC_IML_TYPE_CONDITIONAL_R_S32, - // FPR - PPCREC_IML_TYPE_FPR_R_NAME, // name = f* - PPCREC_IML_TYPE_FPR_NAME_R, // f* = name - PPCREC_IML_TYPE_FPR_LOAD, // r* = (bitdepth) [r*+s32*] (single or paired single mode) - PPCREC_IML_TYPE_FPR_LOAD_INDEXED, // r* = (bitdepth) [r*+r*] (single or paired single mode) - PPCREC_IML_TYPE_FPR_STORE, // (bitdepth) [r*+s32*] = r* (single or paired single mode) - PPCREC_IML_TYPE_FPR_STORE_INDEXED, // (bitdepth) [r*+r*] = r* (single or paired single mode) - PPCREC_IML_TYPE_FPR_R_R, - PPCREC_IML_TYPE_FPR_R_R_R, - PPCREC_IML_TYPE_FPR_R_R_R_R, - PPCREC_IML_TYPE_FPR_R, - // special - PPCREC_IML_TYPE_MEM2MEM, // memory to memory copy (deprecated) - -}; - -enum -{ - PPCREC_NAME_NONE, - PPCREC_NAME_TEMPORARY, - PPCREC_NAME_R0 = 1000, - PPCREC_NAME_SPR0 = 2000, - PPCREC_NAME_FPR0 = 3000, - PPCREC_NAME_TEMPORARY_FPR0 = 4000, // 0 to 7 - //PPCREC_NAME_CR0 = 3000, // value mapped condition register (usually it isn't needed and can be optimized away) -}; - -// special cases for LOAD/STORE -#define PPC_REC_LOAD_LWARX_MARKER (100) // lwarx instruction (similar to LWZX but sets reserved address/value) -#define PPC_REC_STORE_STWCX_MARKER (100) // stwcx instruction (similar to STWX but writes only if reservation from LWARX is valid) -#define PPC_REC_STORE_STSWI_1 (200) // stswi nb = 1 -#define PPC_REC_STORE_STSWI_2 (201) // stswi nb = 2 -#define PPC_REC_STORE_STSWI_3 (202) // stswi nb = 3 -#define PPC_REC_STORE_LSWI_1 (200) // lswi nb = 1 -#define PPC_REC_STORE_LSWI_2 (201) // lswi nb = 2 -#define PPC_REC_STORE_LSWI_3 (202) // lswi nb = 3 - -#define PPC_REC_INVALID_REGISTER 0xFF - -#define PPCREC_CR_BIT_LT 0 -#define PPCREC_CR_BIT_GT 1 -#define PPCREC_CR_BIT_EQ 2 -#define PPCREC_CR_BIT_SO 3 - -enum -{ - // fpr load - PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, - PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, - PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, - PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, - PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0, - PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_S16_PS0, - PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_U16_PS0, - 
PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_S8_PS0, - PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1, - PPCREC_FPR_LD_MODE_PSQ_U8_PS0, - PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1, - // fpr store - PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, // store 1 single precision float from ps0 - PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, // store 1 double precision float from ps0 - - PPCREC_FPR_ST_MODE_UI32_FROM_PS0, // store raw low-32bit of PS0 - - PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, - PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0, - PPCREC_FPR_ST_MODE_PSQ_S8_PS0, - PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_U8_PS0, - PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_U16_PS0, - PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1, - PPCREC_FPR_ST_MODE_PSQ_S16_PS0, - PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1, -}; - -bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* PPCRecFunction, std::set& entryAddresses); -void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext); // todo - move to destructor - -PPCRecImlInstruction_t* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, sint32 index, sint32 shiftBackCount); -PPCRecImlInstruction_t* PPCRecompiler_insertInstruction(PPCRecImlSegment_t* imlSegment, sint32 index); +IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext); +void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount); +IMLInstruction* PPCRecompiler_insertInstruction(IMLSegment* imlSegment, sint32 index); void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count); -void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, PPCRecImlSegment_t* imlSegment, sint32 index); -void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint); +void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index); +void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint); -// GPR register management -uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false); -uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); +// Register management +IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat); -// FPR register management -uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew = false); -uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); +IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName); // IML instruction generation -void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 jumpmarkAddress); -void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction); - -void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, 
uint32 crMode); -void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); -void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister = PPC_REC_INVALID_REGISTER, uint8 crMode = 0); - - - -// IML instruction generation (new style, can generate new instructions but also overwrite existing ones) - -void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction); -void PPCRecompilerImlGen_generateNewInstruction_memory_memory(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint8 srcMemReg, sint32 srcImmS32, uint8 dstMemReg, sint32 dstImmS32, uint8 copyWidth); - -void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister = PPC_REC_INVALID_REGISTER); +void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, IMLInstruction* imlInstruction, uint32 operation, IMLReg registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet); // IML generation - FPU -bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); +bool PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble); +bool PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble); +bool PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble); +bool PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool hasUpdate, bool isDouble); bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* 
ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); @@ -313,22 +53,17 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); +bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate); +bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate); +bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1); +bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1); bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); -bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); +bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withNegative); bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode); @@ -347,76 +82,20 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o // IML general -bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml); -void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst); -void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst); -void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew); -void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst); void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext); -PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* 
imlSegment); +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchTaken, const std::function& genSegmentBranchNotTaken); +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchNotTaken); // no else segment +void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex); -// IML analyzer -typedef struct +class IMLRedirectInstOutput { - uint32 readCRBits; - uint32 writtenCRBits; -}PPCRecCRTracking_t; - -bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment); -bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction); -void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking); - -// IML optimizer -bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext); - -bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext); - -void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext); - -void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext); - -// IML register allocator -void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext); - -// late optimizations -void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext); - -// debug - -void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo = false); +public: + IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment); + ~IMLRedirectInstOutput(); -typedef struct -{ - union - { - struct - { - sint16 readNamedReg1; - sint16 readNamedReg2; - sint16 readNamedReg3; - sint16 writtenNamedReg1; - }; - sint16 gpr[4]; // 3 read + 1 write - }; - // FPR - union - { - struct - { - // note: If destination operand is not fully written, it will be added as a read FPR as well - sint16 readFPR1; - sint16 readFPR2; - sint16 readFPR3; - sint16 readFPR4; // usually this is set to the result FPR if only partially overwritten - sint16 writtenFPR1; - }; - sint16 fpr[4]; - }; -}PPCImlOptimizerUsedRegisters_t; - -void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed); +private: + ppcImlGenContext_t* m_context; + IMLSegment* m_prevSegment; +}; \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp deleted file mode 100644 index 4962d30d..00000000 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlAnalyzer.cpp +++ /dev/null @@ -1,137 +0,0 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "util/helpers/fixedSizeList.h" -#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" - -/* - * Initializes a single segment and returns true if it is a finite loop - */ -bool PPCRecompilerImlAnalyzer_isTightFiniteLoop(PPCRecImlSegment_t* imlSegment) -{ - bool isTightFiniteLoop = false; - 
// base criteria, must jump to beginning of same segment - if (imlSegment->nextSegmentBranchTaken != imlSegment) - return false; - // loops using BDNZ are assumed to always be finite - for (sint32 t = 0; t < imlSegment->imlListCount; t++) - { - if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB && imlSegment->imlList[t].crRegister == 8) - { - return true; - } - } - // for non-BDNZ loops, check for common patterns - // risky approach, look for ADD/SUB operations and assume that potential overflow means finite (does not include r_r_s32 ADD/SUB) - // this catches most loops with load-update and store-update instructions, but also those with decrementing counters - FixedSizeList list_modifiedRegisters; - for (sint32 t = 0; t < imlSegment->imlListCount; t++) - { - if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && (imlSegment->imlList[t].operation == PPCREC_IML_OP_ADD || imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB) ) - { - list_modifiedRegisters.addUnique(imlSegment->imlList[t].op_r_immS32.registerIndex); - } - } - if (list_modifiedRegisters.count > 0) - { - // remove all registers from the list that are modified by non-ADD/SUB instructions - // todo: We should also cover the case where ADD+SUB on the same register cancel the effect out - PPCImlOptimizerUsedRegisters_t registersUsed; - for (sint32 t = 0; t < imlSegment->imlListCount; t++) - { - if (imlSegment->imlList[t].type == PPCREC_IML_TYPE_R_S32 && (imlSegment->imlList[t].operation == PPCREC_IML_OP_ADD || imlSegment->imlList[t].operation == PPCREC_IML_OP_SUB)) - continue; - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + t, ®istersUsed); - if(registersUsed.writtenNamedReg1 < 0) - continue; - list_modifiedRegisters.remove(registersUsed.writtenNamedReg1); - } - if (list_modifiedRegisters.count > 0) - { - return true; - } - } - return false; -} - -/* -* Returns true if the imlInstruction can overwrite CR (depending on value of ->crRegister) -*/ -bool PPCRecompilerImlAnalyzer_canTypeWriteCR(PPCRecImlInstruction_t* imlInstruction) -{ - if (imlInstruction->type == PPCREC_IML_TYPE_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R) - return true; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R) - return true; - return false; -} - -void PPCRecompilerImlAnalyzer_getCRTracking(PPCRecImlInstruction_t* imlInstruction, PPCRecCRTracking_t* crTracking) -{ - crTracking->readCRBits = 0; - crTracking->writtenCRBits = 0; - if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP) - { - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - { - uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex); - crTracking->readCRBits = (crBitFlag); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex); - crTracking->readCRBits = crBitFlag; - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR) - 
{ - crTracking->readCRBits = 0xFFFFFFFF; - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF) - { - crTracking->writtenCRBits |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CR) - { - if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR || - imlInstruction->operation == PPCREC_IML_OP_CR_SET) - { - uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - crTracking->writtenCRBits = crBitFlag; - } - else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR || - imlInstruction->operation == PPCREC_IML_OP_CR_ORC || - imlInstruction->operation == PPCREC_IML_OP_CR_AND || - imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) - { - uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - crTracking->writtenCRBits = crBitFlag; - crBitFlag = 1 << (imlInstruction->op_cr.crA); - crTracking->readCRBits = crBitFlag; - crBitFlag = 1 << (imlInstruction->op_cr.crB); - crTracking->readCRBits |= crBitFlag; - } - else - assert_dbg(); - } - else if (PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7) - { - crTracking->writtenCRBits |= (0xF << (imlInstruction->crRegister * 4)); - } - else if ((imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) - { - // overwrites CR0 - crTracking->writtenCRBits |= (0xF << 0); - } -} diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp index 7291ac41..e76a53fa 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGen.cpp @@ -1,563 +1,345 @@ #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" #include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h" +#include "Cafe/HW/Espresso/EspressoISA.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" -#include "PPCRecompilerImlRanges.h" -#include "util/helpers/StringBuf.h" +#include "IML/IML.h" +#include "IML/IMLRegisterAllocatorRanges.h" +#include "PPCFunctionBoundaryTracker.h" +#include "Cafe/OS/libs/coreinit/coreinit_Time.h" bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext); -uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext); -uint32 PPCRecompiler_getInstructionByOffset(ppcImlGenContext_t* ppcImlGenContext, uint32 offset); -PPCRecImlInstruction_t* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext) +struct PPCBasicBlockInfo { - if( ppcImlGenContext->imlListCount+1 > ppcImlGenContext->imlListSize ) + PPCBasicBlockInfo(uint32 startAddress, const std::set& entryAddresses) : startAddress(startAddress), lastAddress(startAddress) { - sint32 newSize = ppcImlGenContext->imlListCount*2 + 2; - ppcImlGenContext->imlList = (PPCRecImlInstruction_t*)realloc(ppcImlGenContext->imlList, sizeof(PPCRecImlInstruction_t)*newSize); - ppcImlGenContext->imlListSize = newSize; + isEnterable = entryAddresses.find(startAddress) != entryAddresses.end(); } - PPCRecImlInstruction_t* imlInstruction = ppcImlGenContext->imlList+ppcImlGenContext->imlListCount; - memset(imlInstruction, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; // dont update any cr register by default - 
imlInstruction->associatedPPCAddress = ppcImlGenContext->ppcAddressOfCurrentInstruction; - ppcImlGenContext->imlListCount++; - return imlInstruction; + + uint32 startAddress; + uint32 lastAddress; // inclusive + bool isEnterable{ false }; + bool hasContinuedFlow{ true }; // non-branch path goes to next segment, assumed by default + bool hasBranchTarget{ false }; + uint32 branchTarget{}; + + // associated IML segments + IMLSegment* firstSegment{}; // first segment in chain, used as branch target for other segments + IMLSegment* appendSegment{}; // last segment in chain, additional instructions should be appended to this segment + + void SetInitialSegment(IMLSegment* seg) + { + cemu_assert_debug(!firstSegment); + cemu_assert_debug(!appendSegment); + firstSegment = seg; + appendSegment = seg; + } + + IMLSegment* GetFirstSegmentInChain() + { + return firstSegment; + } + + IMLSegment* GetSegmentForInstructionAppend() + { + return appendSegment; + } +}; + +IMLInstruction* PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext_t* ppcImlGenContext) +{ + IMLInstruction& inst = ppcImlGenContext->currentOutputSegment->imlList.emplace_back(); + memset(&inst, 0x00, sizeof(IMLInstruction)); + return &inst; } -void PPCRecompilerImlGen_generateNewInstruction_jumpmark(ppcImlGenContext_t* ppcImlGenContext, uint32 address) +void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) { - // no-op that indicates possible destination of a jump - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_JUMPMARK; - imlInstruction->op_jumpmark.address = address; -} - -void PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext_t* ppcImlGenContext, uint32 macroId, uint32 param, uint32 param2, uint16 paramU16) -{ - // no-op that indicates possible destination of a jump - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_MACRO; - imlInstruction->operation = macroId; - imlInstruction->op_macro.param = param; - imlInstruction->op_macro.param2 = param2; - imlInstruction->op_macro.paramU16 = paramU16; -} - -/* - * Generates a marker for Interpreter -> Recompiler entrypoints - * PPC_ENTER iml instructions have no associated PPC address but the instruction itself has one - */ -void PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcAddress) -{ - // no-op that indicates possible destination of a jump - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_PPC_ENTER; - imlInstruction->operation = 0; - imlInstruction->op_ppcEnter.ppcAddress = ppcAddress; - imlInstruction->op_ppcEnter.x64Offset = 0; - imlInstruction->associatedPPCAddress = 0; -} - -void PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerResult, uint8 registerA, uint8 crRegister, uint8 crMode) -{ - // operation with two register operands (e.g. 
"t0 = t1") - if(imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_R_R; - imlInstruction->operation = operation; - imlInstruction->crRegister = crRegister; - imlInstruction->crMode = crMode; - imlInstruction->op_r_r.registerResult = registerResult; - imlInstruction->op_r_r.registerA = registerA; -} - -void PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, uint8 registerB, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0) -{ - // operation with three register operands (e.g. "t0 = t1 + t4") - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_R_R_R; - imlInstruction->operation = operation; - imlInstruction->crRegister = crRegister; - imlInstruction->crMode = crMode; - imlInstruction->op_r_r_r.registerResult = registerResult; - imlInstruction->op_r_r_r.registerA = registerA; - imlInstruction->op_r_r_r.registerB = registerB; -} - -void PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerResult, uint8 registerA, sint32 immS32, uint8 crRegister=PPC_REC_INVALID_REGISTER, uint8 crMode=0) -{ - // operation with two register operands and one signed immediate (e.g. "t0 = t1 + 1234") - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_R_R_S32; - imlInstruction->operation = operation; - imlInstruction->crRegister = crRegister; - imlInstruction->crMode = crMode; - imlInstruction->op_r_r_s32.registerResult = registerResult; - imlInstruction->op_r_r_s32.registerA = registerA; - imlInstruction->op_r_r_s32.immS32 = immS32; -} - -void PPCRecompilerImlGen_generateNewInstruction_name_r(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, uint32 name, uint32 copyWidth, bool signExtend, bool bigEndian) -{ - // Store name (e.g. "'r3' = t0" which translates to MOV [ESP+offset_r3], reg32) - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_NAME_R; - imlInstruction->operation = operation; - imlInstruction->op_r_name.registerIndex = registerIndex; - imlInstruction->op_r_name.name = name; - imlInstruction->op_r_name.copyWidth = copyWidth; - imlInstruction->op_r_name.flags = (signExtend?PPCREC_IML_OP_FLAG_SIGNEXTEND:0)|(bigEndian?PPCREC_IML_OP_FLAG_SWITCHENDIAN:0); -} - -void PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 copyWidth, bool signExtend, bool bigEndian, uint8 crRegister, uint32 crMode) -{ - // two variations: - // operation without store (e.g. "'r3' < 123" which has no effect other than updating a condition flags register) - // operation with store (e.g. 
"'r3' = 123") - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_R_S32; - imlInstruction->operation = operation; - imlInstruction->crRegister = crRegister; - imlInstruction->crMode = crMode; - imlInstruction->op_r_immS32.registerIndex = registerIndex; - imlInstruction->op_r_immS32.immS32 = immS32; -} - -void PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 operation, uint8 registerIndex, sint32 immS32, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) -{ - if(imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - else - memset(imlInstruction, 0, sizeof(PPCRecImlInstruction_t)); - imlInstruction->type = PPCREC_IML_TYPE_CONDITIONAL_R_S32; - imlInstruction->operation = operation; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - // r_s32 operation - imlInstruction->op_conditional_r_s32.registerIndex = registerIndex; - imlInstruction->op_conditional_r_s32.immS32 = immS32; - // condition - imlInstruction->op_conditional_r_s32.crRegisterIndex = crRegisterIndex; - imlInstruction->op_conditional_r_s32.crBitIndex = crBitIndex; - imlInstruction->op_conditional_r_s32.bitMustBeSet = bitMustBeSet; -} - - -void PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint32 jumpmarkAddress) -{ - // jump - if (imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - else - memset(imlInstruction, 0, sizeof(PPCRecImlInstruction_t)); - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; - imlInstruction->op_conditionalJump.jumpAccordingToSegment = false; - imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE; - imlInstruction->op_conditionalJump.crRegisterIndex = 0; - imlInstruction->op_conditionalJump.crBitIndex = 0; - imlInstruction->op_conditionalJump.bitMustBeSet = false; -} - -// jump based on segment branches -void PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction) -{ - // jump - if (imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->associatedPPCAddress = 0; - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.jumpmarkAddress = 0; - imlInstruction->op_conditionalJump.jumpAccordingToSegment = true; - imlInstruction->op_conditionalJump.condition = PPCREC_JUMP_CONDITION_NONE; - imlInstruction->op_conditionalJump.crRegisterIndex = 0; - imlInstruction->op_conditionalJump.crBitIndex = 0; - imlInstruction->op_conditionalJump.bitMustBeSet = false; -} - -void PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction) -{ - if (imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_NO_OP; - imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->crMode = 0; -} - -void 
PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext_t* ppcImlGenContext, uint32 operation, uint8 crD, uint8 crA, uint8 crB) -{ - // multiple variations: - // operation involving only one cr bit (like clear crD bit) - // operation involving three cr bits (like crD = crA or crB) - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CR; - imlInstruction->operation = operation; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->crMode = 0; - imlInstruction->op_cr.crD = crD; - imlInstruction->op_cr.crA = crA; - imlInstruction->op_cr.crB = crB; -} - -void PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext_t* ppcImlGenContext, uint32 jumpmarkAddress, uint32 jumpCondition, uint32 crRegisterIndex, uint32 crBitIndex, bool bitMustBeSet) -{ - // conditional jump - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_CJUMP; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_conditionalJump.jumpmarkAddress = jumpmarkAddress; - imlInstruction->op_conditionalJump.condition = jumpCondition; - imlInstruction->op_conditionalJump.crRegisterIndex = crRegisterIndex; - imlInstruction->op_conditionalJump.crBitIndex = crBitIndex; - imlInstruction->op_conditionalJump.bitMustBeSet = bitMustBeSet; -} - -void PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool signExtend, bool switchEndian) -{ - // load from memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_LOAD; - imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_storeLoad.registerData = registerDestination; - imlInstruction->op_storeLoad.registerMem = registerMemory; - imlInstruction->op_storeLoad.immS32 = immS32; - imlInstruction->op_storeLoad.copyWidth = copyWidth; - //imlInstruction->op_storeLoad.flags = (signExtend ? PPCREC_IML_OP_FLAG_SIGNEXTEND : 0) | (switchEndian ? 
PPCREC_IML_OP_FLAG_SWITCHENDIAN : 0); - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; - imlInstruction->op_storeLoad.flags2.signExtend = signExtend; -} - -void PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) -{ - // load from memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + cemu_assert_debug(registerMemory1.IsValid()); + cemu_assert_debug(registerMemory2.IsValid()); + cemu_assert_debug(registerDestination.IsValid()); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_LOAD_INDEXED; imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->op_storeLoad.registerData = registerDestination; imlInstruction->op_storeLoad.registerMem = registerMemory1; imlInstruction->op_storeLoad.registerMem2 = registerMemory2; imlInstruction->op_storeLoad.copyWidth = copyWidth; - //imlInstruction->op_storeLoad.flags = (signExtend?PPCREC_IML_OP_FLAG_SIGNEXTEND:0)|(switchEndian?PPCREC_IML_OP_FLAG_SWITCHENDIAN:0); imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; imlInstruction->op_storeLoad.flags2.signExtend = signExtend; } -void PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 copyWidth, bool switchEndian) +void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, IMLReg registerDestination, IMLReg registerMemory1, IMLReg registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) { - // load from memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_STORE; - imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_storeLoad.registerData = registerSource; - imlInstruction->op_storeLoad.registerMem = registerMemory; - imlInstruction->op_storeLoad.immS32 = immS32; - imlInstruction->op_storeLoad.copyWidth = copyWidth; - //imlInstruction->op_storeLoad.flags = (switchEndian?PPCREC_IML_OP_FLAG_SWITCHENDIAN:0); - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; - imlInstruction->op_storeLoad.flags2.signExtend = false; -} - -void PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 copyWidth, bool signExtend, bool switchEndian) -{ - // load from memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); + cemu_assert_debug(registerMemory1.IsValid()); + cemu_assert_debug(registerMemory2.IsValid()); + cemu_assert_debug(registerDestination.IsValid()); + IMLInstruction* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); imlInstruction->type = PPCREC_IML_TYPE_STORE_INDEXED; imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; imlInstruction->op_storeLoad.registerData = registerDestination; imlInstruction->op_storeLoad.registerMem = registerMemory1; imlInstruction->op_storeLoad.registerMem2 = registerMemory2; imlInstruction->op_storeLoad.copyWidth = 
copyWidth; - //imlInstruction->op_storeLoad.flags = (signExtend?PPCREC_IML_OP_FLAG_SIGNEXTEND:0)|(switchEndian?PPCREC_IML_OP_FLAG_SWITCHENDIAN:0); imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; imlInstruction->op_storeLoad.flags2.signExtend = signExtend; } -void PPCRecompilerImlGen_generateNewInstruction_memory_memory(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, uint8 srcMemReg, sint32 srcImmS32, uint8 dstMemReg, sint32 dstImmS32, uint8 copyWidth) +// create and fill two segments (branch taken and branch not taken) as a follow up to the current segment and then merge flow afterwards +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchTaken, const std::function& genSegmentBranchNotTaken) { - // copy from memory to memory - if(imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_MEM2MEM; - imlInstruction->operation = 0; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->op_mem2mem.src.registerMem = srcMemReg; - imlInstruction->op_mem2mem.src.immS32 = srcImmS32; - imlInstruction->op_mem2mem.dst.registerMem = dstMemReg; - imlInstruction->op_mem2mem.dst.immS32 = dstImmS32; - imlInstruction->op_mem2mem.copyWidth = copyWidth; + IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + + std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, 3); + IMLSegment* segBranchNotTaken = segments[0]; + IMLSegment* segBranchTaken = segments[1]; + IMLSegment* segMerge = segments[2]; + + // link the segments + segMerge->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken()); + segMerge->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken()); + currentWriteSegment->SetLinkBranchTaken(segBranchTaken); + currentWriteSegment->SetLinkBranchNotTaken(segBranchNotTaken); + segBranchTaken->SetLinkBranchNotTaken(segMerge); + segBranchNotTaken->SetLinkBranchTaken(segMerge); + // generate code for branch taken segment + ppcImlGenContext.currentOutputSegment = segBranchTaken; + genSegmentBranchTaken(ppcImlGenContext); + cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchTaken); + // generate code for branch not taken segment + ppcImlGenContext.currentOutputSegment = segBranchNotTaken; + genSegmentBranchNotTaken(ppcImlGenContext); + cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchNotTaken); + ppcImlGenContext.emitInst().make_jump(); + // make merge segment the new write segment + ppcImlGenContext.currentOutputSegment = segMerge; + basicBlockInfo.appendSegment = segMerge; } -uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) +void PPCIMLGen_CreateSegmentBranchedPath(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, const std::function& genSegmentBranchNotTaken) { - if( mappedName == PPCREC_NAME_NONE ) + IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + + std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, 2); + IMLSegment* segBranchNotTaken = segments[0]; + IMLSegment* segMerge = segments[1]; + + // link the segments + segMerge->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken()); + 
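// Resulting layout for this single-path variant: the branch-taken link of currentWriteSegment now points directly at segMerge, + // while the not-taken path falls through into segBranchNotTaken and from there into segMerge, which takes over the segment's original outgoing links. +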
segMerge->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken()); + currentWriteSegment->SetLinkBranchTaken(segMerge); + currentWriteSegment->SetLinkBranchNotTaken(segBranchNotTaken); + segBranchNotTaken->SetLinkBranchNotTaken(segMerge); + // generate code for branch not taken segment + ppcImlGenContext.currentOutputSegment = segBranchNotTaken; + genSegmentBranchNotTaken(ppcImlGenContext); + cemu_assert_debug(ppcImlGenContext.currentOutputSegment == segBranchNotTaken); + // make merge segment the new write segment + ppcImlGenContext.currentOutputSegment = segMerge; + basicBlockInfo.appendSegment = segMerge; +} + +IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index); + +IMLRedirectInstOutput::IMLRedirectInstOutput(ppcImlGenContext_t* ppcImlGenContext, IMLSegment* outputSegment) : m_context(ppcImlGenContext) +{ + m_prevSegment = ppcImlGenContext->currentOutputSegment; + cemu_assert_debug(ppcImlGenContext->currentOutputSegment == ppcImlGenContext->currentBasicBlock->appendSegment); + if (outputSegment == ppcImlGenContext->currentOutputSegment) { - debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(): Invalid mappedName parameter\n"); - return PPC_REC_INVALID_REGISTER; + m_prevSegment = nullptr; + return; } - for(uint32 i=0; i<(PPC_REC_MAX_VIRTUAL_GPR-1); i++) + m_context->currentBasicBlock->appendSegment = outputSegment; + m_context->currentOutputSegment = outputSegment; +} + +IMLRedirectInstOutput::~IMLRedirectInstOutput() +{ + if (m_prevSegment) { - if( ppcImlGenContext->mappedRegister[i] == PPCREC_NAME_NONE ) + m_context->currentBasicBlock->appendSegment = m_prevSegment; + m_context->currentOutputSegment = m_prevSegment; + } +} + +// compare values and branch to segment with same index in segmentsOut. The last segment doesn't actually have any comparison and just is the default case. 
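For example, with count == 3 a caller passes two compare values: a compareReg value equal to compareValues[0] branches to segmentsOut[0], one equal to compareValues[1] branches to segmentsOut[1], and any other value ends up in the case segment selected by defaultCaseIndex; every path then continues in a shared merge segment.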
Thus compareValues is one shorter than count +void PPCIMLGen_CreateSegmentBranchedPathMultiple(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo, IMLSegment** segmentsOut, IMLReg compareReg, sint32* compareValues, sint32 count, sint32 defaultCaseIndex) +{ + IMLSegment* currentWriteSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + cemu_assert_debug(!currentWriteSegment->HasSuffixInstruction()); // must not already have a suffix instruction + + const sint32 numBranchSegments = count + 1; + const sint32 numCaseSegments = count; + + std::span segments = ppcImlGenContext.InsertSegments(ppcImlGenContext.GetSegmentIndex(currentWriteSegment) + 1, numBranchSegments - 1 + numCaseSegments + 1); + IMLSegment** extraBranchSegments = segments.data(); + IMLSegment** caseSegments = segments.data() + numBranchSegments - 1; + IMLSegment* mergeSegment = segments[numBranchSegments - 1 + numCaseSegments]; + + // move links to the merge segment + mergeSegment->SetLinkBranchTaken(currentWriteSegment->GetBranchTaken()); + mergeSegment->SetLinkBranchNotTaken(currentWriteSegment->GetBranchNotTaken()); + currentWriteSegment->SetLinkBranchTaken(nullptr); + currentWriteSegment->SetLinkBranchNotTaken(nullptr); + + for (sint32 i=0; imappedRegister[i] = mappedName; - return i; + cemu_assert_debug(i < numCaseSegments); + seg->SetLinkBranchTaken(caseSegments[i]); + seg->SetLinkBranchNotTaken(GetBranchSegment(i + 1)); + seg->AppendInstruction()->make_compare_s32(compareReg, compareValues[i], tmpBoolReg, IMLCondition::EQ); + seg->AppendInstruction()->make_conditional_jump(tmpBoolReg, true); + } + else + { + cemu_assert_debug(defaultCaseIndex < numCaseSegments); + seg->SetLinkBranchTaken(caseSegments[defaultCaseIndex]); + seg->AppendInstruction()->make_jump(); } } + // link case segments + for (sint32 i=0; iSetLinkBranchTaken(mergeSegment); + // -> Jumps are added after the instructions + } + else + { + seg->SetLinkBranchTaken(mergeSegment); + } + } + ppcImlGenContext.currentOutputSegment = mergeSegment; + basicBlockInfo.appendSegment = mergeSegment; +} + +IMLReg PPCRecompilerImlGen_LookupReg(ppcImlGenContext_t* ppcImlGenContext, IMLName mappedName, IMLRegFormat regFormat) +{ + auto it = ppcImlGenContext->mappedRegs.find(mappedName); + if (it != ppcImlGenContext->mappedRegs.end()) + return it->second; + // create new reg entry + IMLRegFormat baseFormat; + if (regFormat == IMLRegFormat::F64) + baseFormat = IMLRegFormat::F64; + else if (regFormat == IMLRegFormat::I32) + baseFormat = IMLRegFormat::I64; + else + { + cemu_assert_suspicious(); + } + IMLRegID newRegId = ppcImlGenContext->mappedRegs.size(); + IMLReg newReg(baseFormat, regFormat, 0, newRegId); + ppcImlGenContext->mappedRegs.try_emplace(mappedName, newReg); + return newReg; +} + +IMLName PPCRecompilerImlGen_GetRegName(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg) +{ + for (auto& it : ppcImlGenContext->mappedRegs) + { + if (it.second.GetRegID() == reg.GetRegID()) + return it.first; + } + cemu_assert(false); return 0; } -uint32 PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) -{ - for(uint32 i=0; i< PPC_REC_MAX_VIRTUAL_GPR; i++) - { - if( ppcImlGenContext->mappedRegister[i] == mappedName ) - { - return i; - } - } - return PPC_REC_INVALID_REGISTER; -} - uint32 PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - if( mappedName == PPCREC_NAME_NONE ) - { - debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(): Invalid 
mappedName parameter\n"); - return PPC_REC_INVALID_REGISTER; - } - for(uint32 i=0; i<255; i++) - { - if( ppcImlGenContext->mappedFPRRegister[i] == PPCREC_NAME_NONE ) - { - ppcImlGenContext->mappedFPRRegister[i] = mappedName; - return i; - } - } + DEBUG_BREAK; + //if( mappedName == PPCREC_NAME_NONE ) + //{ + // debug_printf("PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(): Invalid mappedName parameter\n"); + // return PPC_REC_INVALID_REGISTER; + //} + //for(uint32 i=0; i<255; i++) + //{ + // if( ppcImlGenContext->mappedFPRRegister[i] == PPCREC_NAME_NONE ) + // { + // ppcImlGenContext->mappedFPRRegister[i] = mappedName; + // return i; + // } + //} return 0; } uint32 PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - for(uint32 i=0; i<255; i++) - { - if( ppcImlGenContext->mappedFPRRegister[i] == mappedName ) - { - return i; - } - } + DEBUG_BREAK; + //for(uint32 i=0; i<255; i++) + //{ + // if( ppcImlGenContext->mappedFPRRegister[i] == mappedName ) + // { + // return i; + // } + //} return PPC_REC_INVALID_REGISTER; } -/* - * Loads a PPC gpr into any of the available IML registers - * If loadNew is false, it will reuse already loaded instances - */ -uint32 PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew) +IMLReg PPCRecompilerImlGen_loadRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) { - if( loadNew == false ) - { - uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName); - if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - return loadedRegisterIndex; - } - uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, mappedName); - return registerIndex; + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, mappedName, IMLRegFormat::I32); } -/* - * Reuse already loaded register if present - * Otherwise create new IML register and map the name. 
The register contents will be undefined - */ -uint32 PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) +IMLReg _GetRegGPR(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { - uint32 loadedRegisterIndex = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, mappedName); - if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - return loadedRegisterIndex; - uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, mappedName); - return registerIndex; + cemu_assert_debug(index < 32); + return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + index); } -/* - * Loads a PPC fpr into any of the available IML FPU registers - * If loadNew is false, it will check first if the fpr is already loaded into any IML register - */ -uint32 PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName, bool loadNew) +IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { - if( loadNew == false ) - { - uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); - if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - return loadedRegisterIndex; - } - uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); - return registerIndex; + cemu_assert_debug(index < 32); + return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CR + index); } -/* - * Checks if a PPC fpr register is already loaded into any IML register - * If no, it will create a new undefined temporary IML FPU register and map the name (effectively overwriting the old ppc register) - */ -uint32 PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext_t* ppcImlGenContext, uint32 mappedName) +IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit) { - uint32 loadedRegisterIndex = PPCRecompilerImlGen_findFPRRegisterByMappedName(ppcImlGenContext, mappedName); - if( loadedRegisterIndex != PPC_REC_INVALID_REGISTER ) - return loadedRegisterIndex; - uint32 registerIndex = PPCRecompilerImlGen_getAndLockFreeTemporaryFPR(ppcImlGenContext, mappedName); - return registerIndex; + cemu_assert_debug(crReg < 8); + cemu_assert_debug(crBit < 4); + return _GetRegCR(ppcImlGenContext, (crReg * 4) + crBit); } -void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +IMLReg _GetRegTemporary(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { -//#ifndef PUBLIC_RELEASE -// PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); -//#endif - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); + cemu_assert_debug(index < 4); + return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index); } -bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// get throw-away register +// be careful to not collide with other temporary register +IMLReg _GetRegTemporaryS8(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { - uint32 rD, spr1, spr2, spr; - PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); - spr = spr1 | (spr2<<5); - if (spr == SPR_CTR || spr == SPR_LR) - { - uint32 gprReg = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); - if (gprReg == PPC_REC_INVALID_REGISTER) - gprReg = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - uint32 sprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, sprReg, gprReg); - } - else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7) - { - uint32 gprReg = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); - if (gprReg == PPC_REC_INVALID_REGISTER) - gprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - uint32 sprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, sprReg, gprReg); - ppcImlGenContext->tracking.modifiesGQR[spr - SPR_UGQR0] = true; - } - else - return false; - return true; -} - -bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 rD, spr1, spr2, spr; - PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); - spr = spr1 | (spr2<<5); - if (spr == SPR_LR || spr == SPR_CTR) - { - uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); - uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprReg, sprReg); - } - else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7) - { - uint32 sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); - uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprReg, sprReg); - } - else - return false; - return true; -} - -bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 rD, spr1, spr2, spr; - PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); - spr = spr1 | (spr2<<5); - - if (spr == 268 || spr == 269) - { - // TBL / TBU - uint32 param2 = spr | (rD << 16); - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_MFTB, ppcImlGenContext->ppcAddressOfCurrentInstruction, param2, 0); - return true; - } - return false; -} - -bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MFCR, gprReg, 0, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - return true; -} - -bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 rS; - uint32 crMask; - PPC_OPC_TEMPL_XFX(opcode, rS, crMask); - uint32 gprReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_MTCRF, gprReg, crMask, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - return true; -} - -void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 cr; - int rA, rB; - PPC_OPC_TEMPL_X(opcode, cr, rA, rB); - cr >>= 2; - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - 
PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_COMPARE_SIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_SIGNED); -} - -void PPCRecompilerImlGen_CMPL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 cr; - int rA, rB; - PPC_OPC_TEMPL_X(opcode, cr, rA, rB); - cr >>= 2; - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegisterA, gprRegisterB, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); -} - -void PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 cr; - int rA; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, cr, rA, imm); - cr >>= 2; - sint32 b = imm; - // load gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_SIGNED, gprRegister, b, 0, false, false, cr, PPCREC_CR_MODE_COMPARE_SIGNED); -} - -void PPCRecompilerImlGen_CMPLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 cr; - int rA; - uint32 imm; - PPC_OPC_TEMPL_D_UImm(opcode, cr, rA, imm); - cr >>= 2; - uint32 b = imm; - // load gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_COMPARE_UNSIGNED, gprRegister, (sint32)b, 0, false, false, cr, PPCREC_CR_MODE_COMPARE_UNSIGNED); + cemu_assert_debug(index < 4); + return PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + index); } bool PPCRecompiler_canInlineFunction(MPTR functionPtr, sint32* functionInstructionCount) { for (sint32 i = 0; i < 6; i++) { - uint32 opcode = memory_readU32(functionPtr+i*4); + uint32 opcode = memory_readU32(functionPtr + i * 4); switch ((opcode >> 26)) { case 14: // ADDI @@ -611,18 +393,220 @@ void PPCRecompiler_generateInlinedCode(ppcImlGenContext_t* ppcImlGenContext, uin { for (sint32 i = 0; i < instructionCount; i++) { - ppcImlGenContext->ppcAddressOfCurrentInstruction = startAddress + i*4; + ppcImlGenContext->ppcAddressOfCurrentInstruction = startAddress + i * 4; ppcImlGenContext->cyclesSinceLastBranch++; if (PPCRecompiler_decodePPCInstruction(ppcImlGenContext)) { - assert_dbg(); + cemu_assert_suspicious(); } } // add range - ppcRecRange_t recRange; - recRange.ppcAddress = startAddress; - recRange.ppcSize = instructionCount*4 + 4; // + 4 because we have to include the BLR - ppcImlGenContext->functionRef->list_ranges.push_back(recRange); + cemu_assert_unimplemented(); + //ppcRecRange_t recRange; + //recRange.ppcAddress = startAddress; + //recRange.ppcSize = instructionCount*4 + 4; // + 4 because we have to include the BLR + //ppcImlGenContext->functionRef->list_ranges.push_back(recRange); +} + +// for handling RC bit of many instructions +void PPCImlGen_UpdateCR0(ppcImlGenContext_t* ppcImlGenContext, IMLReg regR) +{ + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + // todo - SO bit + + ppcImlGenContext->emitInst().make_compare_s32(regR, 0, crBitRegLT, 
IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_compare_s32(regR, 0, crBitRegGT, IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_compare_s32(regR, 0, crBitRegEQ, IMLCondition::EQ); + + //ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, crBitRegSO, 0); // todo - copy from XER + + //ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, registerR, registerR, 0, PPCREC_CR_MODE_LOGICAL); +} + +void PPCRecompilerImlGen_TW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + // split before and after to make sure the macro is in an isolated segment that we can make enterable + PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); + ppcImlGenContext->currentOutputSegment->SetEnterable(ppcImlGenContext->ppcAddressOfCurrentInstruction); + PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext)->make_macro(PPCREC_IML_MACRO_LEAVE, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0, IMLREG_INVALID); + IMLSegment* middleSeg = PPCIMLGen_CreateSplitSegmentAtEnd(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock); + middleSeg->SetLinkBranchTaken(nullptr); + middleSeg->SetLinkBranchNotTaken(nullptr); +} + +bool PPCRecompilerImlGen_MTSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + uint32 rD, spr1, spr2, spr; + PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); + spr = spr1 | (spr2<<5); + IMLReg gprReg = _GetRegGPR(ppcImlGenContext, rD); + if (spr == SPR_CTR || spr == SPR_LR) + { + IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg); + } + else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7) + { + IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, sprReg, gprReg); + ppcImlGenContext->tracking.modifiesGQR[spr - SPR_UGQR0] = true; + } + else + return false; + return true; +} + +bool PPCRecompilerImlGen_MFSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + uint32 rD, spr1, spr2, spr; + PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); + spr = spr1 | (spr2<<5); + IMLReg gprReg = _GetRegGPR(ppcImlGenContext, rD); + if (spr == SPR_LR || spr == SPR_CTR) + { + IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprReg, sprReg); + } + else if (spr >= SPR_UGQR0 && spr <= SPR_UGQR7) + { + IMLReg sprReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + spr); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, gprReg, sprReg); + } + else + return false; + return true; +} + +ATTR_MS_ABI uint32 PPCRecompiler_GetTBL() +{ + return (uint32)coreinit::OSGetSystemTime(); +} + +ATTR_MS_ABI uint32 PPCRecompiler_GetTBU() +{ + return (uint32)(coreinit::OSGetSystemTime() >> 32); +} + +bool PPCRecompilerImlGen_MFTB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + uint32 rD, spr1, spr2, spr; + PPC_OPC_TEMPL_XO(opcode, rD, spr1, spr2); + spr = spr1 | (spr2<<5); + + if( spr == SPR_TBL || spr == SPR_TBU ) + { + IMLReg resultReg = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_call_imm(spr == SPR_TBL ? 
(uintptr_t)PPCRecompiler_GetTBL : (uintptr_t)PPCRecompiler_GetTBU, IMLREG_INVALID, IMLREG_INVALID, IMLREG_INVALID, resultReg); + return true; + } + return false; +} + +void PPCRecompilerImlGen_MCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + uint32 crD, crS, b; + PPC_OPC_TEMPL_X(opcode, crD, crS, b); + cemu_assert_debug((crD&3) == 0); + cemu_assert_debug((crS&3) == 0); + crD >>= 2; + crS >>= 2; + for (sint32 i = 0; i<4; i++) + { + IMLReg regCrSrcBit = _GetRegCR(ppcImlGenContext, crS * 4 + i); + IMLReg regCrDstBit = _GetRegCR(ppcImlGenContext, crD * 4 + i); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCrDstBit, regCrSrcBit); + } +} + +bool PPCRecompilerImlGen_MFCR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + sint32 rD, rA, rB; + PPC_OPC_TEMPL_X(opcode, rD, rA, rB); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regD, 0); + for (sint32 i = 0; i < 32; i++) + { + IMLReg regCrBit = _GetRegCR(ppcImlGenContext, i); + cemu_assert_debug(regCrBit.GetRegFormat() == IMLRegFormat::I32); // addition is only allowed between same-format regs + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, regD, regD, 1); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regD, regD, regCrBit); + } + return true; +} + +bool PPCRecompilerImlGen_MTCRF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + uint32 rS; + uint32 crMask; + PPC_OPC_TEMPL_XFX(opcode, rS, crMask); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); + uint32 crBitMask = ppc_MTCRFMaskToCRBitMask(crMask); + for (sint32 f = 0; f < 32; f++) + { + if(((crBitMask >> f) & 1) == 0) + continue; + IMLReg regCrBit = _GetRegCR(ppcImlGenContext, f); + cemu_assert_debug(regCrBit.GetRegFormat() == IMLRegFormat::I32); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, regTmp, regS, (31-f)); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regCrBit, regTmp, 1); + } + return true; +} + +void PPCRecompilerImlGen_CMP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isUnsigned) +{ + uint32 cr; + int rA, rB; + PPC_OPC_TEMPL_X(opcode, cr, rA, rB); + cr >>= 2; + + IMLReg gprRegisterA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg gprRegisterB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO); + + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegLT, isUnsigned ? IMLCondition::UNSIGNED_LT : IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegGT, isUnsigned ? 
IMLCondition::UNSIGNED_GT : IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_compare(gprRegisterA, gprRegisterB, crBitRegEQ, IMLCondition::EQ); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, crBitRegSO, regXerSO); +} + +bool PPCRecompilerImlGen_CMPI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isUnsigned) +{ + uint32 cr; + int rA; + uint32 imm; + if (isUnsigned) + { + PPC_OPC_TEMPL_D_UImm(opcode, cr, rA, imm); + } + else + { + PPC_OPC_TEMPL_D_SImm(opcode, cr, rA, imm); + } + cr >>= 2; + + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regXerSO = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_SO); + + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, cr, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegLT, isUnsigned ? IMLCondition::UNSIGNED_LT : IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegGT, isUnsigned ? IMLCondition::UNSIGNED_GT : IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_compare_s32(regA, (sint32)imm, crBitRegEQ, IMLCondition::EQ); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, crBitRegSO, regXerSO); + + return true; } bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -637,43 +621,26 @@ bool PPCRecompilerImlGen_B(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( opcode&PPC_OPC_LK ) { // function call - // check if function can be inlined - sint32 inlineFuncInstructionCount = 0; - if (PPCRecompiler_canInlineFunction(jumpAddressDest, &inlineFuncInstructionCount)) - { - // generate NOP iml instead of BL macro (this assures that segment PPC range remains intact) - PPCRecompilerImlGen_generateNewInstruction_noOp(ppcImlGenContext, NULL); - //forceLog_printf("Inline func 0x%08x at %08x", jumpAddressDest, ppcImlGenContext->ppcAddressOfCurrentInstruction); - uint32* prevInstructionPtr = ppcImlGenContext->currentInstruction; - ppcImlGenContext->currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(jumpAddressDest); - PPCRecompiler_generateInlinedCode(ppcImlGenContext, jumpAddressDest, inlineFuncInstructionCount); - ppcImlGenContext->currentInstruction = prevInstructionPtr; - return true; - } - // generate funtion call instructions - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch, IMLREG_INVALID); return true; } // is jump destination within recompiled function? 
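The near/far decision below is worth spelling out: a branch target that lies inside the PPC address range covered by the function currently being recompiled can stay a plain IML jump, while anything outside has to go through the PPCREC_IML_MACRO_B_FAR macro. A minimal sketch of that containment test, with hypothetical funcStart/funcSize parameters standing in for whatever the boundaryTracker stores internally (the removed check below does the same range comparison with functionRef->ppcAddress and ppcSize):

#include <cstdint>

// Illustrative only, not part of the patch: "near" means the target falls inside
// the [funcStart, funcStart + funcSize) range of the function being recompiled.
static bool IsNearJumpTarget(uint32_t jumpAddressDest, uint32_t funcStart, uint32_t funcSize)
{
	return jumpAddressDest >= funcStart && jumpAddressDest < (funcStart + funcSize);
}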
- if( jumpAddressDest >= ppcImlGenContext->functionRef->ppcAddress && jumpAddressDest < (ppcImlGenContext->functionRef->ppcAddress + ppcImlGenContext->functionRef->ppcSize) ) - { - // generate instruction - PPCRecompilerImlGen_generateNewInstruction_jump(ppcImlGenContext, NULL, jumpAddressDest); - } + if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) + ppcImlGenContext->emitInst().make_jump(); else - { - // todo: Inline this jump destination if possible (in many cases it's a bunch of GPR/FPR store instructions + BLR) - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - } + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch, IMLREG_INVALID); return true; } bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); + uint32 BO, BI, BD; PPC_OPC_TEMPL_B(opcode, BO, BI, BD); + Espresso::BOField boField(BO); + uint32 crRegister = BI/4; uint32 crBit = BI%4; uint32 jumpCondition = 0; @@ -682,6 +649,10 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0 bool ignoreCondition = (BO&16)!=0; + IMLReg regCRBit; + if (!ignoreCondition) + regCRBit = _GetRegCR(ppcImlGenContext, crRegister, crBit); + uint32 jumpAddressDest = BD; if( (opcode&PPC_OPC_AA) == 0 ) { @@ -690,37 +661,15 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( opcode&PPC_OPC_LK ) { + if (useDecrementer) + return false; // conditional function calls are not supported if( ignoreCondition == false ) { - // generate jump condition - if( conditionMustBeTrue ) - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - else - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } - // generate instruction - //PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, 0); - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, !conditionMustBeTrue); - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4); + PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; + IMLSegment* blSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); + ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, conditionMustBeTrue); + 
blSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_BL, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch, IMLREG_INVALID); return true; } return false; @@ -730,12 +679,11 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { if( ignoreCondition == false ) return false; // not supported for the moment - uint32 ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR, false); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_SUB, ctrRegister, 1, 0, false, false, PPCREC_CR_REG_TEMP, PPCREC_CR_MODE_ARITHMETIC); - if( decrementerMustBeZero ) - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, PPCREC_JUMP_CONDITION_E, PPCREC_CR_REG_TEMP, 0, false); - else - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, PPCREC_JUMP_CONDITION_NE, PPCREC_CR_REG_TEMP, 0, false); + IMLReg ctrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_CTR); + IMLReg tmpBoolReg = _GetRegTemporaryS8(ppcImlGenContext, 1); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_SUB, ctrRegister, ctrRegister, 1); + ppcImlGenContext->emitInst().make_compare_s32(ctrRegister, 0, tmpBoolReg, decrementerMustBeZero ? IMLCondition::EQ : IMLCondition::NEQ); + ppcImlGenContext->emitInst().make_conditional_jump(tmpBoolReg, true); return true; } else @@ -743,219 +691,90 @@ bool PPCRecompilerImlGen_BC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) if( ignoreCondition ) { // branch always, no condition and no decrementer - debugBreakpoint(); - crRegister = PPC_REC_INVALID_REGISTER; // not necessary but lets optimizer know we dont care for cr register on this instruction + // not supported + return false; } else { - // generate jump condition - if( conditionMustBeTrue ) - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - else - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } - - if (jumpAddressDest >= ppcImlGenContext->functionRef->ppcAddress && jumpAddressDest < (ppcImlGenContext->functionRef->ppcAddress + ppcImlGenContext->functionRef->ppcSize)) + if (ppcImlGenContext->boundaryTracker->ContainsAddress(jumpAddressDest)) { // near jump - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, jumpAddressDest, jumpCondition, crRegister, crBit, conditionMustBeTrue); + ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, conditionMustBeTrue); } else { // far jump - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4, jumpCondition, crRegister, crBit, !conditionMustBeTrue); - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_B_FAR, ppcImlGenContext->ppcAddressOfCurrentInstruction, jumpAddressDest, ppcImlGenContext->cyclesSinceLastBranch); - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction + 
4); + debug_printf("PPCRecompilerImlGen_BC(): Far jump not supported yet"); + return false; } } } return true; } -bool PPCRecompilerImlGen_BCLR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// BCCTR or BCLR +bool PPCRecompilerImlGen_BCSPR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 sprReg) { - uint32 BO, BI, BD; - PPC_OPC_TEMPL_XL(opcode, BO, BI, BD); + PPCIMLGen_AssertIfNotLastSegmentInstruction(*ppcImlGenContext); + Espresso::BOField BO; + uint32 BI; + bool LK; + Espresso::decodeOp_BCSPR(opcode, BO, BI, LK); uint32 crRegister = BI/4; uint32 crBit = BI%4; - uint32 jumpCondition = 0; + IMLReg regCRBit; + if (!BO.conditionIgnore()) + regCRBit = _GetRegCR(ppcImlGenContext, crRegister, crBit); - bool conditionMustBeTrue = (BO&8)!=0; - bool useDecrementer = (BO&4)==0; // bit not set -> decrement - bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0 - bool ignoreCondition = (BO&16)!=0; - bool saveLR = (opcode&PPC_OPC_LK)!=0; - // since we skip this instruction if the condition is true, we need to invert the logic - bool invertedConditionMustBeTrue = !conditionMustBeTrue; - if( useDecrementer ) + IMLReg branchDestReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + sprReg); + if (LK) { - cemu_assert_debug(false); - return false; // unsupported + if (sprReg == SPR_LR) + { + // if the branch target is LR, then preserve it in a temporary + cemu_assert_suspicious(); // this case needs testing + IMLReg tmpRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, tmpRegister, branchDestReg); + branchDestReg = tmpRegister; + } + IMLReg registerLR = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_LR); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, registerLR, ppcImlGenContext->ppcAddressOfCurrentInstruction + 4); + } + + if (!BO.decrementerIgnore()) + { + cemu_assert_unimplemented(); + return false; + } + else if (!BO.conditionIgnore()) + { + // no decrementer but CR check + cemu_assert_debug(ppcImlGenContext->currentBasicBlock->hasContinuedFlow); + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); + PPCBasicBlockInfo* currentBasicBlock = ppcImlGenContext->currentBasicBlock; + IMLSegment* bctrSeg = PPCIMLGen_CreateNewSegmentAsBranchTarget(*ppcImlGenContext, *currentBasicBlock); + ppcImlGenContext->emitInst().make_conditional_jump(regCRBit, !BO.conditionInverted()); + bctrSeg->AppendInstruction()->make_macro(PPCREC_IML_MACRO_B_TO_REG, 0, 0, 0, branchDestReg); } else { - if( ignoreCondition ) - { - // store LR - if( saveLR ) - { - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BLRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4); - } - else - { - // branch always, no condition and no decrementer - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - } - } - else - { - // store LR - if( saveLR ) - { - uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, 
(ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - } - // generate jump condition - if( invertedConditionMustBeTrue ) - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } - else - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - // jump if BCLR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction) - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue); - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BLR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - } - } - return true; -} - -bool PPCRecompilerImlGen_BCCTR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - uint32 BO, BI, BD; - PPC_OPC_TEMPL_XL(opcode, BO, BI, BD); - - uint32 crRegister = BI/4; - uint32 crBit = BI%4; - - uint32 jumpCondition = 0; - - bool conditionMustBeTrue = (BO&8)!=0; - bool useDecrementer = (BO&4)==0; // bit not set -> decrement - bool decrementerMustBeZero = (BO&2)!=0; // bit set -> branch if CTR = 0, bit not set -> branch if CTR != 0 - bool ignoreCondition = (BO&16)!=0; - bool saveLR = (opcode&PPC_OPC_LK)!=0; - // since we skip this instruction if the condition is true, we need to invert the logic - bool invertedConditionMustBeTrue = !conditionMustBeTrue; - if( useDecrementer ) - { - assert_dbg(); - // if added, dont forget inverted logic - debug_printf("Rec: BCLR unsupported decrementer\n"); - return false; // unsupported - } - else - { - if( ignoreCondition ) - { - // store LR - if( saveLR ) - { - uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BCTRL, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4); - } - else - { - // branch always, no condition and no decrementer - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - } - } - else - { - // store LR - if( saveLR ) - { - uint32 registerLR = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_SPR0+SPR_LR); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerLR, (ppcImlGenContext->ppcAddressOfCurrentInstruction+4)&0x7FFFFFFF, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - } - // generate jump condition - if( invertedConditionMustBeTrue ) - { - if( 
crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_L; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_G; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_E; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_SUMMARYOVERFLOW; - } - else - { - if( crBit == 0 ) - jumpCondition = PPCREC_JUMP_CONDITION_GE; - else if( crBit == 1 ) - jumpCondition = PPCREC_JUMP_CONDITION_LE; - else if( crBit == 2 ) - jumpCondition = PPCREC_JUMP_CONDITION_NE; - else if( crBit == 3 ) - jumpCondition = PPCREC_JUMP_CONDITION_NSUMMARYOVERFLOW; - } - // jump if BCLR condition NOT met (jump to jumpmark of next instruction, essentially skipping current instruction) - PPCRecompilerImlGen_generateNewInstruction_conditionalJump(ppcImlGenContext, ppcImlGenContext->ppcAddressOfCurrentInstruction+4, jumpCondition, crRegister, crBit, invertedConditionMustBeTrue); - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_BCTR, ppcImlGenContext->ppcAddressOfCurrentInstruction, 0, ppcImlGenContext->cyclesSinceLastBranch); - } + // branch always, no condition and no decrementer check + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasContinuedFlow); + cemu_assert_debug(!ppcImlGenContext->currentBasicBlock->hasBranchTarget); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_B_TO_REG, 0, 0, 0, branchDestReg); } return true; } bool PPCRecompilerImlGen_ISYNC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - // does not need to be translated return true; } bool PPCRecompilerImlGen_SYNC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - // does not need to be translated return true; } @@ -963,102 +782,12 @@ bool PPCRecompilerImlGen_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - //hCPU->gpr[rD] = (int)hCPU->gpr[rA] + (int)hCPU->gpr[rB]; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD, registerRD, registerRA, registerRB, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD, registerRD, registerRA, registerRB); - } - return true; -} - -bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - //hCPU->gpr[rD] = (int)hCPU->gpr[rA] + (int)hCPU->gpr[rB]; -> Update carry - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, registerRB); - return true; -} - -bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - 
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = hCPU->gpr[rA] + hCPU->gpr[rB] + ca; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); - return true; -} - -bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - PPC_ASSERT(rB == 0); - //uint32 a = hCPU->gpr[rA]; - //uint32 ca = hCPU->xer_ca; - //hCPU->gpr[rD] = a + ca; - - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - // move rA to rD - if( registerRA != registerRD ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, registerRD, registerRA); - } - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD_CARRY, registerRD, registerRD); - } - return true; -} - -bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rD, rA, rB; - PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - PPC_ASSERT(rB == 0); - //uint32 a = hCPU->gpr[rA]; - //uint32 ca = hCPU->xer_ca; - //hCPU->gpr[rD] = a + ca + -1; - - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - // move rA to rD - if( registerRA != registerRD ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, registerRD, registerRA); - } - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD_CARRY_ME, registerRD, registerRD); - } + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regD, regA, regB); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -1067,22 +796,16 @@ bool PPCRecompilerImlGen_ADDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - //hCPU->gpr[rD] = (rA ? 
(int)hCPU->gpr[rA] : 0) + (int)imm; - if( rA != 0 ) + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + if (rA != 0) { - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if rD is already loaded, else use new temporary register - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, registerRD, registerRA, imm); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, regD, regA, imm); } else { - // rA not used, instruction is value assignment - // rD = imm - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regD, imm); } - // never updates any cr return true; } @@ -1091,49 +814,88 @@ bool PPCRecompilerImlGen_ADDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco int rD, rA; uint32 imm; PPC_OPC_TEMPL_D_Shift16(opcode, rD, rA, imm); - if( rA != 0 ) + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + if (rA != 0) { - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if rD is already loaded, else use new temporary register - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, registerRD, registerRA, (sint32)imm); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, regD, regA, (sint32)imm); } else { - // rA not used, instruction turns into simple value assignment - // rD = imm - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, registerRD, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regD, (sint32)imm); } - // never updates any cr return true; } -bool PPCRecompilerImlGen_ADDIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_ADDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + // r = a + b -> update carry + sint32 rD, rA, rB; + PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); + IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regRB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD, regRD, regRA, regRB, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regRD); + return true; +} + +bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool updateCR0) { sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - // rD = rA + imm; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if rD is already loaded, else use new temporary register - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - 
PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm); - // never updates any cr + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regA, (sint32)imm, regCa); + if(updateCR0) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } -bool PPCRecompilerImlGen_ADDIC_(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_ADDE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - // this opcode is identical to ADDIC but additionally it updates CR0 - sint32 rD, rA; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - // rD = rA + imm; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if rD is already loaded, else use new temporary register - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD_UPDATE_CARRY, registerRD, registerRA, imm, 0, PPCREC_CR_MODE_LOGICAL); + // r = a + b + carry -> update carry + sint32 rD, rA, rB; + PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); + IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regRB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, regRB, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regRD); + return true; +} + +bool PPCRecompilerImlGen_ADDZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + // r = a + carry -> update carry + sint32 rD, rA, rB; + PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); + IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, 0, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regRD); + return true; +} + +bool PPCRecompilerImlGen_ADDME(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + // r = a + 0xFFFFFFFF + carry -> update carry + sint32 rD, rA, rB; + PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); + IMLReg regRA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regRD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regRD, regRA, -1, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regRD); return true; } @@ -1141,74 +903,79 @@ bool PPCRecompilerImlGen_SUBF(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1; - // rD = rB - rA - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( 
opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUB, registerRD, registerRB, registerRA); + // rD = ~rA + rB + 1 + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SUB, regD, regB, regA); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_SUBFE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // d = ~a + b + ca; sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + ca; - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRB, registerRA); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); + ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_SUBFZE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // d = ~a + ca; sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - if( rB != 0 ) - debugBreakpoint(); - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY, registerRD, registerRA); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, 0, regCa); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_SUBFC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // d = ~a + b + 1; sint32 rD, rA, rB; 
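The d = ~a + b + 1 rewrite used by the SUBF family (and the ~a + b + ca variant in SUBFE above) is the standard two's-complement identity: on 32-bit unsigned arithmetic, ~a + b + 1 equals b - a, and the carry out of that addition is exactly the "no borrow" condition that PowerPC records in XER.CA for subtraction. A small self-contained check of the identity, illustrative only and not part of the patch:

#include <cstdint>
#include <cassert>

// Verifies b - a == ~a + b + 1 (mod 2^32) and that the carry out equals (b >= a).
static void VerifySubfIdentity(uint32_t a, uint32_t b)
{
	uint64_t sum = (uint64_t)(uint32_t)~a + (uint64_t)b + 1ull; // widen so the carry bit survives
	assert((uint32_t)sum == b - a);                             // low 32 bits match the subtraction
	assert(((sum >> 32) & 1ull) == (b >= a ? 1ull : 0ull));     // bit 32 is CA ("no borrow")
}

This is also why the code a few lines below seeds regCa with 1 before the add-with-carry: the +1 of the identity is injected through the carry input instead of being folded into an operand.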
PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = ~hCPU->gpr[rA] + hCPU->gpr[rB] + 1; - // rD = rB - rA - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SUBFC, registerRD, registerRA, registerRB); - if (opcode & PPC_OPC_RC) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, registerRD, registerRD, 0, PPCREC_CR_MODE_LOGICAL); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCa, 1); // set input carry to simulate offset of 1 + ppcImlGenContext->emitInst().make_r_r_r_carry(PPCREC_IML_OP_ADD_WITH_CARRY, regD, regTmp, regB, regCa); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_SUBFIC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // d = ~a + imm + 1 sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - //uint32 a = hCPU->gpr[rA]; - //hCPU->gpr[rD] = ~a + imm + 1; - // cr0 is never affected - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_SUBFC, registerRD, registerRA, imm); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regCa = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regA); + ppcImlGenContext->emitInst().make_r_r_s32_carry(PPCREC_IML_OP_ADD, regD, regTmp, (sint32)imm + 1, regCa); return true; } @@ -1217,10 +984,9 @@ bool PPCRecompilerImlGen_MULLI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco int rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - // mulli instruction does not modify any flags - uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); - uint32 registerOperand = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand, (sint32)imm); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_MULTIPLY_SIGNED, regD, regA, (sint32)imm); return true; } @@ -1228,18 +994,16 @@ bool PPCRecompilerImlGen_MULLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - //hCPU->gpr[rD] = hCPU->gpr[rA] * hCPU->gpr[rB]; - uint32 registerResult = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); - uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); if (opcode & PPC_OPC_OE) { return false; } - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_SIGNED, registerResult, registerOperand1, registerOperand2); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_SIGNED, regD, regA, regB); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -1247,14 +1011,12 @@ bool PPCRecompilerImlGen_MULHW(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - //hCPU->gpr[rD] = ((sint64)(sint32)hCPU->gpr[rA] * (sint64)(sint32)hCPU->gpr[rB])>>32; - uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); - uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, registerResult, registerOperand1, registerOperand2); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED, regD, regA, regB); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } @@ -1262,14 +1024,12 @@ bool PPCRecompilerImlGen_MULHWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - //hCPU->gpr[rD] = (hCPU->gpr[rA] * hCPU->gpr[rB])>>32; - uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); - uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, registerResult, registerOperand1, registerOperand2); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED, regD, regA, regB); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); 
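For reference, what PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED has to model here is the upper half of the full 64-bit unsigned product, the counterpart of the sign-extended product spelled out in the old interpreter comment for MULHW above. A minimal standalone equivalent, illustrative only and not part of the patch:

#include <cstdint>

// MULHWU: upper 32 bits of the unsigned 64-bit product of two 32-bit operands.
static uint32_t MulhwuReference(uint32_t a, uint32_t b)
{
	return (uint32_t)(((uint64_t)a * (uint64_t)b) >> 32);
}

// The signed MULHW is analogous with sign-extended operands:
// (uint32_t)(((int64_t)(int32_t)a * (int64_t)(int32_t)b) >> 32)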
return true; } @@ -1277,18 +1037,12 @@ bool PPCRecompilerImlGen_DIVW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = (sint32)a / (sint32)b; - uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); - uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + IMLReg regR = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_SIGNED, regR, regA, regB); if (opcode & PPC_OPC_RC) - { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_DIVIDE_SIGNED, registerResult, registerOperand1, registerOperand2); - } + PPCImlGen_UpdateCR0(ppcImlGenContext, regR); return true; } @@ -1296,84 +1050,66 @@ bool PPCRecompilerImlGen_DIVWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - // hCPU->gpr[rD] = (uint32)a / (uint32)b; - uint32 registerResult = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); - uint32 registerOperand1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerOperand2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_DIVIDE_UNSIGNED, regD, regA, regB); if (opcode & PPC_OPC_RC) - { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_DIVIDE_UNSIGNED, registerResult, registerOperand1, registerOperand2); - } + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } bool PPCRecompilerImlGen_RLWINM(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - int rS, rA, SH, MB, ME; + sint32 rS, rA, SH, MB, ME; PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME); uint32 mask = ppc_mask(MB, ME); - //uint32 v = ppc_word_rotl(hCPU->gpr[rS], SH); - //hCPU->gpr[rA] = v & mask; - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // handle special forms of RLWINM - if( SH == 0 && SH == (ME-SH) && MB == 0 ) - { - // CLRRWI - // todo - } - else if( ME == (31-SH) && MB == 0 ) + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + if( ME == (31-SH) && MB == 0 ) { // SLWI - if(opcode&PPC_OPC_RC) - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_SHIFT, registerRA, registerRS, SH); - return true; + 
ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, regA, regS, SH); } else if( SH == (32-MB) && ME == 31 ) { // SRWI - if(opcode&PPC_OPC_RC) - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_RIGHT_SHIFT, registerRA, registerRS, MB); - return true; - } - // general handler - if( registerRA != registerRS ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, registerRA, registerRS); - if( SH != 0 ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, SH, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - if(opcode&PPC_OPC_RC) - { - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, regA, regS, MB); } else { - if( mask != 0xFFFFFFFF ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + // general handler + if (rA != rS) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regA, regS); + if (SH != 0) + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, regA, SH); + if (mask != 0xFFFFFFFF) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regA, regA, (sint32)mask); } + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } bool PPCRecompilerImlGen_RLWIMI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - int rS, rA, SH, MB, ME; + sint32 rS, rA, SH, MB, ME; PPC_OPC_TEMPL_M(opcode, rS, rA, SH, MB, ME); - - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // pack RLWIMI parameters into single integer - uint32 vImm = MB|(ME<<8)|(SH<<16); - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_RLWIMI, registerRA, registerRS, (sint32)vImm, PPC_REC_INVALID_REGISTER, 0); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regR = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); + uint32 mask = ppc_mask(MB, ME); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regTmp, regS); + if (SH) + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_LEFT_ROTATE, regTmp, SH); + if (mask != 0) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regR, regR, (sint32)~mask); + if (mask != 0xFFFFFFFF) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, (sint32)mask); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regR, regR, regTmp); if (opcode & PPC_OPC_RC) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, registerRA, registerRA, 0, PPCREC_CR_MODE_LOGICAL); + PPCImlGen_UpdateCR0(ppcImlGenContext, regR); return true; } @@ -1381,61 +1117,61 @@ bool PPCRecompilerImlGen_RLWNM(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rS, rA, rB, MB, ME; PPC_OPC_TEMPL_M(opcode, rS, rA, rB, MB, ME); - // uint32 v = ppc_word_rotl(hCPU->gpr[rS], hCPU->gpr[rB]); uint32 mask = ppc_mask(MB, ME); - // uint32 v = 
ppc_word_rotl(hCPU->gpr[rS], hCPU->gpr[rB]); - // hCPU->gpr[rA] = v & mask; - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_LEFT_ROTATE, registerRA, registerRS, registerRB); + IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_LEFT_ROTATE, regA, regS, regB); + if( mask != 0xFFFFFFFF ) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regA, regA, (sint32)mask); if (opcode & PPC_OPC_RC) - { - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 32, false, false, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - if( mask != 0xFFFFFFFF ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, registerRA, (sint32)mask, 32, false, false, PPC_REC_INVALID_REGISTER, 0); - } + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } bool PPCRecompilerImlGen_SRAW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { + // unlike SRAWI, for SRAW the shift range is 0-63 (masked to 6 bits) + // but only shifts up to register bitwidth minus one are well defined in IML so this requires special handling for shifts >= 32 sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - //uint32 SH = hCPU->gpr[rB] & 0x3f; - //hCPU->gpr[rA] = hCPU->gpr[rS]; - //hCPU->xer_ca = 0; - //if (hCPU->gpr[rA] & 0x80000000) { - // uint32 ca = 0; - // for (uint32 i=0; i < SH; i++) { - // if (hCPU->gpr[rA] & 1) ca = 1; - // hCPU->gpr[rA] >>= 1; - // hCPU->gpr[rA] |= 0x80000000; - // } - // if (ca) hCPU->xer_ca = 1; - //} else { - // if (SH > 31) { - // hCPU->gpr[rA] = 0; - // } else { - // hCPU->gpr[rA] >>= SH; - // } - //} - //if (Opcode & PPC_OPC_RC) { - // // update cr0 flags - // ppc_update_cr0(hCPU, hCPU->gpr[rA]); - //} + IMLReg regS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + IMLReg regA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg regCarry = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_XER_CA); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( (opcode&PPC_OPC_RC) != 0 ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SRAW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SRAW, registerRA, registerRS, registerRB); + IMLReg regTmpShiftAmount = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + IMLReg regTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + IMLReg regTmp1 = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); + IMLReg regTmp2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3); + + // load masked shift factor into temporary register + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmpShiftAmount, regB, 0x3F); + ppcImlGenContext->emitInst().make_compare_s32(regTmpShiftAmount, 31, regTmpCondBool, IMLCondition::UNSIGNED_GT); + ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, true); + + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + [&](ppcImlGenContext_t& genCtx) + { + /* branch taken, shift size 32 or above */ + genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, 31); // shift the sign bit into all the bits + genCtx.emitInst().make_compare_s32(regA, 0, regCarry, IMLCondition::NEQ); + }, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken, shift size below 32 */ + genCtx.emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regTmp1, regS, 31); // signMask = input >> 31 (arithmetic shift) + genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regTmp2, 1); // shiftMask = ((1<emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regTmp, regS, 31); // signMask = input >> 31 (arithmetic shift) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regTmp, regTmp, regS); // testValue = input & signMask & ((1<<SH)-1) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regTmp, regTmp, ((1 << SH) - 1)); + ppcImlGenContext->emitInst().make_compare_s32(regTmp, 0, regCarry, IMLCondition::NEQ); // ca = (testValue != 0) + // do the actual shift + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, regA, regS, (sint32)SH); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1459,17 +1204,12 @@ bool PPCRecompilerImlGen_SLW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if (opcode & PPC_OPC_RC) - { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SLW, registerRA, registerRS, registerRB, PPC_REC_INVALID_REGISTER, 0); - } + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SLW, regA, regS, regB); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1477,37 +1217,24 @@ bool PPCRecompilerImlGen_SRW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regA = 
_GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_SRW, regA, regS, regB); if (opcode & PPC_OPC_RC) - { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_SRW, registerRA, registerRS, registerRB, PPC_REC_INVALID_REGISTER, 0); - } + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } - bool PPCRecompilerImlGen_EXTSH(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - PPC_ASSERT(rB==0); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if ( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN_S16_TO_S32, registerRA, registerRS, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN_S16_TO_S32, registerRA, registerRS); - } + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S16_TO_S32, regA, regS); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1515,16 +1242,11 @@ bool PPCRecompilerImlGen_EXTSB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if ( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN_S8_TO_S32, registerRA, registerRS, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN_S8_TO_S32, registerRA, registerRS); - } + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN_S8_TO_S32, regA, regS); + if ((opcode & PPC_OPC_RC)) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1532,30 +1254,11 @@ bool PPCRecompilerImlGen_CNTLZW(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - PPC_ASSERT(rB==0); - if( opcode&PPC_OPC_RC ) - { - return false; - } - uint32 registerRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); - uint32 registerRA = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_CNTLZW, registerRA, registerRS); - - //uint32 n=0; - //uint32 x=0x80000000; - //uint32 v=hCPU->gpr[rS]; - //while (!(v & x)) { - // n++; - // if (n==32) break; - // x>>=1; - //} - //hCPU->gpr[rA] = n; - //if (Opcode & PPC_OPC_RC) { - // // update cr0 flags - // ppc_update_cr0(hCPU, hCPU->gpr[rA]); - //} - - + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_CNTLZW, regA, regS); + if ((opcode & PPC_OPC_RC)) + 
PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -1563,438 +1266,124 @@ bool PPCRecompilerImlGen_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 rD, rA, rB; PPC_OPC_TEMPL_XO(opcode, rD, rA, rB); - PPC_ASSERT(rB == 0); - //hCPU->gpr[rD] = -((signed int)hCPU->gpr[rA]); - //if (Opcode & PPC_OPC_RC) { - // // update cr0 flags - // ppc_update_cr0(hCPU, hCPU->gpr[rD]); - //} - uint32 registerRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 registerRD = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NEG, registerRD, registerRA, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NEG, registerRD, registerRA); - } + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, regD, regA); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regD); return true; } -void PPCRecompilerImlGen_LWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_LOAD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool signExtend, bool isBigEndian, bool updateAddrReg) { int rA, rD; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm, 32, false, true); -} - -void PPCRecompilerImlGen_LWZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = 
PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 32, false, true); -} - -void PPCRecompilerImlGen_LHA(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm, 16, true, true); -} - -void PPCRecompilerImlGen_LHAU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 16, true, true); -} - -void PPCRecompilerImlGen_LHZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - // note: Darksiders 2 has this instruction form but it is never executed. 
- PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm, 16, false, true); -} - -void PPCRecompilerImlGen_LHZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new temporary register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 16, false, true); -} - -void PPCRecompilerImlGen_LBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load byte - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm, 8, false, true); -} - -void PPCRecompilerImlGen_LBZU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, 
rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load byte - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 8, false, true); -} - -bool PPCRecompilerImlGen_LWZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) - { - return false; - } - // hCPU->gpr[rD] = memory_readU8((rA?hCPU->gpr[rA]:0)+hCPU->gpr[rB]); - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load word - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 32, false, true); - return true; -} - -bool PPCRecompilerImlGen_LWZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) - { - return false; - } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // add rB to rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB); - // load word - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 32, false, true); - return true; -} - -bool PPCRecompilerImlGen_LWBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - // load memory rA and rB into register - uint32 gprRegisterA = 0; - 
if( rA ) - gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); - if (destinationRegister == PPC_REC_INVALID_REGISTER) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0 + rD); // else just create new register - // load word - if( rA ) - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 32, false, false); - else - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterB, 0, 32, false, false); - return true; -} - -bool PPCRecompilerImlGen_LHAX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return true; - } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half word - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 16, true, true); - return true; -} - -bool PPCRecompilerImlGen_LHAUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return true; - } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // add rB to rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB); - // load half word - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 
0, 16, true, true); - return true; -} - -bool PPCRecompilerImlGen_LHZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return true; - } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half word - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 16, false, true); - return true; -} - -bool PPCRecompilerImlGen_LHZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return true; - } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // add rB to rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB); - // load hald word - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterA, 0, 16, false, true); - return true; -} - -void PPCRecompilerImlGen_LHBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - // load memory rA and rB into register - uint32 gprRegisterA = rA != 0 ? 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false) : 0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); - if (destinationRegister == PPC_REC_INVALID_REGISTER) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0 + rD); // else just create new register - // load half word (little-endian) - if (rA == 0) - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterB, 0, 16, false, false); - else - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 16, false, false); -} - -bool PPCRecompilerImlGen_LBZX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - if( rA == 0 ) - { - // special case where rA is ignored and only rB is used - return false; - } - // hCPU->gpr[rD] = memory_readU8((rA?hCPU->gpr[rA]:0)+hCPU->gpr[rB]); - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load byte - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 8, false, true); - return true; -} - -bool PPCRecompilerImlGen_LBZUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rD, rB; - PPC_OPC_TEMPL_X(opcode, rD, rA, rB); + IMLReg regMemAddr; if (rA == 0) { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return true; + if (updateAddrReg) + return false; // invalid instruction form + regMemAddr = _GetRegTemporary(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemAddr, 0); } - // load memory rA and rB into register - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0 + rD); - if (destinationRegister == PPC_REC_INVALID_REGISTER) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0 + rD); // else just create new register - // add rB to rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB); - // load byte - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, 
destinationRegister, gprRegisterA, 0, 8, false, true); + else + { + if (updateAddrReg && rA == rD) + return false; // invalid instruction form + regMemAddr = _GetRegGPR(ppcImlGenContext, rA); + } + if (updateAddrReg) + { + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, regMemAddr, regMemAddr, (sint32)imm); + imm = 0; + } + IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD); + ppcImlGenContext->emitInst().make_r_memory(regDst, regMemAddr, (sint32)imm, bitWidth, signExtend, isBigEndian); return true; } -bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +void PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool signExtend, bool isBigEndian, bool updateAddrReg) { + // if rA == rD, then the EA wont be stored to rA. We could set updateAddrReg to false in such cases but the end result is the same since the loaded value would overwrite rA sint32 rA, rD, rB; PPC_OPC_TEMPL_X(opcode, rD, rA, rB); - // load memory rA and rB into register - uint32 gprRegisterA = rA != 0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load word - if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, PPC_REC_LOAD_LWARX_MARKER, false, true); + updateAddrReg = updateAddrReg && (rA != 0); + IMLReg regA = rA != 0 ? 
_GetRegGPR(ppcImlGenContext, rA) : IMLREG_INVALID; + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD); + if (updateAddrReg) + { + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regA, regA, regB); + // use single register addressing + regB = regA; + regA = IMLREG_INVALID; + } + if(regA.IsValid()) + PPCRecompilerImlGen_generateNewInstruction_r_memory_indexed(ppcImlGenContext, regDst, regA, regB, bitWidth, signExtend, isBigEndian); else - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegisterB, 0, PPC_REC_LOAD_LWARX_MARKER, false, true); + ppcImlGenContext->emitInst().make_r_memory(regDst, regB, 0, bitWidth, signExtend, isBigEndian); +} + +bool PPCRecompilerImlGen_STORE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool isBigEndian, bool updateAddrReg) +{ + int rA, rD; + uint32 imm; + PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); + IMLReg regA; + if (rA != 0) + { + regA = _GetRegGPR(ppcImlGenContext, rA); + } + else + { + if (updateAddrReg) + return false; // invalid instruction form + regA = _GetRegTemporary(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regA, 0); + } + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); + if (updateAddrReg) + { + if (rD == rA) + { + // make sure to keep source data intact + regD = _GetRegTemporary(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regD, regA); + } + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, regA, regA, (sint32)imm); + imm = 0; + } + ppcImlGenContext->emitInst().make_memory_r(regD, regA, (sint32)imm, bitWidth, isBigEndian); + return true; +} + +bool PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 bitWidth, bool isBigEndian, bool updateAddrReg) +{ + sint32 rA, rS, rB; + PPC_OPC_TEMPL_X(opcode, rS, rA, rB); + IMLReg regA = rA != 0 ? _GetRegGPR(ppcImlGenContext, rA) : IMLREG_INVALID; + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regSrc = _GetRegGPR(ppcImlGenContext, rS); + if (updateAddrReg) + { + if(rA == 0) + return false; // invalid instruction form + if (regSrc == regA) + { + // make sure to keep source data intact + regSrc = _GetRegTemporary(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regSrc, regA); + } + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regA, regA, regB); + // use single register addressing + regB = regA; + regA = IMLREG_INVALID; + } + if (regA.IsInvalid()) + ppcImlGenContext->emitInst().make_memory_r(regSrc, regB, 0, bitWidth, isBigEndian); + else + PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, regSrc, regA, regB, bitWidth, false, isBigEndian); return true; } @@ -2003,257 +1392,33 @@ void PPCRecompilerImlGen_LMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode sint32 rD, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - //uint32 ea = (rA ? 
hCPU->gpr[rA] : 0) + imm; + cemu_assert_debug(rA != 0); sint32 index = 0; - while( rD <= 31 ) + while (rD <= 31) { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regD = _GetRegGPR(ppcImlGenContext, rD); // load word - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, imm+index*4, 32, false, true); + ppcImlGenContext->emitInst().make_r_memory(regD, regA, (sint32)imm + index * 4, 32, false, true); // next rD++; index++; } } -void PPCRecompilerImlGen_STW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - // note: Darksiders 2 has this instruction form but it is never executed. - //PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister - // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, imm, 32, true); -} - -void PPCRecompilerImlGen_STWU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // store&update instructions where rD==rA store the register contents without added imm, therefore we need to handle it differently - // get memory gpr register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // get source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister - // add imm to memory register early if possible - if( rD != rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 32, true); - // add imm to memory register late if we couldn't do it early - if( rD == rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); -} - -void 
PPCRecompilerImlGen_STH(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister - // load half - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, imm, 16, true); -} - -void PPCRecompilerImlGen_STHU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // get memory gpr register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // get source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister - // add imm to memory register early if possible - if( rD != rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 16, true); - // add imm to memory register late if we couldn't do it early - if( rD == rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); -} - -void PPCRecompilerImlGen_STB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rS; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rS, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister - // store byte - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, imm, 8, true); -} - -void PPCRecompilerImlGen_STBU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, rD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, rD, rA, imm); - if( rA == 0 ) - { - // special form where gpr is ignored and only imm is used - 
PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_DEBUGBREAK, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->ppcAddressOfCurrentInstruction, ppcImlGenContext->cyclesSinceLastBranch); - return; - } - // get memory gpr register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // get source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rD, false); // can be the same as gprRegister - // add imm to memory register early if possible - if( rD != rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // store byte - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, (rD==rA)?imm:0, 8, true); - // add imm to memory register late if we couldn't do it early - if( rD == rA ) - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); -} - -// generic indexed store (STWX, STHX, STBX, STWUX. If bitReversed == true -> STHBRX) -bool PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 storeBitWidth, bool byteReversed = false) -{ - sint32 rA, rS, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // prepare registers - uint32 gprRegisterA; - if(rA != 0) - gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // store word - if (rA == 0) - { - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, destinationRegister, gprRegisterB, 0, storeBitWidth, !byteReversed); - } - else - PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, storeBitWidth, false, !byteReversed); - return true; -} - -bool PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, uint32 storeBitWidth) -{ - sint32 rA, rS, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - if( rA == 0 ) - { - // not supported - return false; - } - if( rS == rA || rS == rB ) - { - // prepare registers - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, storeBitWidth, false, true); - // update EA after store - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB); - return true; - } - // prepare registers - uint32 gprRegisterA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 sourceRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // update EA - 
PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegisterA, gprRegisterB); - // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegisterA, 0, storeBitWidth, true); - return true; -} - -bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rS, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // prepare registers - uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // store word - if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, PPC_REC_STORE_STWCX_MARKER, false, true); - else - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, destinationRegister, gprRegisterB, 0, PPC_REC_STORE_STWCX_MARKER, true); - return true; -} - -bool PPCRecompilerImlGen_STWBRX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rA, rS, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // prepare registers - uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - uint32 destinationRegister = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // store word - if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_memory_r_indexed(ppcImlGenContext, destinationRegister, gprRegisterA, gprRegisterB, 32, false, false); - else - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, destinationRegister, gprRegisterB, 0, 32, false); - return true; -} - void PPCRecompilerImlGen_STMW(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 rS, rA; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, rS, rA, imm); + cemu_assert_debug(rA != 0); sint32 index = 0; while( rS <= 31 ) { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, imm+index*4, 32, true); + ppcImlGenContext->emitInst().make_memory_r(regS, regA, (sint32)imm + index * 4, 32, true); // next rS++; index++; @@ -2266,70 +1431,43 @@ bool PPCRecompilerImlGen_LSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPC_OPC_TEMPL_X(opcode, rD, rA, nb); if( nb == 0 ) nb = 32; - if( nb == 4 ) + + if (rA == 0) { - // if nb == 4 this instruction immitates LWZ - if( rA == 0 ) - { -#ifndef PUBLIC_RELEASE - assert_dbg(); // special form where gpr is ignored and only imm is used -#endif - return false; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( 
destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 32, false, true); - return true; + cemu_assert_unimplemented(); // special form where gpr is ignored and EA is 0 + return false; } - else if( nb == 2 ) + + // potential optimization: On x86 unaligned access is allowed and we could handle the case nb==4 with a single memory read, and nb==2 with a memory read and shift + + IMLReg memReg = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); + uint32 memOffset = 0; + while (nb > 0) { - // if nb == 2 this instruction immitates a LHZ but the result is shifted left by 16 bits - if( rA == 0 ) - { -#ifndef PUBLIC_RELEASE - assert_dbg(); // special form where gpr is ignored and only imm is used -#endif + if (rD == rA) return false; - } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, 16, false, true); - // shift - PPCRecompilerImlGen_generateNewInstruction_r_r_s32(ppcImlGenContext, PPCREC_IML_OP_LEFT_SHIFT, destinationRegister, destinationRegister, 16); - return true; - } - else if( nb == 3 ) - { - // if nb == 3 this instruction loads a 3-byte big-endian and the result is shifted left by 8 bits - if( rA == 0 ) + cemu_assert(rD < 32); + IMLReg regDst = _GetRegGPR(ppcImlGenContext, rD); + // load bytes one-by-one + for (sint32 b = 0; b < 4; b++) { -#ifndef PUBLIC_RELEASE - assert_dbg(); // special form where gpr is ignored and only imm is used -#endif - return false; + ppcImlGenContext->emitInst().make_r_memory(regTmp, memReg, memOffset + b, 8, false, false); + sint32 shiftAmount = (3 - b) * 8; + if(shiftAmount) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, regTmp, regTmp, shiftAmount); + if(b == 0) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regDst, regTmp); + else + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regDst, regDst, regTmp); + nb--; + if (nb == 0) + break; } - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // check if destination register is already loaded - uint32 destinationRegister = PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, PPCREC_NAME_R0+rD); - if( destinationRegister == PPC_REC_INVALID_REGISTER ) - destinationRegister = PPCRecompilerImlGen_getAndLockFreeTemporaryGPR(ppcImlGenContext, PPCREC_NAME_R0+rD); // else just create new register - // load half - PPCRecompilerImlGen_generateNewInstruction_r_memory(ppcImlGenContext, destinationRegister, gprRegister, 0, PPC_REC_STORE_LSWI_3, false, true); - return true; + memOffset += 4; + rD++; } - debug_printf("PPCRecompilerImlGen_LSWI(): Unsupported nb value %d\n", nb); - return false; + return true; } bool 
PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2338,38 +1476,111 @@ bool PPCRecompilerImlGen_STSWI(ppcImlGenContext_t* ppcImlGenContext, uint32 opco PPC_OPC_TEMPL_X(opcode, rS, rA, nb); if( nb == 0 ) nb = 32; - if( nb == 4 ) + + IMLReg regMem = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); + uint32 memOffset = 0; + while (nb > 0) { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister - // store word - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, 32, true); - return true; + if (rS == rA) + return false; + cemu_assert(rS < 32); + IMLReg regSrc = _GetRegGPR(ppcImlGenContext, rS); + // store bytes one-by-one + for (sint32 b = 0; b < 4; b++) + { + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regTmp, regSrc); + sint32 shiftAmount = (3 - b) * 8; + if (shiftAmount) + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, regTmp, regTmp, shiftAmount); + ppcImlGenContext->emitInst().make_memory_r(regTmp, regMem, memOffset + b, 8, false); + nb--; + if (nb == 0) + break; + } + memOffset += 4; + rS++; } - else if( nb == 2 ) - { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister - // store half-word (shifted << 16) - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, PPC_REC_STORE_STSWI_2, false); - return true; - } - else if( nb == 3 ) - { - // load memory gpr into register - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // load source register - uint32 sourceRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS, false); // can be the same as gprRegister - // store 3-byte-word (shifted << 8) - PPCRecompilerImlGen_generateNewInstruction_memory_r(ppcImlGenContext, sourceRegister, gprRegister, 0, PPC_REC_STORE_STSWI_3, false); - return true; - } - debug_printf("PPCRecompilerImlGen_STSWI(): Unsupported nb value %d\n", nb); - return false; + return true; +} + +bool PPCRecompilerImlGen_LWARX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + sint32 rA, rD, rB; + PPC_OPC_TEMPL_X(opcode, rD, rA, rB); + + IMLReg regA = rA != 0 ? 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : IMLREG_INVALID; + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); + IMLReg regD = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rD); + IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA); + IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL); + // calculate EA + if (regA.IsValid()) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB); + else + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB); + // load word + ppcImlGenContext->emitInst().make_r_memory(regD, regMemResEA, 0, 32, false, true); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResVal, regD); + return true; +} + +bool PPCRecompilerImlGen_STWCX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +{ + sint32 rA, rS, rB; + PPC_OPC_TEMPL_X(opcode, rS, rA, rB); + IMLReg regA = rA != 0 ? PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA) : IMLREG_INVALID; + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rB); + IMLReg regData = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rS); + IMLReg regTmpDataBE = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); + IMLReg regTmpCompareBE = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 3); + // calculate EA + IMLReg regCalcEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + if (regA.IsValid()) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regCalcEA, regA, regB); + else + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCalcEA, regB); + // get CR bit regs and set LT, GT and SO immediately + IMLReg regCrLT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_LT); + IMLReg regCrGT = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_GT); + IMLReg regCrEQ = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_EQ); + IMLReg regCrSO = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_SO); + IMLReg regXerSO = _GetRegCR(ppcImlGenContext, 0, Espresso::CR_BIT_INDEX_SO); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrLT, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrGT, 0); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regCrSO, regXerSO); + // get regs for reservation address and value + IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_EA); + IMLReg regMemResVal = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_CPU_MEMRES_VAL); + // compare calculated EA with reservation + IMLReg regTmpBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + ppcImlGenContext->emitInst().make_compare(regCalcEA, regMemResEA, regTmpBool, IMLCondition::EQ); + ppcImlGenContext->emitInst().make_conditional_jump(regTmpBool, true); + + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + [&](ppcImlGenContext_t& genCtx) + { + /* branch taken, EA matching */ + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, regTmpDataBE, regData); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ENDIAN_SWAP, regTmpCompareBE, regMemResVal); + ppcImlGenContext->emitInst().make_atomic_cmp_store(regMemResEA, regTmpCompareBE, 
regTmpDataBE, regCrEQ); + }, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken, EA mismatching */ + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrEQ, 0); + } + ); + + // reset reservation + // I found contradictory information of whether the reservation is cleared in all cases, so unit testing would be required + // Most sources state that it is cleared on successful store. They don't explicitly mention what happens on failure + // "The PowerPC 600 series, part 7: Atomic memory access and cache coherency" states that it is always cleared + // There may also be different behavior between individual PPC architectures + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResEA, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regMemResVal, 0); + + return true; } bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -2378,92 +1589,39 @@ bool PPCRecompilerImlGen_DCBZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod rA = (opcode>>16)&0x1F; rB = (opcode>>11)&0x1F; // prepare registers - uint32 gprRegisterA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false):0; - uint32 gprRegisterB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // store - if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_DCBZ, gprRegisterA, gprRegisterB); + IMLReg regA = rA!=0?PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA):IMLREG_INVALID; + IMLReg regB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + // load zero into a temporary register + IMLReg regZero = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regZero, 0); + // prepare EA and align it to cacheline + IMLReg regMemResEA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + if(rA != 0) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, regMemResEA, regA, regB); else - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_DCBZ, gprRegisterB, gprRegisterB); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regMemResEA, regB); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regMemResEA, regMemResEA, ~31); + // zero out the cacheline + for(sint32 i = 0; i < 32; i += 4) + ppcImlGenContext->emitInst().make_memory_r(regZero, regMemResEA, i, 32, false); return true; } -bool PPCRecompilerImlGen_OR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_OR_NOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool complementResult) { int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // check for MR mnemonic - if( rS == rB ) - { - // simple register copy - if( rA != rS ) // check if no-op - { - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - } - } - else - { - if( opcode&PPC_OPC_RC ) - { - // no effect but CR is updated - sint32 gprSourceReg = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprSourceReg, gprSourceReg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - // no-op - } - } - } + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + if(rS == rB) // check for MR mnemonic + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regA, regS); else - { - // rA = rS | rA - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) - { - // make sure we don't overwrite rS or rA - if( gprSource1Reg == gprDestReg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource1Reg); - } - if( opcode&PPC_OPC_RC ) - { - // fixme: merge CR update into OR instruction above - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - } - } - else - { - // rA = rS - if( gprDestReg != gprSource1Reg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - } - // rA |= rB - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); - } - } - } + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regA, regS, regB); + if(complementResult) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regA, regA); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -2471,151 +1629,33 @@ bool PPCRecompilerImlGen_ORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // hCPU->gpr[rA] = hCPU->gpr[rS] | ~hCPU->gpr[rB]; - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ORC, gprDestReg, gprSource1Reg, gprSource2Reg, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r_r(ppcImlGenContext, PPCREC_IML_OP_ORC, gprDestReg, gprSource1Reg, gprSource2Reg); + // rA = rS | ~rB; + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regA, regS, regTmp); + if (opcode & PPC_OPC_RC) + 
PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } -bool PPCRecompilerImlGen_NOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_AND_NAND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool complementResult) { int rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - //hCPU->gpr[rA] = ~(hCPU->gpr[rS] | hCPU->gpr[rB]); - // check for NOT mnemonic - if( rS == rB ) - { - // simple register copy with NOT - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - } - } + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + if (regS == regB) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_ASSIGN, regA, regS); else - { - // rA = rS | rA - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) - { - // make sure we don't overwrite rS or rA - if( gprSource1Reg == gprDestReg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource1Reg); - } - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - if( opcode&PPC_OPC_RC ) - { - // fixme: merge CR update into OR instruction above - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - } - } - else - { - // rA = rS - if( gprDestReg != gprSource1Reg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - } - // rA |= rB - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprDestReg, gprSource2Reg); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_ARITHMETIC); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); - } - } - } - return true; -} - -bool PPCRecompilerImlGen_AND(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rS, rA, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - // check for MR mnemonic - if( rS == rB ) - { - // simple register copy - if( rA != rS ) // check if no-op - { - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, 
PPCREC_NAME_R0+rA); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - } - } - else - { - cemu_assert_unimplemented(); // no-op -> verify this case - } - } - else - { - // rA = rS & rA - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) - { - // make sure we don't overwrite rS or rA - if( gprSource1Reg == gprDestReg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprSource1Reg); - } - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - } - } - else - { - // rA = rS - if( gprDestReg != gprSource1Reg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - } - // rA &= rB - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprSource2Reg); - } - } - } + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regA, regS, regB); + if (complementResult) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regA, regA); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } @@ -2623,277 +1663,101 @@ bool PPCRecompilerImlGen_ANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - //hCPU->gpr[rA] = hCPU->gpr[rS] & ~hCPU->gpr[rB]; - //if (Opcode & PPC_OPC_RC) { - if( rS == rB ) - { - // result is always 0 -> replace with XOR rA,rA - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); - } - } - else if( rA == rB ) - { - // rB already in rA, therefore we complement rA first and then AND it with rS - sint32 gprRS = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = ~rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprRA, gprRA); - // rA &= rS - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprRA, gprRS, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, 
PPCREC_IML_OP_AND, gprRA, gprRS); - } - } - else - { - // a & (~b) is the same as ~((~a) | b) - sint32 gprRA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - sint32 gprRB = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprRS = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - // move rS to rA (if required) - if( gprRA != gprRS ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprRA, gprRS); - } - // rS already in rA, therefore we complement rS first and then OR it with rB - // rA = ~rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprRA, gprRA); - // rA |= rB - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_OR, gprRA, gprRB); - // rA = ~rA - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprRA, gprRA, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprRA, gprRA); - } - } + // rA = rS & ~rB; + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + IMLReg regTmp = _GetRegTemporary(ppcImlGenContext, 0); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regTmp, regB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regA, regS, regTmp); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } -void PPCRecompilerImlGen_ANDI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rS, rA; - uint32 imm; - PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - // ANDI. always sets cr0 flags - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA &= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL); -} - -void PPCRecompilerImlGen_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rS, rA; - uint32 imm; - PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - // ANDI. 
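// Illustrative sketch (not part of this patch): the consolidated OR_NOR / AND_NAND / ORC / ANDC
// handlers above all reduce to one pattern. A plain C++ equivalent of the emitted IML; the helper
// name and parameters are assumptions for illustration only.
static uint32 LogicalOp(uint32 rS, uint32 rB, bool isAnd, bool complementOperandB, bool complementResult)
{
	uint32 b = complementOperandB ? ~rB : rB;   // ORC/ANDC invert rB via a temporary register first
	uint32 r = isAnd ? (rS & b) : (rS | b);     // the MR mnemonic (rS == rB) collapses to a plain copy
	return complementResult ? ~r : r;           // NOR/NAND apply a final NOT
}
// With Rc=1 the handlers additionally call PPCImlGen_UpdateCR0() on the result.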
always sets cr0 flags - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA &= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_AND, gprDestReg, (sint32)imm, 0, false, false, 0, PPCREC_CR_MODE_LOGICAL); -} - -bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_XOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool complementResult) { sint32 rS, rA, rB; PPC_OPC_TEMPL_X(opcode, rS, rA, rB); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); if( rS == rB ) { - // xor register with itself - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); - } + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regA, 0); } else { - // rA = rS ^ rA - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) - { - // make sure we don't overwrite rS or rA - if( gprSource1Reg == gprDestReg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg); - } - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_AND, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - } - } - else - { - // rA = rS - if( gprDestReg != gprSource1Reg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - } - // rA ^= rB - if( opcode&PPC_OPC_RC ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg, 0, PPCREC_CR_MODE_LOGICAL); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - } - } + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regB = _GetRegGPR(ppcImlGenContext, rB); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regA, regS, regB); } + if (complementResult) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NOT, regA, regA); + if (opcode & PPC_OPC_RC) + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); return true; } - -bool PPCRecompilerImlGen_EQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +void PPCRecompilerImlGen_ANDI_ANDIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isShifted) { - sint32 rS, rA, rB; - PPC_OPC_TEMPL_X(opcode, rS, rA, rB); - if( rS == rB ) + sint32 rS, rA; + uint32 imm; + if (isShifted) { - // xor register with 
itself, then invert - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprDestReg); - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); + PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); } else { - // rA = ~(rS ^ rA) - sint32 gprSource1Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprSource2Reg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - if( gprSource1Reg == gprDestReg || gprSource2Reg == gprDestReg ) - { - // make sure we don't overwrite rS or rA - if( gprSource1Reg == gprDestReg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource1Reg); - } - } - else - { - // rA = rS - if( gprDestReg != gprSource1Reg ) - { - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSource1Reg); - } - // rA ^= rB - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_XOR, gprDestReg, gprSource2Reg); - } - if( opcode&PPC_OPC_RC ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg, 0, PPCREC_CR_MODE_LOGICAL); - else - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_NOT, gprDestReg, gprDestReg); + PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); } - return true; + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, regA, regS, (sint32)imm); + // ANDI/ANDIS always updates cr0 + PPCImlGen_UpdateCR0(ppcImlGenContext, regA); } -void PPCRecompilerImlGen_ORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +void PPCRecompilerImlGen_ORI_ORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isShifted) { sint32 rS, rA; uint32 imm; - PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - // ORI does not set cr0 flags - //hCPU->gpr[rA] = hCPU->gpr[rS] | imm; - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + if (isShifted) + { + PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); + } + else + { + PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); + } + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_OR, regA, regS, (sint32)imm); } -void PPCRecompilerImlGen_ORIS(ppcImlGenContext_t* ppcImlGenContext, 
uint32 opcode) +void PPCRecompilerImlGen_XORI_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isShifted) { sint32 rS, rA; uint32 imm; - PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - // ORI does not set cr0 flags - //hCPU->gpr[rA] = hCPU->gpr[rS] | imm; - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_OR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); -} - -void PPCRecompilerImlGen_XORI(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rS, rA; - uint32 imm; - PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); - //hCPU->gpr[rA] = hCPU->gpr[rS] ^ imm; - // XORI does not set cr0 flags - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); -} - -void PPCRecompilerImlGen_XORIS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 rS, rA; - uint32 imm; - PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); - //hCPU->gpr[rA] = hCPU->gpr[rS] ^ imm; - // XORIS does not set cr0 flags - sint32 gprSourceReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rS); - sint32 gprDestReg = PPCRecompilerImlGen_loadOverwriteRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); - // rA = rS - if( gprDestReg != gprSourceReg ) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ASSIGN, gprDestReg, gprSourceReg); - // rA |= imm32 - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_XOR, gprDestReg, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); + if (isShifted) + { + PPC_OPC_TEMPL_D_Shift16(opcode, rS, rA, imm); + } + else + { + PPC_OPC_TEMPL_D_UImm(opcode, rS, rA, imm); + } + IMLReg regS = _GetRegGPR(ppcImlGenContext, rS); + IMLReg regA = _GetRegGPR(ppcImlGenContext, rA); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regA, regS, (sint32)imm); } bool PPCRecompilerImlGen_CROR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_OR, crD, crA, crB); + IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); + IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); + IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regCrB); return true; } @@ -2901,7 +1765,12 @@ bool PPCRecompilerImlGen_CRORC(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_ORC, crD, crA, crB); + IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); + IMLReg regCrB 
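// Illustrative sketch (not part of this patch): the merged ORI/ORIS, XORI/XORIS and ANDI/ANDIS
// handlers differ only in how the 16-bit immediate is decoded; PPC_OPC_TEMPL_D_Shift16 presumably
// yields the immediate already shifted into the upper half-word. Equivalent plain C++ (helper name assumed):
static uint32 OrImmediate(uint32 rS, uint32 opcode, bool isShifted)
{
	uint32 imm = opcode & 0xFFFF;
	if (isShifted)
		imm <<= 16;     // the *IS forms place the immediate in the upper 16 bits of the operand
	return rS | imm;    // the XOR and AND variants are identical apart from the operator;
	                    // ANDI./ANDIS. additionally always update CR0
}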
= _GetRegCR(ppcImlGenContext, crB); + IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_OR, regCrR, regCrA, regTmp); return true; } @@ -2909,7 +1778,10 @@ bool PPCRecompilerImlGen_CRAND(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_AND, crD, crA, crB); + IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); + IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); + IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regCrB); return true; } @@ -2917,7 +1789,12 @@ bool PPCRecompilerImlGen_CRANDC(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_ANDC, crD, crA, crB); + IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); + IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); + IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_AND, regCrR, regCrA, regTmp); return true; } @@ -2925,17 +1802,15 @@ bool PPCRecompilerImlGen_CRXOR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - if (crA == crB) + IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); + IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); + IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); + if (regCrA == regCrB) { - // both operands equal, clear bit in crD - // PPC's assert() uses this to pass a parameter to OSPanic - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_CLEAR, crD, 0, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 0); return true; } - else - { - return false; - } + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regCrR, regCrA, regCrB); return true; } @@ -2943,23 +1818,24 @@ bool PPCRecompilerImlGen_CREQV(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { int crD, crA, crB; PPC_OPC_TEMPL_X(opcode, crD, crA, crB); - if (crA == crB) + IMLReg regCrA = _GetRegCR(ppcImlGenContext, crA); + IMLReg regCrB = _GetRegCR(ppcImlGenContext, crB); + IMLReg regCrR = _GetRegCR(ppcImlGenContext, crD); + if (regCrA == regCrB) { - // both operands equal, set bit in crD - PPCRecompilerImlGen_generateNewInstruction_cr(ppcImlGenContext, PPCREC_IML_OP_CR_SET, crD, 0, 0); + ppcImlGenContext->emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, regCrR, 1); return true; } - else - { - return false; - } + IMLReg regTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_XOR, regTmp, regCrB, 1); // invert crB + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_XOR, regCrR, regCrA, regTmp); return true; } bool PPCRecompilerImlGen_HLE(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { uint32 hleFuncId = opcode&0xFFFF; - PPCRecompilerImlGen_generateNewInstruction_macro(ppcImlGenContext, PPCREC_IML_MACRO_HLE, 
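// Illustrative sketch (not part of this patch): with every CR bit held in its own IML register as a
// 0/1 value, the condition-register ops above become single-bit boolean arithmetic, e.g. for CRORC
// and CREQV (helper names assumed):
static uint32 CrOrc(uint32 crA, uint32 crB)
{
	return crA | (crB ^ 1);    // XOR with 1 inverts the one-bit operand (same trick as CRANDC)
}
static uint32 CrEqv(uint32 crA, uint32 crB)
{
	return crA ^ (crB ^ 1);    // equivalent to NOT(crA XOR crB) for one-bit values
}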
ppcImlGenContext->ppcAddressOfCurrentInstruction, hleFuncId, 0); + ppcImlGenContext->emitInst().make_macro(PPCREC_IML_MACRO_HLE, ppcImlGenContext->ppcAddressOfCurrentInstruction, hleFuncId, 0, IMLREG_INVALID); return true; } @@ -2970,12 +1846,6 @@ uint32 PPCRecompiler_iterateCurrentInstruction(ppcImlGenContext_t* ppcImlGenCont return v; } -uint32 PPCRecompiler_getInstructionByOffset(ppcImlGenContext_t* ppcImlGenContext, uint32 offset) -{ - uint32 v = CPU_swapEndianU32(*(ppcImlGenContext->currentInstruction + offset/4)); - return v; -} - uint32 PPCRecompiler_getCurrentInstruction(ppcImlGenContext_t* ppcImlGenContext) { uint32 v = CPU_swapEndianU32(*(ppcImlGenContext->currentInstruction)); @@ -2988,480 +1858,10 @@ uint32 PPCRecompiler_getPreviousInstruction(ppcImlGenContext_t* ppcImlGenContext return v; } -char _tempOpcodename[32]; - -const char* PPCRecompiler_getOpcodeDebugName(PPCRecImlInstruction_t* iml) -{ - uint32 op = iml->operation; - if (op == PPCREC_IML_OP_ASSIGN) - return "MOV"; - else if (op == PPCREC_IML_OP_ADD) - return "ADD"; - else if (op == PPCREC_IML_OP_SUB) - return "SUB"; - else if (op == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY) - return "ADDCSC"; - else if (op == PPCREC_IML_OP_OR) - return "OR"; - else if (op == PPCREC_IML_OP_AND) - return "AND"; - else if (op == PPCREC_IML_OP_XOR) - return "XOR"; - else if (op == PPCREC_IML_OP_LEFT_SHIFT) - return "LSH"; - else if (op == PPCREC_IML_OP_RIGHT_SHIFT) - return "RSH"; - else if (op == PPCREC_IML_OP_MULTIPLY_SIGNED) - return "MULS"; - else if (op == PPCREC_IML_OP_DIVIDE_SIGNED) - return "DIVS"; - - sprintf(_tempOpcodename, "OP0%02x_T%d", iml->operation, iml->type); - return _tempOpcodename; -} - -void PPCRecDebug_addRegisterParam(StringBuf& strOutput, sint32 virtualRegister, bool isLast = false) -{ - if (isLast) - { - if (virtualRegister < 10) - strOutput.addFmt("t{} ", virtualRegister); - else - strOutput.addFmt("t{}", virtualRegister); - return; - } - if (virtualRegister < 10) - strOutput.addFmt("t{} , ", virtualRegister); - else - strOutput.addFmt("t{}, ", virtualRegister); -} - -void PPCRecDebug_addS32Param(StringBuf& strOutput, sint32 val, bool isLast = false) -{ - if (isLast) - { - strOutput.addFmt("0x{:08x}", val); - return; - } - strOutput.addFmt("0x{:08x}, ", val); -} - -void PPCRecompilerDebug_printLivenessRangeInfo(StringBuf& currentLineText, PPCRecImlSegment_t* imlSegment, sint32 offset) -{ - // pad to 70 characters - sint32 index = currentLineText.getLen(); - while (index < 70) - { - debug_printf(" "); - index++; - } - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while (subrangeItr) - { - if (offset == subrangeItr->start.index) - { - if (false)//subrange->isDirtied && i == subrange->becomesDirtyAtIndex.index) - { - debug_printf("*%-2d", subrangeItr->range->virtualRegister); - } - else - { - debug_printf("|%-2d", subrangeItr->range->virtualRegister); - } - } - else if (false)//subrange->isDirtied && i == subrange->becomesDirtyAtIndex.index ) - { - debug_printf("* "); - } - else if (offset >= subrangeItr->start.index && offset < subrangeItr->end.index) - { - debug_printf("| "); - } - else - { - debug_printf(" "); - } - index += 3; - // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } -} - -void PPCRecompiler_dumpIMLSegment(PPCRecImlSegment_t* imlSegment, sint32 segmentIndex, bool printLivenessRangeInfo) -{ - StringBuf strOutput(1024); - - strOutput.addFmt("SEGMENT 0x{:04x} 0x{:08x} PPC 0x{:08x} - 0x{:08x} Loop-depth {}", segmentIndex, imlSegment->ppcAddress, 
imlSegment->ppcAddrMin, imlSegment->ppcAddrMax, imlSegment->loopDepth); - if (imlSegment->isEnterable) - { - strOutput.addFmt(" ENTERABLE (0x{:08x})", imlSegment->enterPPCAddress); - } - else if( imlSegment->isJumpDestination ) - { - strOutput.addFmt(" JUMP-DEST (0x{:08x})", imlSegment->jumpDestinationPPCAddress); - } - - debug_printf("%s\n", strOutput.c_str()); - - strOutput.reset(); - strOutput.addFmt("SEGMENT NAME 0x{:016x}", (uintptr_t)imlSegment); - debug_printf("%s", strOutput.c_str()); - - if (printLivenessRangeInfo) - { - PPCRecompilerDebug_printLivenessRangeInfo(strOutput, imlSegment, RA_INTER_RANGE_START); - } - debug_printf("\n"); - - sint32 lineOffsetParameters = 18; - - for(sint32 i=0; iimlListCount; i++) - { - // don't log NOP instructions unless they have an associated PPC address - if(imlSegment->imlList[i].type == PPCREC_IML_TYPE_NO_OP && imlSegment->imlList[i].associatedPPCAddress == MPTR_NULL) - continue; - strOutput.reset(); - strOutput.addFmt("{:08x} ", imlSegment->imlList[i].associatedPPCAddress); - if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME || imlSegment->imlList[i].type == PPCREC_IML_TYPE_NAME_R) - { - if(imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME) - strOutput.add("LD_NAME"); - else - strOutput.add("ST_NAME"); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_name.registerIndex); - - strOutput.addFmt("name_{} (", imlSegment->imlList[i].op_r_name.registerIndex, imlSegment->imlList[i].op_r_name.name); - if( imlSegment->imlList[i].op_r_name.name >= PPCREC_NAME_R0 && imlSegment->imlList[i].op_r_name.name < (PPCREC_NAME_R0+999) ) - { - strOutput.addFmt("r{}", imlSegment->imlList[i].op_r_name.name-PPCREC_NAME_R0); - } - else if( imlSegment->imlList[i].op_r_name.name >= PPCREC_NAME_SPR0 && imlSegment->imlList[i].op_r_name.name < (PPCREC_NAME_SPR0+999) ) - { - strOutput.addFmt("spr{}", imlSegment->imlList[i].op_r_name.name-PPCREC_NAME_SPR0); - } - else - strOutput.add("ukn"); - strOutput.add(")"); - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_R ) - { - strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(imlSegment->imlList+i)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r.registerResult); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r.registerA, true); - - if( imlSegment->imlList[i].crRegister != PPC_REC_INVALID_REGISTER ) - { - strOutput.addFmt(" -> CR{}", imlSegment->imlList[i].crRegister); - } - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_R_R ) - { - strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(imlSegment->imlList + i)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r_r.registerResult); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r_r.registerA); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r_r.registerB, true); - if( imlSegment->imlList[i].crRegister != PPC_REC_INVALID_REGISTER ) - { - strOutput.addFmt(" -> CR{}", imlSegment->imlList[i].crRegister); - } - } - else if (imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_R_S32) - { - strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(imlSegment->imlList + i)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - - 
PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r_s32.registerResult); - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_r_s32.registerA); - PPCRecDebug_addS32Param(strOutput, imlSegment->imlList[i].op_r_r_s32.immS32, true); - - if (imlSegment->imlList[i].crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", imlSegment->imlList[i].crRegister); - } - } - else if (imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_S32) - { - strOutput.addFmt("{}", PPCRecompiler_getOpcodeDebugName(imlSegment->imlList + i)); - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_r_immS32.registerIndex); - PPCRecDebug_addS32Param(strOutput, imlSegment->imlList[i].op_r_immS32.immS32, true); - - if (imlSegment->imlList[i].crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> CR{}", imlSegment->imlList[i].crRegister); - } - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK ) - { - strOutput.addFmt("jm_{:08x}:", imlSegment->imlList[i].op_jumpmark.address); - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_PPC_ENTER ) - { - strOutput.addFmt("ppcEnter_{:08x}:", imlSegment->imlList[i].op_ppcEnter.ppcAddress); - } - else if(imlSegment->imlList[i].type == PPCREC_IML_TYPE_LOAD || imlSegment->imlList[i].type == PPCREC_IML_TYPE_STORE || - imlSegment->imlList[i].type == PPCREC_IML_TYPE_LOAD_INDEXED || imlSegment->imlList[i].type == PPCREC_IML_TYPE_STORE_INDEXED ) - { - if(imlSegment->imlList[i].type == PPCREC_IML_TYPE_LOAD || imlSegment->imlList[i].type == PPCREC_IML_TYPE_LOAD_INDEXED) - strOutput.add("LD_"); - else - strOutput.add("ST_"); - - if (imlSegment->imlList[i].op_storeLoad.flags2.signExtend) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{}", imlSegment->imlList[i].op_storeLoad.copyWidth); - - while ((sint32)strOutput.getLen() < lineOffsetParameters) - strOutput.add(" "); - - PPCRecDebug_addRegisterParam(strOutput, imlSegment->imlList[i].op_storeLoad.registerData); - - if(imlSegment->imlList[i].type == PPCREC_IML_TYPE_LOAD_INDEXED || imlSegment->imlList[i].type == PPCREC_IML_TYPE_STORE_INDEXED) - strOutput.addFmt("[t{}+t{}]", imlSegment->imlList[i].op_storeLoad.registerMem, imlSegment->imlList[i].op_storeLoad.registerMem2); - else - strOutput.addFmt("[t{}+{}]", imlSegment->imlList[i].op_storeLoad.registerMem, imlSegment->imlList[i].op_storeLoad.immS32); - } - else if (imlSegment->imlList[i].type == PPCREC_IML_TYPE_MEM2MEM) - { - strOutput.addFmt("{} [t{}+{}] = [t{}+{}]", imlSegment->imlList[i].op_mem2mem.copyWidth, imlSegment->imlList[i].op_mem2mem.dst.registerMem, imlSegment->imlList[i].op_mem2mem.dst.immS32, imlSegment->imlList[i].op_mem2mem.src.registerMem, imlSegment->imlList[i].op_mem2mem.src.immS32); - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_CJUMP ) - { - if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_E) - strOutput.add("JE"); - else if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NE) - strOutput.add("JNE"); - else if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_G) - strOutput.add("JG"); - else if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_GE) - strOutput.add("JGE"); - else if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_L) - strOutput.add("JL"); - else if 
(imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_LE) - strOutput.add("JLE"); - else if (imlSegment->imlList[i].op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE) - strOutput.add("JALW"); // jump always - else - cemu_assert_unimplemented(); - strOutput.addFmt(" jm_{:08x} (cr{})", imlSegment->imlList[i].op_conditionalJump.jumpmarkAddress, imlSegment->imlList[i].crRegister); - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_NO_OP ) - { - strOutput.add("NOP"); - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_MACRO ) - { - if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_BLR ) - { - strOutput.addFmt("MACRO BLR 0x{:08x} cycles (depr): {}", imlSegment->imlList[i].op_macro.param, (sint32)imlSegment->imlList[i].op_macro.paramU16); - } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_BLRL ) - { - strOutput.addFmt("MACRO BLRL 0x{:08x} cycles (depr): {}", imlSegment->imlList[i].op_macro.param, (sint32)imlSegment->imlList[i].op_macro.paramU16); - } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_BCTR ) - { - strOutput.addFmt("MACRO BCTR 0x{:08x} cycles (depr): {}", imlSegment->imlList[i].op_macro.param, (sint32)imlSegment->imlList[i].op_macro.paramU16); - } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_BCTRL ) - { - strOutput.addFmt("MACRO BCTRL 0x{:08x} cycles (depr): {}", imlSegment->imlList[i].op_macro.param, (sint32)imlSegment->imlList[i].op_macro.paramU16); - } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_BL ) - { - strOutput.addFmt("MACRO BL 0x{:08x} -> 0x{:08x} cycles (depr): {}", imlSegment->imlList[i].op_macro.param, imlSegment->imlList[i].op_macro.param2, (sint32)imlSegment->imlList[i].op_macro.paramU16); - } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_B_FAR ) - { - strOutput.addFmt("MACRO B_FAR 0x{:08x} -> 0x{:08x} cycles (depr): {}", imlSegment->imlList[i].op_macro.param, imlSegment->imlList[i].op_macro.param2, (sint32)imlSegment->imlList[i].op_macro.paramU16); - } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_LEAVE ) - { - strOutput.addFmt("MACRO LEAVE ppc: 0x{:08x}", imlSegment->imlList[i].op_macro.param); - } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_HLE ) - { - strOutput.addFmt("MACRO HLE ppcAddr: 0x{:08x} funcId: 0x{:08x}", imlSegment->imlList[i].op_macro.param, imlSegment->imlList[i].op_macro.param2); - } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_MFTB ) - { - strOutput.addFmt("MACRO MFTB ppcAddr: 0x{:08x} sprId: 0x{:08x}", imlSegment->imlList[i].op_macro.param, imlSegment->imlList[i].op_macro.param2); - } - else if( imlSegment->imlList[i].operation == PPCREC_IML_MACRO_COUNT_CYCLES ) - { - strOutput.addFmt("MACRO COUNT_CYCLES cycles: {}", imlSegment->imlList[i].op_macro.param); - } - else - { - strOutput.addFmt("MACRO ukn operation {}", imlSegment->imlList[i].operation); - } - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_R_NAME ) - { - strOutput.addFmt("fpr_t{} = name_{} (", imlSegment->imlList[i].op_r_name.registerIndex, imlSegment->imlList[i].op_r_name.name); - if( imlSegment->imlList[i].op_r_name.name >= PPCREC_NAME_FPR0 && imlSegment->imlList[i].op_r_name.name < (PPCREC_NAME_FPR0+999) ) - { - strOutput.addFmt("fpr{}", imlSegment->imlList[i].op_r_name.name-PPCREC_NAME_FPR0); - } - else if( imlSegment->imlList[i].op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && imlSegment->imlList[i].op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0+999) ) - { - 
strOutput.addFmt("tempFpr{}", imlSegment->imlList[i].op_r_name.name-PPCREC_NAME_TEMPORARY_FPR0); - } - else - strOutput.add("ukn"); - strOutput.add(")"); - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_NAME_R ) - { - strOutput.addFmt("name_{} (", imlSegment->imlList[i].op_r_name.name); - if( imlSegment->imlList[i].op_r_name.name >= PPCREC_NAME_FPR0 && imlSegment->imlList[i].op_r_name.name < (PPCREC_NAME_FPR0+999) ) - { - strOutput.addFmt("fpr{}", imlSegment->imlList[i].op_r_name.name-PPCREC_NAME_FPR0); - } - else if( imlSegment->imlList[i].op_r_name.name >= PPCREC_NAME_TEMPORARY_FPR0 && imlSegment->imlList[i].op_r_name.name < (PPCREC_NAME_TEMPORARY_FPR0+999) ) - { - strOutput.addFmt("tempFpr{}", imlSegment->imlList[i].op_r_name.name-PPCREC_NAME_TEMPORARY_FPR0); - } - else - strOutput.add("ukn"); - strOutput.addFmt(") = fpr_t{}", imlSegment->imlList[i].op_r_name.registerIndex); - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_LOAD ) - { - strOutput.addFmt("fpr_t{} = ", imlSegment->imlList[i].op_storeLoad.registerData); - if( imlSegment->imlList[i].op_storeLoad.flags2.signExtend ) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{} [t{}+{}] mode {}", imlSegment->imlList[i].op_storeLoad.copyWidth / 8, imlSegment->imlList[i].op_storeLoad.registerMem, imlSegment->imlList[i].op_storeLoad.immS32, imlSegment->imlList[i].op_storeLoad.mode); - if (imlSegment->imlList[i].op_storeLoad.flags2.notExpanded) - { - strOutput.addFmt(" "); - } - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_STORE ) - { - if( imlSegment->imlList[i].op_storeLoad.flags2.signExtend ) - strOutput.add("S"); - else - strOutput.add("U"); - strOutput.addFmt("{} [t{}+{}]", imlSegment->imlList[i].op_storeLoad.copyWidth/8, imlSegment->imlList[i].op_storeLoad.registerMem, imlSegment->imlList[i].op_storeLoad.immS32); - strOutput.addFmt("= fpr_t{} mode {}\n", imlSegment->imlList[i].op_storeLoad.registerData, imlSegment->imlList[i].op_storeLoad.mode); - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_R_R ) - { - strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&imlSegment->imlList[i])); - strOutput.addFmt("fpr{:02d}, fpr{:02d}", imlSegment->imlList[i].op_fpr_r_r.registerResult, imlSegment->imlList[i].op_fpr_r_r.registerOperand); - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_R_R_R_R ) - { - strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&imlSegment->imlList[i])); - strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}, fpr{:02d}", imlSegment->imlList[i].op_fpr_r_r_r_r.registerResult, imlSegment->imlList[i].op_fpr_r_r_r_r.registerOperandA, imlSegment->imlList[i].op_fpr_r_r_r_r.registerOperandB, imlSegment->imlList[i].op_fpr_r_r_r_r.registerOperandC); - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_R_R_R ) - { - strOutput.addFmt("{:-6} ", PPCRecompiler_getOpcodeDebugName(&imlSegment->imlList[i])); - strOutput.addFmt("fpr{:02d}, fpr{:02d}, fpr{:02d}", imlSegment->imlList[i].op_fpr_r_r_r.registerResult, imlSegment->imlList[i].op_fpr_r_r_r.registerOperandA, imlSegment->imlList[i].op_fpr_r_r_r.registerOperandB); - } - else if (imlSegment->imlList[i].type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - { - strOutput.addFmt("CYCLE_CHECK jm_{:08x}\n", imlSegment->imlList[i].op_conditionalJump.jumpmarkAddress); - } - else if (imlSegment->imlList[i].type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - strOutput.addFmt("t{} ", imlSegment->imlList[i].op_conditional_r_s32.registerIndex); - bool displayAsHex = false; - if 
(imlSegment->imlList[i].operation == PPCREC_IML_OP_ASSIGN) - { - displayAsHex = true; - strOutput.add("="); - } - else - strOutput.addFmt("(unknown operation CONDITIONAL_R_S32 {})", imlSegment->imlList[i].operation); - if (displayAsHex) - strOutput.addFmt(" 0x{:x}", imlSegment->imlList[i].op_conditional_r_s32.immS32); - else - strOutput.addFmt(" {}", imlSegment->imlList[i].op_conditional_r_s32.immS32); - strOutput.add(" (conditional)"); - if (imlSegment->imlList[i].crRegister != PPC_REC_INVALID_REGISTER) - { - strOutput.addFmt(" -> and update CR{}", imlSegment->imlList[i].crRegister); - } - } - else - { - strOutput.addFmt("Unknown iml type {}", imlSegment->imlList[i].type); - } - debug_printf("%s", strOutput.c_str()); - if (printLivenessRangeInfo) - { - PPCRecompilerDebug_printLivenessRangeInfo(strOutput, imlSegment, i); - } - debug_printf("\n"); - } - // all ranges - if (printLivenessRangeInfo) - { - debug_printf("Ranges-VirtReg "); - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while(subrangeItr) - { - debug_printf("v%-2d", subrangeItr->range->virtualRegister); - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } - debug_printf("\n"); - debug_printf("Ranges-PhysReg "); - subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while (subrangeItr) - { - debug_printf("p%-2d", subrangeItr->range->physicalRegister); - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } - debug_printf("\n"); - } - // branch info - debug_printf("Links from: "); - for (sint32 i = 0; i < imlSegment->list_prevSegments.size(); i++) - { - if (i) - debug_printf(", "); - debug_printf("%p", (void*)imlSegment->list_prevSegments[i]); - } - debug_printf("\n"); - debug_printf("Links to: "); - if (imlSegment->nextSegmentBranchNotTaken) - debug_printf("%p (no branch), ", (void*)imlSegment->nextSegmentBranchNotTaken); - if (imlSegment->nextSegmentBranchTaken) - debug_printf("%p (branch)", (void*)imlSegment->nextSegmentBranchTaken); - debug_printf("\n"); -} - -void PPCRecompiler_dumpIML(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) -{ - for(sint32 f=0; fsegmentListCount; f++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[f]; - PPCRecompiler_dumpIMLSegment(imlSegment, f); - debug_printf("\n"); - } -} - -void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, PPCRecImlSegment_t* imlSegment, sint32 index) +void PPCRecompilerIml_setSegmentPoint(IMLSegmentPoint* segmentPoint, IMLSegment* imlSegment, sint32 index) { segmentPoint->imlSegment = imlSegment; - segmentPoint->index = index; + segmentPoint->SetInstructionIndex(index); if (imlSegment->segmentPointList) imlSegment->segmentPointList->prev = segmentPoint; segmentPoint->prev = nullptr; @@ -3469,7 +1869,7 @@ void PPCRecompilerIml_setSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint, imlSegment->segmentPointList = segmentPoint; } -void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoint) +void PPCRecompilerIml_removeSegmentPoint(IMLSegmentPoint* segmentPoint) { if (segmentPoint->prev) segmentPoint->prev->next = segmentPoint->next; @@ -3481,147 +1881,60 @@ void PPCRecompilerIml_removeSegmentPoint(ppcRecompilerSegmentPoint_t* segmentPoi /* * Insert multiple no-op instructions -* Warning: Can invalidate any previous instruction structs from the same segment +* Warning: Can invalidate any previous instruction pointers from the same segment */ -void PPCRecompiler_pushBackIMLInstructions(PPCRecImlSegment_t* imlSegment, 
sint32 index, sint32 shiftBackCount) +void PPCRecompiler_pushBackIMLInstructions(IMLSegment* imlSegment, sint32 index, sint32 shiftBackCount) { - cemu_assert(index >= 0 && index <= imlSegment->imlListCount); + cemu_assert_debug(index >= 0 && index <= imlSegment->imlList.size()); + + imlSegment->imlList.insert(imlSegment->imlList.begin() + index, shiftBackCount, {}); + + memset(imlSegment->imlList.data() + index, 0, sizeof(IMLInstruction) * shiftBackCount); - if (imlSegment->imlListCount + shiftBackCount > imlSegment->imlListSize) - { - sint32 newSize = imlSegment->imlListCount + shiftBackCount + std::max(2, imlSegment->imlListSize/2); - imlSegment->imlList = (PPCRecImlInstruction_t*)realloc(imlSegment->imlList, sizeof(PPCRecImlInstruction_t)*newSize); - imlSegment->imlListSize = newSize; - } - for (sint32 i = (sint32)imlSegment->imlListCount - 1; i >= index; i--) - { - memcpy(imlSegment->imlList + (i + shiftBackCount), imlSegment->imlList + i, sizeof(PPCRecImlInstruction_t)); - } // fill empty space with NOP instructions for (sint32 i = 0; i < shiftBackCount; i++) { imlSegment->imlList[index + i].type = PPCREC_IML_TYPE_NONE; } - imlSegment->imlListCount += shiftBackCount; + // update position of segment points if (imlSegment->segmentPointList) { - ppcRecompilerSegmentPoint_t* segmentPoint = imlSegment->segmentPointList; + IMLSegmentPoint* segmentPoint = imlSegment->segmentPointList; while (segmentPoint) { - if (segmentPoint->index != RA_INTER_RANGE_START && segmentPoint->index != RA_INTER_RANGE_END) - { - if (segmentPoint->index >= index) - segmentPoint->index += shiftBackCount; - } - // next + segmentPoint->ShiftIfAfter(index, shiftBackCount); segmentPoint = segmentPoint->next; } } } -/* -* Insert and return new instruction at index -* Warning: Can invalidate any previous instruction structs from the same segment -*/ -PPCRecImlInstruction_t* PPCRecompiler_insertInstruction(PPCRecImlSegment_t* imlSegment, sint32 index) +IMLInstruction* PPCRecompiler_insertInstruction(IMLSegment* imlSegment, sint32 index) { PPCRecompiler_pushBackIMLInstructions(imlSegment, index, 1); - return imlSegment->imlList + index; + return imlSegment->imlList.data() + index; } -/* -* Append and return new instruction at the end of the segment -* Warning: Can invalidate any previous instruction structs from the same segment -*/ -PPCRecImlInstruction_t* PPCRecompiler_appendInstruction(PPCRecImlSegment_t* imlSegment) +IMLInstruction* PPCRecompiler_appendInstruction(IMLSegment* imlSegment) { - sint32 index = imlSegment->imlListCount; - if (index >= imlSegment->imlListSize) - { - sint32 newSize = index+1; - imlSegment->imlList = (PPCRecImlInstruction_t*)realloc(imlSegment->imlList, sizeof(PPCRecImlInstruction_t)*newSize); - imlSegment->imlListSize = newSize; - } - imlSegment->imlListCount++; - memset(imlSegment->imlList + index, 0, sizeof(PPCRecImlInstruction_t)); - return imlSegment->imlList + index; + size_t index = imlSegment->imlList.size(); + imlSegment->imlList.emplace_back(); + memset(imlSegment->imlList.data() + index, 0, sizeof(IMLInstruction)); + return imlSegment->imlList.data() + index; +} + +IMLSegment* PPCRecompilerIml_appendSegment(ppcImlGenContext_t* ppcImlGenContext) +{ + IMLSegment* segment = new IMLSegment(); + ppcImlGenContext->segmentList2.emplace_back(segment); + return segment; } void PPCRecompilerIml_insertSegments(ppcImlGenContext_t* ppcImlGenContext, sint32 index, sint32 count) { - if( (ppcImlGenContext->segmentListCount+count) > ppcImlGenContext->segmentListSize ) - { - // allocate space for 
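// Illustrative sketch (not part of this patch): ShiftIfAfter() is assumed to keep segment points
// valid when NOPs are inserted, roughly like the standalone equivalent below; the removed code did
// the same thing explicitly, special inter-range markers aside.
static void ShiftIndexIfAfter(sint32& pointIndex, sint32 insertIndex, sint32 shiftCount)
{
	if (pointIndex >= insertIndex)
		pointIndex += shiftCount;    // points at or behind the insertion index move back with the code
}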
more segments - ppcImlGenContext->segmentListSize += count; - ppcImlGenContext->segmentList = (PPCRecImlSegment_t**)realloc(ppcImlGenContext->segmentList, ppcImlGenContext->segmentListSize*sizeof(PPCRecImlSegment_t*)); - } - for(sint32 i=(sint32)ppcImlGenContext->segmentListCount-1; i>=index; i--) - { - memcpy(ppcImlGenContext->segmentList+(i+count), ppcImlGenContext->segmentList+i, sizeof(PPCRecImlSegment_t*)); - } - ppcImlGenContext->segmentListCount += count; - for(sint32 i=0; isegmentList+index+i, 0x00, sizeof(PPCRecImlSegment_t*)); - ppcImlGenContext->segmentList[index+i] = (PPCRecImlSegment_t*)malloc(sizeof(PPCRecImlSegment_t)); - memset(ppcImlGenContext->segmentList[index+i], 0x00, sizeof(PPCRecImlSegment_t)); - ppcImlGenContext->segmentList[index + i]->list_prevSegments = std::vector(); - } -} - -/* - * Allocate and init a new iml instruction segment - */ -PPCRecImlSegment_t* PPCRecompiler_generateImlSegment(ppcImlGenContext_t* ppcImlGenContext) -{ - if( ppcImlGenContext->segmentListCount >= ppcImlGenContext->segmentListSize ) - { - // allocate space for more segments - ppcImlGenContext->segmentListSize *= 2; - ppcImlGenContext->segmentList = (PPCRecImlSegment_t**)realloc(ppcImlGenContext->segmentList, ppcImlGenContext->segmentListSize*sizeof(PPCRecImlSegment_t*)); - } - PPCRecImlSegment_t* ppcRecSegment = new PPCRecImlSegment_t(); - ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount] = ppcRecSegment; - ppcImlGenContext->segmentListCount++; - return ppcRecSegment; -} - -void PPCRecompiler_freeContext(ppcImlGenContext_t* ppcImlGenContext) -{ - if (ppcImlGenContext->imlList) - { - free(ppcImlGenContext->imlList); - ppcImlGenContext->imlList = nullptr; - } - for(sint32 i=0; isegmentListCount; i++) - { - free(ppcImlGenContext->segmentList[i]->imlList); - delete ppcImlGenContext->segmentList[i]; - } - ppcImlGenContext->segmentListCount = 0; - if (ppcImlGenContext->segmentList) - { - free(ppcImlGenContext->segmentList); - ppcImlGenContext->segmentList = nullptr; - } -} - -bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml) -{ - if (iml->type == PPCREC_IML_TYPE_MACRO && (iml->operation == PPCREC_IML_MACRO_BLR || iml->operation == PPCREC_IML_MACRO_BCTR) || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_BL || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_B_FAR || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_BLRL || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_BCTRL || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_LEAVE || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_HLE || - iml->type == PPCREC_IML_TYPE_MACRO && iml->operation == PPCREC_IML_MACRO_MFTB || - iml->type == PPCREC_IML_TYPE_PPC_ENTER || - iml->type == PPCREC_IML_TYPE_CJUMP || - iml->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - return true; - return false; + ppcImlGenContext->segmentList2.insert(ppcImlGenContext->segmentList2.begin() + index, count, nullptr); + for (sint32 i = 0; i < count; i++) + ppcImlGenContext->segmentList2[index + i] = new IMLSegment(); } bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) @@ -3643,15 +1956,18 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) switch (PPC_getBits(opcode, 25, 5)) { case 0: - PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext, opcode) ) + 
unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 1: - PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 2: - PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext, opcode) ) + unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; default: @@ -3692,23 +2008,23 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 12: // multiply scalar - if (PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext, opcode) == false) + case 12: // PS_MULS0 + if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 13: // multiply scalar - if (PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext, opcode) == false) + case 13: // PS_MULS1 + if (PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 14: // multiply add scalar - if (PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext, opcode) == false) + case 14: // PS_MADDS0 + if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 15: // multiply add scalar - if (PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext, opcode) == false) + case 15: // PS_MADDS1 + if (PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -3775,22 +2091,22 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 28: // multiply sub paired - if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode) == false) + case 28: // PS_MSUB + if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 29: // multiply add paired + case 29: // PS_MADD if (PPCRecompilerImlGen_PS_MADD(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 30: // negative multiply sub paired - if (PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext, opcode) == false) + case 30: // PS_NMSUB + if (PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 31: // negative multiply add paired + case 31: // PS_NMADD if (PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; @@ -3804,20 +2120,23 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) PPCRecompilerImlGen_MULLI(ppcImlGenContext, opcode); break; case 8: // SUBFIC - PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode); + if (!PPCRecompilerImlGen_SUBFIC(ppcImlGenContext, opcode)) + unsupportedInstructionFound = true; break; case 10: // CMPLI - PPCRecompilerImlGen_CMPLI(ppcImlGenContext, opcode); + if 
(!PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode, true)) + unsupportedInstructionFound = true; break; case 11: // CMPI - PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode); + if (!PPCRecompilerImlGen_CMPI(ppcImlGenContext, opcode, false)) + unsupportedInstructionFound = true; break; case 12: // ADDIC - if (PPCRecompilerImlGen_ADDIC(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; break; case 13: // ADDIC. - if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_ADDIC_(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; break; case 14: // ADDI @@ -3849,8 +2168,11 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) case 19: // opcode category 19 switch (PPC_getBits(opcode, 30, 10)) { - case 16: - if (PPCRecompilerImlGen_BCLR(ppcImlGenContext, opcode) == false) + case 0: + PPCRecompilerImlGen_MCRF(ppcImlGenContext, opcode); + break; + case 16: // BCLR + if (PPCRecompilerImlGen_BCSPR(ppcImlGenContext, opcode, SPR_LR) == false) unsupportedInstructionFound = true; break; case 129: @@ -3881,8 +2203,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_CROR(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 528: - if (PPCRecompilerImlGen_BCCTR(ppcImlGenContext, opcode) == false) + case 528: // BCCTR + if (PPCRecompilerImlGen_BCSPR(ppcImlGenContext, opcode, SPR_CTR) == false) unsupportedInstructionFound = true; break; default: @@ -3902,37 +2224,34 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_RLWNM(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 24: - PPCRecompilerImlGen_ORI(ppcImlGenContext, opcode); + case 24: // ORI + PPCRecompilerImlGen_ORI_ORIS(ppcImlGenContext, opcode, false); break; - case 25: - PPCRecompilerImlGen_ORIS(ppcImlGenContext, opcode); + case 25: // ORIS + PPCRecompilerImlGen_ORI_ORIS(ppcImlGenContext, opcode, true); break; - case 26: - PPCRecompilerImlGen_XORI(ppcImlGenContext, opcode); + case 26: // XORI + PPCRecompilerImlGen_XORI_XORIS(ppcImlGenContext, opcode, false); break; - case 27: - PPCRecompilerImlGen_XORIS(ppcImlGenContext, opcode); + case 27: // XORIS + PPCRecompilerImlGen_XORI_XORIS(ppcImlGenContext, opcode, true); break; - case 28: - PPCRecompilerImlGen_ANDI(ppcImlGenContext, opcode); + case 28: // ANDI + PPCRecompilerImlGen_ANDI_ANDIS(ppcImlGenContext, opcode, false); break; - case 29: - PPCRecompilerImlGen_ANDIS(ppcImlGenContext, opcode); + case 29: // ANDIS + PPCRecompilerImlGen_ANDI_ANDIS(ppcImlGenContext, opcode, true); break; case 31: // opcode category switch (PPC_getBits(opcode, 30, 10)) { case 0: - PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode); + PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode, false); break; case 4: PPCRecompilerImlGen_TW(ppcImlGenContext, opcode); break; case 8: - // todo: Check if we can optimize this pattern: - // SUBFC + SUBFE - // SUBFC if (PPCRecompilerImlGen_SUBFC(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; @@ -3952,9 +2271,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_LWARX(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 23: - if (PPCRecompilerImlGen_LWZX(ppcImlGenContext, opcode) == false) - unsupportedInstructionFound = 
true; + case 23: // LWZX + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, false); break; case 24: if (PPCRecompilerImlGen_SLW(ppcImlGenContext, opcode) == false) @@ -3964,12 +2282,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_CNTLZW(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 28: - if (PPCRecompilerImlGen_AND(ppcImlGenContext, opcode) == false) + case 28: // AND + if (!PPCRecompilerImlGen_AND_NAND(ppcImlGenContext, opcode, false)) unsupportedInstructionFound = true; break; case 32: - PPCRecompilerImlGen_CMPL(ppcImlGenContext, opcode); + PPCRecompilerImlGen_CMP(ppcImlGenContext, opcode, true); // CMPL break; case 40: if (PPCRecompilerImlGen_SUBF(ppcImlGenContext, opcode) == false) @@ -3978,12 +2296,11 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) case 54: // DBCST - Generates no code break; - case 55: - if (PPCRecompilerImlGen_LWZUX(ppcImlGenContext, opcode) == false) - unsupportedInstructionFound = true; + case 55: // LWZUX + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, true, true); break; - case 60: - if (PPCRecompilerImlGen_ANDC(ppcImlGenContext, opcode) == false) + case 60: // ANDC + if (!PPCRecompilerImlGen_ANDC(ppcImlGenContext, opcode)) unsupportedInstructionFound = true; break; case 75: @@ -3993,20 +2310,18 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) case 86: // DCBF -> No-Op break; - case 87: - if (PPCRecompilerImlGen_LBZX(ppcImlGenContext, opcode) == false) - unsupportedInstructionFound = true; + case 87: // LBZX + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, false); break; case 104: if (PPCRecompilerImlGen_NEG(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 119: - if (PPCRecompilerImlGen_LBZUX(ppcImlGenContext, opcode) == false) - unsupportedInstructionFound = true; + case 119: // LBZUX + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 8, false, true, true); break; - case 124: - if (PPCRecompilerImlGen_NOR(ppcImlGenContext, opcode) == false) + case 124: // NOR + if (!PPCRecompilerImlGen_OR_NOR(ppcImlGenContext, opcode, true)) unsupportedInstructionFound = true; break; case 136: @@ -4018,19 +2333,20 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; break; case 144: - PPCRecompilerImlGen_MTCRF(ppcImlGenContext, opcode); + if( !PPCRecompilerImlGen_MTCRF(ppcImlGenContext, opcode)) + unsupportedInstructionFound = true; break; case 150: - if (PPCRecompilerImlGen_STWCX(ppcImlGenContext, opcode) == false) + if (!PPCRecompilerImlGen_STWCX(ppcImlGenContext, opcode)) unsupportedInstructionFound = true; break; - case 151: - if (PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32) == false) + case 151: // STWX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32, true, false)) unsupportedInstructionFound = true; break; - case 183: - if (PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext, opcode, 32) == false) - unsupportedInstructionFound = true; + case 183: // STWUX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32, true, true)) + unsupportedInstructionFound = true; break; case 200: if (PPCRecompilerImlGen_SUBFZE(ppcImlGenContext, opcode) == false) @@ -4040,8 +2356,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if 
(PPCRecompilerImlGen_ADDZE(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 215: - if (PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 8) == false) + case 215: // STBX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 8, true, false)) unsupportedInstructionFound = true; break; case 234: @@ -4052,59 +2368,56 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_MULLW(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 247: - if (PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext, opcode, 8) == false) + case 247: // STBUX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 8, true, true)) unsupportedInstructionFound = true; break; case 266: if (PPCRecompilerImlGen_ADD(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 279: - if (PPCRecompilerImlGen_LHZX(ppcImlGenContext, opcode) == false) + case 279: // LHZX + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, false); + break; + case 284: // EQV (alias to NXOR) + if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, true)) unsupportedInstructionFound = true; break; - case 284: - PPCRecompilerImlGen_EQV(ppcImlGenContext, opcode); + case 311: // LHZUX + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, true, true); break; - case 311: - if (PPCRecompilerImlGen_LHZUX(ppcImlGenContext, opcode) == false) - unsupportedInstructionFound = true; - break; - case 316: - if (PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode) == false) + case 316: // XOR + if (!PPCRecompilerImlGen_XOR(ppcImlGenContext, opcode, false)) unsupportedInstructionFound = true; break; case 339: if (PPCRecompilerImlGen_MFSPR(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 343: - if (PPCRecompilerImlGen_LHAX(ppcImlGenContext, opcode) == false) - unsupportedInstructionFound = true; + case 343: // LHAX + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, false); break; case 371: if (PPCRecompilerImlGen_MFTB(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 375: - if (PPCRecompilerImlGen_LHAUX(ppcImlGenContext, opcode) == false) - unsupportedInstructionFound = true; + case 375: // LHAUX + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, true, true, true); break; - case 407: - if (PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16) == false) + case 407: // STHX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true, false)) unsupportedInstructionFound = true; break; case 412: if (PPCRecompilerImlGen_ORC(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 439: - if (PPCRecompilerImlGen_STORE_INDEXED_UPDATE(ppcImlGenContext, opcode, 16) == false) + case 439: // STHUX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true, true)) unsupportedInstructionFound = true; break; - case 444: - if (PPCRecompilerImlGen_OR(ppcImlGenContext, opcode) == false) + case 444: // OR + if (!PPCRecompilerImlGen_OR_NOR(ppcImlGenContext, opcode, false)) unsupportedInstructionFound = true; break; case 459: @@ -4114,17 +2427,19 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_MTSPR(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; + case 476: // NAND + if 
(!PPCRecompilerImlGen_AND_NAND(ppcImlGenContext, opcode, true)) + unsupportedInstructionFound = true; + break; case 491: if (PPCRecompilerImlGen_DIVW(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 534: - if (PPCRecompilerImlGen_LWBRX(ppcImlGenContext, opcode) == false) - unsupportedInstructionFound = true; - ppcImlGenContext->hasFPUInstruction = true; + case 534: // LWBRX + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 32, false, false, false); break; - case 535: - if (PPCRecompilerImlGen_LFSX(ppcImlGenContext, opcode) == false) + case 535: // LFSX + if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, false, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -4132,8 +2447,8 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) if (PPCRecompilerImlGen_SRW(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 567: - if (PPCRecompilerImlGen_LFSUX(ppcImlGenContext, opcode) == false) + case 567: // LFSUX + if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, true, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -4144,38 +2459,42 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) case 598: PPCRecompilerImlGen_SYNC(ppcImlGenContext, opcode); break; - case 599: - if (PPCRecompilerImlGen_LFDX(ppcImlGenContext, opcode) == false) + case 599: // LFDX + if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, false, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 631: - if (PPCRecompilerImlGen_LFDUX(ppcImlGenContext, opcode) == false) + case 631: // LFDUX + if (PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext, opcode, true, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 662: - if (PPCRecompilerImlGen_STWBRX(ppcImlGenContext, opcode) == false) + case 662: // STWBRX + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 32, false, false)) unsupportedInstructionFound = true; break; - case 663: - if (PPCRecompilerImlGen_STFSX(ppcImlGenContext, opcode) == false) + case 663: // STFSX + if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, false, false) == false) unsupportedInstructionFound = true; break; - case 695: - if (PPCRecompilerImlGen_STFSUX(ppcImlGenContext, opcode) == false) + case 695: // STFSUX + if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, true, false) == false) unsupportedInstructionFound = true; break; case 725: if (PPCRecompilerImlGen_STSWI(ppcImlGenContext, opcode) == false) unsupportedInstructionFound = true; break; - case 727: - if (PPCRecompilerImlGen_STFDX(ppcImlGenContext, opcode) == false) + case 727: // STFDX + if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, false, true) == false) unsupportedInstructionFound = true; break; - case 790: - PPCRecompilerImlGen_LHBRX(ppcImlGenContext, opcode); + case 759: // STFDUX + if (PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext, opcode, true, true) == false) + unsupportedInstructionFound = true; + break; + case 790: // LHBRX + PPCRecompilerImlGen_LOAD_INDEXED(ppcImlGenContext, opcode, 16, false, false, false); break; case 792: if (PPCRecompilerImlGen_SRAW(ppcImlGenContext, opcode) == false) @@ -4186,7 
+2505,7 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) unsupportedInstructionFound = true; break; case 918: // STHBRX - if (PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, true) == false) + if (!PPCRecompilerImlGen_STORE_INDEXED(ppcImlGenContext, opcode, 16, false, true)) unsupportedInstructionFound = true; break; case 922: @@ -4210,47 +2529,61 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) break; } break; - case 32: - PPCRecompilerImlGen_LWZ(ppcImlGenContext, opcode); + case 32: // LWZ + if(!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 32, false, true, false)) + unsupportedInstructionFound = true; break; - case 33: - PPCRecompilerImlGen_LWZU(ppcImlGenContext, opcode); + case 33: // LWZU + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 32, false, true, true)) + unsupportedInstructionFound = true; break; - case 34: - PPCRecompilerImlGen_LBZ(ppcImlGenContext, opcode); + case 34: // LBZ + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 8, false, true, false)) + unsupportedInstructionFound = true; break; - case 35: - PPCRecompilerImlGen_LBZU(ppcImlGenContext, opcode); + case 35: // LBZU + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 8, false, true, true)) + unsupportedInstructionFound = true; break; - case 36: - PPCRecompilerImlGen_STW(ppcImlGenContext, opcode); + case 36: // STW + if(!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 32, true, false)) + unsupportedInstructionFound = true; break; - case 37: - PPCRecompilerImlGen_STWU(ppcImlGenContext, opcode); + case 37: // STWU + if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 32, true, true)) + unsupportedInstructionFound = true; break; - case 38: - PPCRecompilerImlGen_STB(ppcImlGenContext, opcode); + case 38: // STB + if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 8, true, false)) + unsupportedInstructionFound = true; break; - case 39: - PPCRecompilerImlGen_STBU(ppcImlGenContext, opcode); + case 39: // STBU + if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 8, true, true)) + unsupportedInstructionFound = true; break; - case 40: - PPCRecompilerImlGen_LHZ(ppcImlGenContext, opcode); + case 40: // LHZ + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 16, false, true, false)) + unsupportedInstructionFound = true; break; - case 41: - PPCRecompilerImlGen_LHZU(ppcImlGenContext, opcode); + case 41: // LHZU + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 16, false, true, true)) + unsupportedInstructionFound = true; break; - case 42: - PPCRecompilerImlGen_LHA(ppcImlGenContext, opcode); + case 42: // LHA + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 16, true, true, false)) + unsupportedInstructionFound = true; break; - case 43: - PPCRecompilerImlGen_LHAU(ppcImlGenContext, opcode); + case 43: // LHAU + if (!PPCRecompilerImlGen_LOAD(ppcImlGenContext, opcode, 16, true, true, true)) + unsupportedInstructionFound = true; break; - case 44: - PPCRecompilerImlGen_STH(ppcImlGenContext, opcode); + case 44: // STH + if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 16, true, false)) + unsupportedInstructionFound = true; break; - case 45: - PPCRecompilerImlGen_STHU(ppcImlGenContext, opcode); + case 45: // STHU + if (!PPCRecompilerImlGen_STORE(ppcImlGenContext, opcode, 16, true, true)) + unsupportedInstructionFound = true; break; case 46: PPCRecompilerImlGen_LMW(ppcImlGenContext, opcode); @@ -4258,53 +2591,53 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* 
ppcImlGenContext) case 47: PPCRecompilerImlGen_STMW(ppcImlGenContext, opcode); break; - case 48: - if (PPCRecompilerImlGen_LFS(ppcImlGenContext, opcode) == false) + case 48: // LFS + if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, false, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 49: - if (PPCRecompilerImlGen_LFSU(ppcImlGenContext, opcode) == false) + case 49: // LFSU + if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, true, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 50: - if (PPCRecompilerImlGen_LFD(ppcImlGenContext, opcode) == false) + case 50: // LFD + if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, false, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 51: - if (PPCRecompilerImlGen_LFDU(ppcImlGenContext, opcode) == false) + case 51: // LFDU + if (PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext, opcode, true, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 52: - if (PPCRecompilerImlGen_STFS(ppcImlGenContext, opcode) == false) + case 52: // STFS + if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, false, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 53: - if (PPCRecompilerImlGen_STFSU(ppcImlGenContext, opcode) == false) + case 53: // STFSU + if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, true, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 54: - if (PPCRecompilerImlGen_STFD(ppcImlGenContext, opcode) == false) + case 54: // STFD + if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, false, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; - case 55: - if (PPCRecompilerImlGen_STFDU(ppcImlGenContext, opcode) == false) + case 55: // STFDU + if (PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext, opcode, true, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 56: - if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 57: - if (PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_PSQ_L(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -4357,12 +2690,12 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) } break; case 60: - if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, false) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; case 61: - if (PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext, opcode) == false) + if (PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext, opcode, true) == false) unsupportedInstructionFound = true; ppcImlGenContext->hasFPUInstruction = true; break; @@ -4471,556 +2804,482 @@ bool PPCRecompiler_decodePPCInstruction(ppcImlGenContext_t* ppcImlGenContext) return unsupportedInstructionFound; } -bool 
PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses) +// returns false if code flow is not interrupted +bool PPCRecompiler_CheckIfInstructionEndsSegment(PPCFunctionBoundaryTracker& boundaryTracker, uint32 instructionAddress, uint32 opcode, bool& makeNextInstEnterable, bool& continueDefaultPath, bool& hasBranchTarget, uint32& branchTarget) { - //ppcImlGenContext_t ppcImlGenContext = { 0 }; - ppcImlGenContext.functionRef = ppcRecFunc; - // add entire range + hasBranchTarget = false; + branchTarget = 0xFFFFFFFF; + makeNextInstEnterable = false; + continueDefaultPath = false; + switch (Espresso::GetPrimaryOpcode(opcode)) + { + case Espresso::PrimaryOpcode::VIRTUAL_HLE: + { + makeNextInstEnterable = true; + hasBranchTarget = false; + continueDefaultPath = false; + return true; + } + case Espresso::PrimaryOpcode::BC: + { + uint32 BD, BI; + Espresso::BOField BO; + bool AA, LK; + Espresso::decodeOp_BC(opcode, BD, BO, BI, AA, LK); + if (!LK) + { + hasBranchTarget = true; + branchTarget = (AA ? BD : BD) + instructionAddress; + if (!boundaryTracker.ContainsAddress(branchTarget)) + hasBranchTarget = false; // far jump + } + makeNextInstEnterable = LK; + continueDefaultPath = true; + return true; + } + case Espresso::PrimaryOpcode::B: + { + uint32 LI; + bool AA, LK; + Espresso::decodeOp_B(opcode, LI, AA, LK); + if (!LK) + { + hasBranchTarget = true; + branchTarget = AA ? LI : LI + instructionAddress; + if (!boundaryTracker.ContainsAddress(branchTarget)) + hasBranchTarget = false; // far jump + } + makeNextInstEnterable = LK; + continueDefaultPath = false; + return true; + } + case Espresso::PrimaryOpcode::GROUP_19: + switch (Espresso::GetGroup19Opcode(opcode)) + { + case Espresso::Opcode19::BCLR: + case Espresso::Opcode19::BCCTR: + { + Espresso::BOField BO; + uint32 BI; + bool LK; + Espresso::decodeOp_BCSPR(opcode, BO, BI, LK); + continueDefaultPath = !BO.conditionIgnore() || !BO.decrementerIgnore(); // if branch is always taken then there is no continued path + makeNextInstEnterable = Espresso::DecodeLK(opcode); + return true; + } + default: + break; + } + break; + case Espresso::PrimaryOpcode::GROUP_31: + switch (Espresso::GetGroup31Opcode(opcode)) + { + default: + break; + } + break; + default: + break; + } + return false; +} + +void PPCRecompiler_DetermineBasicBlockRange(std::vector& basicBlockList, PPCFunctionBoundaryTracker& boundaryTracker, uint32 ppcStart, uint32 ppcEnd, const std::set& combinedBranchTargets, const std::set& entryAddresses) +{ + cemu_assert_debug(ppcStart <= ppcEnd); + + uint32 currentAddr = ppcStart; + + PPCBasicBlockInfo* curBlockInfo = &basicBlockList.emplace_back(currentAddr, entryAddresses); + + uint32 basicBlockStart = currentAddr; + while (currentAddr <= ppcEnd) + { + curBlockInfo->lastAddress = currentAddr; + uint32 opcode = memory_readU32(currentAddr); + bool nextInstIsEnterable = false; + bool hasBranchTarget = false; + bool hasContinuedFlow = false; + uint32 branchTarget = 0; + if (PPCRecompiler_CheckIfInstructionEndsSegment(boundaryTracker, currentAddr, opcode, nextInstIsEnterable, hasContinuedFlow, hasBranchTarget, branchTarget)) + { + curBlockInfo->hasBranchTarget = hasBranchTarget; + curBlockInfo->branchTarget = branchTarget; + curBlockInfo->hasContinuedFlow = hasContinuedFlow; + // start new basic block, except if this is the last instruction + if (currentAddr >= ppcEnd) + break; + curBlockInfo = &basicBlockList.emplace_back(currentAddr + 4, entryAddresses); + 
curBlockInfo->isEnterable = curBlockInfo->isEnterable || nextInstIsEnterable; + currentAddr += 4; + continue; + } + currentAddr += 4; + if (currentAddr <= ppcEnd) + { + if (combinedBranchTargets.find(currentAddr) != combinedBranchTargets.end()) + { + // instruction is branch target, start new basic block + curBlockInfo = &basicBlockList.emplace_back(currentAddr, entryAddresses); + } + } + + } +} + +std::vector<PPCBasicBlockInfo> PPCRecompiler_DetermineBasicBlockRange(PPCFunctionBoundaryTracker& boundaryTracker, const std::set<uint32>& entryAddresses) +{ + cemu_assert(!entryAddresses.empty()); + std::vector<PPCBasicBlockInfo> basicBlockList; + + const std::set<uint32> branchTargets = boundaryTracker.GetBranchTargets(); + auto funcRanges = boundaryTracker.GetRanges(); + + std::set<uint32> combinedBranchTargets = branchTargets; + combinedBranchTargets.insert(entryAddresses.begin(), entryAddresses.end()); + + for (auto& funcRangeIt : funcRanges) + PPCRecompiler_DetermineBasicBlockRange(basicBlockList, boundaryTracker, funcRangeIt.startAddress, funcRangeIt.startAddress + funcRangeIt.length - 4, combinedBranchTargets, entryAddresses); + + // mark all segments that start at entryAddresses as enterable (debug code for verification, can be removed) + size_t numMarkedEnterable = 0; + for (auto& basicBlockIt : basicBlockList) + { + if (entryAddresses.find(basicBlockIt.startAddress) != entryAddresses.end()) + { + cemu_assert_debug(basicBlockIt.isEnterable); + numMarkedEnterable++; + } + } + cemu_assert_debug(numMarkedEnterable == entryAddresses.size()); + + // todo - inline BL, currently this is done in the instruction handler of BL but this will mean that instruction cycle increasing is ignored + + return basicBlockList; +} + +bool PPCIMLGen_FillBasicBlock(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + ppcImlGenContext.currentOutputSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + ppcImlGenContext.currentInstruction = (uint32*)(memory_base + basicBlockInfo.startAddress); + + uint32* firstCurrentInstruction = ppcImlGenContext.currentInstruction; + uint32* endCurrentInstruction = (uint32*)(memory_base + basicBlockInfo.lastAddress); + + while (ppcImlGenContext.currentInstruction <= endCurrentInstruction) + { + uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); + ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; + + if (PPCRecompiler_decodePPCInstruction(&ppcImlGenContext)) + { + cemuLog_logDebug(LogType::Force, "PPCRecompiler: Unsupported instruction at 0x{:08x}", addressOfCurrentInstruction); + ppcImlGenContext.currentOutputSegment = nullptr; + return false; + } + } + ppcImlGenContext.currentOutputSegment = nullptr; + return true; +} + +// returns split segment from which the continued segment is available via seg->GetBranchNotTaken() +IMLSegment* PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + IMLSegment* writeSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + + IMLSegment* continuedSegment = ppcImlGenContext.InsertSegment(ppcImlGenContext.GetSegmentIndex(writeSegment) + 1); + + continuedSegment->SetLinkBranchTaken(writeSegment->GetBranchTaken()); + continuedSegment->SetLinkBranchNotTaken(writeSegment->GetBranchNotTaken()); + + writeSegment->SetLinkBranchNotTaken(continuedSegment); + writeSegment->SetLinkBranchTaken(nullptr); + + if (ppcImlGenContext.currentOutputSegment == writeSegment) + ppcImlGenContext.currentOutputSegment = continuedSegment; + 
cemu_assert_debug(basicBlockInfo.appendSegment == writeSegment); + basicBlockInfo.appendSegment = continuedSegment; + + return writeSegment; +} + +// generates a new segment and sets it as branch target for the current write segment. Returns the created segment +IMLSegment* PPCIMLGen_CreateNewSegmentAsBranchTarget(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + IMLSegment* writeSegment = basicBlockInfo.GetSegmentForInstructionAppend(); + IMLSegment* branchTargetSegment = ppcImlGenContext.NewSegment(); + cemu_assert_debug(!writeSegment->GetBranchTaken()); // must not have a target already + writeSegment->SetLinkBranchTaken(branchTargetSegment); + return branchTargetSegment; +} + +// verify that current instruction is the last instruction of the active basic block +void PPCIMLGen_AssertIfNotLastSegmentInstruction(ppcImlGenContext_t& ppcImlGenContext) +{ + cemu_assert_debug(ppcImlGenContext.currentBasicBlock->lastAddress == ppcImlGenContext.ppcAddressOfCurrentInstruction); +} + +bool PPCRecompiler_IsBasicBlockATightFiniteLoop(IMLSegment* imlSegment, PPCBasicBlockInfo& basicBlockInfo) +{ + // if we detect a finite loop we can skip generating the cycle check + // currently we only check for BDNZ loops since thats reasonably safe to rely on + // however there are other forms of loops that can be classified as finite, + // but detecting those involves analyzing PPC code and we dont have the infrastructure for that (e.g. IML has CheckRegisterUsage but we dont have an equivalent for PPC code) + + // base criteria, must jump to beginning of same segment + if (imlSegment->nextSegmentBranchTaken != imlSegment) + return false; + + uint32 opcode = *(uint32be*)(memory_base + basicBlockInfo.lastAddress); + if (Espresso::GetPrimaryOpcode(opcode) != Espresso::PrimaryOpcode::BC) + return false; + uint32 BO, BI, BD; + PPC_OPC_TEMPL_B(opcode, BO, BI, BD); + Espresso::BOField boField(BO); + if(!boField.conditionIgnore() || boField.branchAlways()) + return false; + if(boField.decrementerIgnore()) + return false; + return true; +} + +void PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext_t& ppcImlGenContext, PPCBasicBlockInfo& basicBlockInfo) +{ + IMLSegment* imlSegment = basicBlockInfo.GetFirstSegmentInChain(); + if (!basicBlockInfo.hasBranchTarget) + return; + if (basicBlockInfo.branchTarget > basicBlockInfo.startAddress) + return; + + if (PPCRecompiler_IsBasicBlockATightFiniteLoop(imlSegment, basicBlockInfo)) + return; + + // make the segment enterable so execution can return after passing a check + basicBlockInfo.GetFirstSegmentInChain()->SetEnterable(basicBlockInfo.startAddress); + + IMLSegment* splitSeg = PPCIMLGen_CreateSplitSegmentAtEnd(ppcImlGenContext, basicBlockInfo); + splitSeg->AppendInstruction()->make_cjump_cycle_check(); + + IMLSegment* exitSegment = ppcImlGenContext.NewSegment(); + splitSeg->SetLinkBranchTaken(exitSegment); + + exitSegment->AppendInstruction()->make_macro(PPCREC_IML_MACRO_LEAVE, basicBlockInfo.startAddress, 0, 0, IMLREG_INVALID); + + cemu_assert_debug(splitSeg->nextSegmentBranchNotTaken); + // let the IML optimizer and RA know that the original segment should be used during analysis for dead code elimination + exitSegment->SetNextSegmentForOverwriteHints(splitSeg->nextSegmentBranchNotTaken); +} + +void PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext_t& ppcImlGenContext) +{ + for (IMLSegment* segIt : ppcImlGenContext.segmentList2) + { + // handle empty segment + if (segIt->imlList.empty()) + { + 
cemu_assert_debug(segIt->GetBranchNotTaken()); + continue; + } + // check last instruction of segment + IMLInstruction* imlInstruction = segIt->GetLastInstruction(); + if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) + { + auto macroType = imlInstruction->operation; + switch (macroType) + { + case PPCREC_IML_MACRO_B_TO_REG: + case PPCREC_IML_MACRO_BL: + case PPCREC_IML_MACRO_B_FAR: + case PPCREC_IML_MACRO_HLE: + case PPCREC_IML_MACRO_LEAVE: + segIt->nextSegmentIsUncertain = true; + break; + case PPCREC_IML_MACRO_DEBUGBREAK: + case PPCREC_IML_MACRO_COUNT_CYCLES: + break; + default: + cemu_assert_unimplemented(); + } + } + } +} + +bool PPCRecompiler_GenerateIML(ppcImlGenContext_t& ppcImlGenContext, PPCFunctionBoundaryTracker& boundaryTracker, std::set<uint32>& entryAddresses) +{ + std::vector<PPCBasicBlockInfo> basicBlockList = PPCRecompiler_DetermineBasicBlockRange(boundaryTracker, entryAddresses); + + // create segments + std::unordered_map<uint32, PPCBasicBlockInfo*> addrToBB; + ppcImlGenContext.segmentList2.resize(basicBlockList.size()); + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + IMLSegment* seg = new IMLSegment(); + seg->ppcAddress = basicBlockInfo.startAddress; + if(basicBlockInfo.isEnterable) + seg->SetEnterable(basicBlockInfo.startAddress); + ppcImlGenContext.segmentList2[i] = seg; + cemu_assert_debug(addrToBB.find(basicBlockInfo.startAddress) == addrToBB.end()); + basicBlockInfo.SetInitialSegment(seg); + addrToBB.emplace(basicBlockInfo.startAddress, &basicBlockInfo); + } + // link segments + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& bbInfo = basicBlockList[i]; + cemu_assert_debug(bbInfo.GetFirstSegmentInChain() == bbInfo.GetSegmentForInstructionAppend()); + IMLSegment* seg = ppcImlGenContext.segmentList2[i]; + if (bbInfo.hasBranchTarget) + { + PPCBasicBlockInfo* targetBB = addrToBB[bbInfo.branchTarget]; + cemu_assert_debug(targetBB); + IMLSegment_SetLinkBranchTaken(seg, targetBB->GetFirstSegmentInChain()); + } + if (bbInfo.hasContinuedFlow) + { + PPCBasicBlockInfo* targetBB = addrToBB[bbInfo.lastAddress + 4]; + if (!targetBB) + { + cemuLog_log(LogType::Recompiler, "Recompiler was unable to link segment [0x{:08x}-0x{:08x}] to 0x{:08x}", bbInfo.startAddress, bbInfo.lastAddress, bbInfo.lastAddress + 4); + return false; + } + cemu_assert_debug(targetBB); + IMLSegment_SetLinkBranchNotTaken(seg, targetBB->GetFirstSegmentInChain()); + } + } + // we assume that all unreachable segments are potentially enterable + // todo - mark them as such + + + // generate cycle counters + // in theory we could generate these as part of FillBasicBlock() but in the future we might use more complex logic to emit fewer operations + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + IMLSegment* seg = basicBlockInfo.GetSegmentForInstructionAppend(); + + uint32 ppcInstructionCount = (basicBlockInfo.lastAddress - basicBlockInfo.startAddress + 4) / 4; + cemu_assert_debug(ppcInstructionCount > 0); + + PPCRecompiler_pushBackIMLInstructions(seg, 0, 1); + seg->imlList[0].type = PPCREC_IML_TYPE_MACRO; + seg->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; + seg->imlList[0].op_macro.param = ppcInstructionCount; + } + + // generate cycle check instructions + // note: Introduces new segments + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + PPCRecompiler_HandleCycleCheckCount(ppcImlGenContext, basicBlockInfo); + } + + // fill in all 
the basic blocks + // note: This step introduces new segments as is necessary for some instructions + for (size_t i = 0; i < basicBlockList.size(); i++) + { + PPCBasicBlockInfo& basicBlockInfo = basicBlockList[i]; + ppcImlGenContext.currentBasicBlock = &basicBlockInfo; + if (!PPCIMLGen_FillBasicBlock(ppcImlGenContext, basicBlockInfo)) + return false; + ppcImlGenContext.currentBasicBlock = nullptr; + } + + // mark segments with unknown jump destination (e.g. BLR and most macros) + PPCRecompiler_SetSegmentsUncertainFlow(ppcImlGenContext); + + // debug - check segment graph +#ifdef CEMU_DEBUG_ASSERT + //for (size_t i = 0; i < basicBlockList.size(); i++) + //{ + // IMLSegment* seg = ppcImlGenContext.segmentList2[i]; + // if (seg->list_prevSegments.empty()) + // { + // cemu_assert_debug(seg->isEnterable); + // } + //} + // debug - check if suffix instructions are at the end of segments and if they are present for branching segments + for (size_t segIndex = 0; segIndex < ppcImlGenContext.segmentList2.size(); segIndex++) + { + IMLSegment* seg = ppcImlGenContext.segmentList2[segIndex]; + IMLSegment* nextSeg = (segIndex+1) < ppcImlGenContext.segmentList2.size() ? ppcImlGenContext.segmentList2[segIndex + 1] : nullptr; + + if (seg->imlList.size() > 0) + { + for (size_t f = 0; f < seg->imlList.size() - 1; f++) + { + if (seg->imlList[f].IsSuffixInstruction()) + { + debug_printf("---------------- SegmentDump (Suffix instruction at wrong pos in segment 0x%x):\n", (int)segIndex); + IMLDebug_Dump(&ppcImlGenContext); + DEBUG_BREAK; + } + } + } + if (seg->nextSegmentBranchTaken) + { + if (!seg->HasSuffixInstruction()) + { + debug_printf("---------------- SegmentDump (NoSuffixInstruction in segment 0x%x):\n", (int)segIndex); + IMLDebug_Dump(&ppcImlGenContext); + DEBUG_BREAK; + } + } + if (seg->nextSegmentBranchNotTaken) + { + // if branch not taken, flow must continue to next segment in sequence + cemu_assert_debug(seg->nextSegmentBranchNotTaken == nextSeg); + } + // more detailed checks based on actual suffix instruction + if (seg->imlList.size() > 0) + { + IMLInstruction* inst = seg->GetLastInstruction(); + if (inst->type == PPCREC_IML_TYPE_MACRO && inst->op_macro.param == PPCREC_IML_MACRO_B_FAR) + { + cemu_assert_debug(!seg->GetBranchTaken()); + cemu_assert_debug(!seg->GetBranchNotTaken()); + } + if (inst->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) + { + cemu_assert_debug(seg->GetBranchTaken()); + cemu_assert_debug(seg->GetBranchNotTaken()); + } + if (inst->type == PPCREC_IML_TYPE_CONDITIONAL_JUMP) + { + if (!seg->GetBranchTaken() || !seg->GetBranchNotTaken()) + { + debug_printf("---------------- SegmentDump (Missing branch for conditional jump in segment 0x%x):\n", (int)segIndex); + IMLDebug_Dump(&ppcImlGenContext); + cemu_assert_error(); + } + } + } + segIndex++; + } +#endif + + + // todos: + // - basic block determination should look for the B(L) B(L) pattern. Or maybe just mark every bb without any input segments as an entry segment + + return true; +} + +bool PPCRecompiler_generateIntermediateCode(ppcImlGenContext_t& ppcImlGenContext, PPCRecFunction_t* ppcRecFunc, std::set& entryAddresses, PPCFunctionBoundaryTracker& boundaryTracker) +{ + ppcImlGenContext.boundaryTracker = &boundaryTracker; + if (!PPCRecompiler_GenerateIML(ppcImlGenContext, boundaryTracker, entryAddresses)) + return false; + + // set range + // todo - support non-continuous functions for the range tracking? 
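// ---- Editor's annotation (illustrative sketch, not part of this diff) ----
// The CEMU_DEBUG_ASSERT block above validates the segment graph after IML generation.
// The standalone helper below restates those invariants in one place. It uses only the
// IMLSegment members already referenced there (imlList, IsSuffixInstruction(),
// HasSuffixInstruction(), nextSegmentBranchTaken, nextSegmentBranchNotTaken); the
// function name is hypothetical and such a helper would live at file scope.
static bool IMLDebug_SegmentLooksWellFormed(IMLSegment* seg, IMLSegment* nextSegInSequence)
{
	// suffix instructions (branches and macros that end control flow) may only appear
	// as the last instruction of a segment
	for (size_t i = 0; i + 1 < seg->imlList.size(); i++)
	{
		if (seg->imlList[i].IsSuffixInstruction())
			return false;
	}
	// a segment with a branch-taken link must end in a suffix instruction
	if (seg->nextSegmentBranchTaken && !seg->HasSuffixInstruction())
		return false;
	// a fall-through (branch-not-taken) link must point at the next segment in sequence
	if (seg->nextSegmentBranchNotTaken && seg->nextSegmentBranchNotTaken != nextSegInSequence)
		return false;
	return true;
}
// ---- end of editor's annotation ----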
ppcRecRange_t recRange; recRange.ppcAddress = ppcRecFunc->ppcAddress; recRange.ppcSize = ppcRecFunc->ppcSize; ppcRecFunc->list_ranges.push_back(recRange); - // process ppc instructions - ppcImlGenContext.currentInstruction = (uint32*)memory_getPointerFromVirtualOffset(ppcRecFunc->ppcAddress); - bool unsupportedInstructionFound = false; - sint32 numPPCInstructions = ppcRecFunc->ppcSize/4; - sint32 unsupportedInstructionCount = 0; - uint32 unsupportedInstructionLastOffset = 0; - uint32* firstCurrentInstruction = ppcImlGenContext.currentInstruction; - uint32* endCurrentInstruction = ppcImlGenContext.currentInstruction + numPPCInstructions; + - while(ppcImlGenContext.currentInstruction < endCurrentInstruction) - { - uint32 addressOfCurrentInstruction = (uint32)((uint8*)ppcImlGenContext.currentInstruction - memory_base); - ppcImlGenContext.ppcAddressOfCurrentInstruction = addressOfCurrentInstruction; - ppcImlGenContext.cyclesSinceLastBranch++; - PPCRecompilerImlGen_generateNewInstruction_jumpmark(&ppcImlGenContext, addressOfCurrentInstruction); - - if (entryAddresses.find(addressOfCurrentInstruction) != entryAddresses.end()) - { - // add PPCEnter for addresses that are in entryAddresses - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(&ppcImlGenContext, addressOfCurrentInstruction); - } - else if(ppcImlGenContext.currentInstruction != firstCurrentInstruction) - { - // add PPCEnter mark if code is seemingly unreachable (for example if between two unconditional jump instructions without jump goal) - uint32 opcodeCurrent = PPCRecompiler_getCurrentInstruction(&ppcImlGenContext); - uint32 opcodePrevious = PPCRecompiler_getPreviousInstruction(&ppcImlGenContext); - if( ((opcodePrevious>>26) == 18) && ((opcodeCurrent>>26) == 18) ) - { - // between two B(L) instructions - // todo: for BL only if they are not inlineable - - bool canInlineFunction = false; - if ((opcodePrevious & PPC_OPC_LK) && (opcodePrevious & PPC_OPC_AA) == 0) - { - uint32 li; - PPC_OPC_TEMPL_I(opcodePrevious, li); - sint32 inlineSize = 0; - if (PPCRecompiler_canInlineFunction(li + addressOfCurrentInstruction - 4, &inlineSize)) - canInlineFunction = true; - } - if( canInlineFunction == false && (opcodePrevious & PPC_OPC_LK) == false) - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(&ppcImlGenContext, addressOfCurrentInstruction); - } - if( ((opcodePrevious>>26) == 19) && PPC_getBits(opcodePrevious, 30, 10) == 528 ) - { - uint32 BO, BI, BD; - PPC_OPC_TEMPL_XL(opcodePrevious, BO, BI, BD); - if( (BO & 16) && (opcodePrevious&PPC_OPC_LK) == 0 ) - { - // after unconditional BCTR instruction - PPCRecompilerImlGen_generateNewInstruction_ppcEnter(&ppcImlGenContext, addressOfCurrentInstruction); - } - } - } - - unsupportedInstructionFound = PPCRecompiler_decodePPCInstruction(&ppcImlGenContext); - if( unsupportedInstructionFound ) - { - unsupportedInstructionCount++; - unsupportedInstructionLastOffset = ppcImlGenContext.ppcAddressOfCurrentInstruction; - unsupportedInstructionFound = false; - //break; - } - } - ppcImlGenContext.ppcAddressOfCurrentInstruction = 0; // reset current instruction offset (any future generated IML instruction will be assigned to ppc address 0) - if( unsupportedInstructionCount > 0 || unsupportedInstructionFound ) - { - // could not compile function - debug_printf("Failed recompile due to unknown instruction at 0x%08x\n", unsupportedInstructionLastOffset); - PPCRecompiler_freeContext(&ppcImlGenContext); - return false; - } - // optimize unused jumpmarks away - // first, flag all jumpmarks as unused - std::map 
map_jumpMarks; - for(sint32 i=0; isecond->op_jumpmark.flags &= ~PPCREC_IML_OP_FLAG_UNUSED; - } - } - // lastly, remove jumpmarks that still have the unused flag set - sint32 currentImlIndex = 0; - for(sint32 i=0; i end of segment after current instruction - // If we encounter a jumpmark -> end of segment before current instruction - // If we encounter ppc_enter -> end of segment before current instruction - if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_CJUMP || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BLRL || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTR || ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BCTRL)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_BL)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_B_FAR)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_LEAVE)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_HLE)) || - (ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_MACRO && (ppcImlGenContext.imlList[segmentImlIndex].operation == PPCREC_IML_MACRO_MFTB)) ) - { - // segment ends after current instruction - PPCRecImlSegment_t* ppcRecSegment = PPCRecompiler_generateImlSegment(&ppcImlGenContext); - ppcRecSegment->startOffset = segmentStart; - ppcRecSegment->count = segmentImlIndex-segmentStart+1; - ppcRecSegment->ppcAddress = 0xFFFFFFFF; - segmentStart = segmentImlIndex+1; - } - else if( ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_JUMPMARK || - ppcImlGenContext.imlList[segmentImlIndex].type == PPCREC_IML_TYPE_PPC_ENTER ) - { - // segment ends before current instruction - if( segmentImlIndex > segmentStart ) - { - PPCRecImlSegment_t* ppcRecSegment = PPCRecompiler_generateImlSegment(&ppcImlGenContext); - ppcRecSegment->startOffset = segmentStart; - ppcRecSegment->count = segmentImlIndex-segmentStart; - ppcRecSegment->ppcAddress = 0xFFFFFFFF; - segmentStart = segmentImlIndex; - } - } - segmentImlIndex++; - } - if( segmentImlIndex != segmentStart ) - { - // final segment - PPCRecImlSegment_t* ppcRecSegment = PPCRecompiler_generateImlSegment(&ppcImlGenContext); - ppcRecSegment->startOffset = segmentStart; - ppcRecSegment->count = segmentImlIndex-segmentStart; - ppcRecSegment->ppcAddress = 0xFFFFFFFF; - segmentStart = segmentImlIndex; - } - // move iml instructions into the segments - for(sint32 s=0; sstartOffset; - uint32 imlCount = ppcImlGenContext.segmentList[s]->count; - if( imlCount > 0 ) - { - ppcImlGenContext.segmentList[s]->imlListSize = imlCount + 4; - ppcImlGenContext.segmentList[s]->imlList = (PPCRecImlInstruction_t*)malloc(sizeof(PPCRecImlInstruction_t)*ppcImlGenContext.segmentList[s]->imlListSize); - ppcImlGenContext.segmentList[s]->imlListCount = imlCount; - memcpy(ppcImlGenContext.segmentList[s]->imlList, ppcImlGenContext.imlList+imlStartIndex, sizeof(PPCRecImlInstruction_t)*imlCount); - } - else - { - // empty segments are allowed so we can handle 
multiple PPC entry addresses pointing to the same code - ppcImlGenContext.segmentList[s]->imlList = NULL; - ppcImlGenContext.segmentList[s]->imlListSize = 0; - ppcImlGenContext.segmentList[s]->imlListCount = 0; - } - ppcImlGenContext.segmentList[s]->startOffset = 9999999; - ppcImlGenContext.segmentList[s]->count = 9999999; - } - // clear segment-independent iml list - free(ppcImlGenContext.imlList); - ppcImlGenContext.imlList = NULL; - ppcImlGenContext.imlListCount = 999999; // set to high number to force crash in case old code still uses ppcImlGenContext.imlList - // calculate PPC address of each segment based on iml instructions inside that segment (we need this info to calculate how many cpu cycles each segment takes) - for(sint32 s=0; simlListCount; i++) - { - if( ppcImlGenContext.segmentList[s]->imlList[i].associatedPPCAddress == 0 ) - continue; - //if( ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_JUMPMARK || ppcImlGenContext.segmentList[s]->imlList[i].type == PPCREC_IML_TYPE_NO_OP ) - // continue; // jumpmarks and no-op instructions must not affect segment ppc address range - segmentPPCAddrMin = std::min(ppcImlGenContext.segmentList[s]->imlList[i].associatedPPCAddress, segmentPPCAddrMin); - segmentPPCAddrMax = std::max(ppcImlGenContext.segmentList[s]->imlList[i].associatedPPCAddress, segmentPPCAddrMax); - } - if( segmentPPCAddrMin != 0xFFFFFFFF ) - { - ppcImlGenContext.segmentList[s]->ppcAddrMin = segmentPPCAddrMin; - ppcImlGenContext.segmentList[s]->ppcAddrMax = segmentPPCAddrMax; - } - else - { - ppcImlGenContext.segmentList[s]->ppcAddrMin = 0; - ppcImlGenContext.segmentList[s]->ppcAddrMax = 0; - } - } - // certain instructions can change the segment state - // ppcEnter instruction marks a segment as enterable (BL, BCTR, etc. instructions can enter at this location from outside) - // jumpmarks mark the segment as a jump destination (within the same function) - for(sint32 s=0; simlListCount > 0 ) - { - if( ppcImlGenContext.segmentList[s]->imlList[0].type == PPCREC_IML_TYPE_PPC_ENTER ) - { - // mark segment as enterable - if( ppcImlGenContext.segmentList[s]->isEnterable ) - assert_dbg(); // should not happen? - ppcImlGenContext.segmentList[s]->isEnterable = true; - ppcImlGenContext.segmentList[s]->enterPPCAddress = ppcImlGenContext.segmentList[s]->imlList[0].op_ppcEnter.ppcAddress; - // remove ppc_enter instruction - ppcImlGenContext.segmentList[s]->imlList[0].type = PPCREC_IML_TYPE_NO_OP; - ppcImlGenContext.segmentList[s]->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - ppcImlGenContext.segmentList[s]->imlList[0].associatedPPCAddress = 0; - } - else if( ppcImlGenContext.segmentList[s]->imlList[0].type == PPCREC_IML_TYPE_JUMPMARK ) - { - // mark segment as jump destination - if( ppcImlGenContext.segmentList[s]->isJumpDestination ) - assert_dbg(); // should not happen? 
- ppcImlGenContext.segmentList[s]->isJumpDestination = true; - ppcImlGenContext.segmentList[s]->jumpDestinationPPCAddress = ppcImlGenContext.segmentList[s]->imlList[0].op_jumpmark.address; - // remove jumpmark instruction - ppcImlGenContext.segmentList[s]->imlList[0].type = PPCREC_IML_TYPE_NO_OP; - ppcImlGenContext.segmentList[s]->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - ppcImlGenContext.segmentList[s]->imlList[0].associatedPPCAddress = 0; - } - else - break; - } - } - // the first segment is always enterable as the recompiled functions entrypoint - ppcImlGenContext.segmentList[0]->isEnterable = true; - ppcImlGenContext.segmentList[0]->enterPPCAddress = ppcImlGenContext.functionRef->ppcAddress; - - // link segments for further inter-segment optimization - PPCRecompilerIML_linkSegments(&ppcImlGenContext); - - // optimization pass - replace segments with conditional MOVs if possible - for (sint32 s = 0; s < ppcImlGenContext.segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s]; - if (imlSegment->nextSegmentBranchNotTaken == NULL || imlSegment->nextSegmentBranchTaken == NULL) - continue; // not a branching segment - PPCRecImlInstruction_t* lastInstruction = PPCRecompilerIML_getLastInstruction(imlSegment); - if (lastInstruction->type != PPCREC_IML_TYPE_CJUMP || lastInstruction->op_conditionalJump.crRegisterIndex != 0) - continue; - PPCRecImlSegment_t* conditionalSegment = imlSegment->nextSegmentBranchNotTaken; - PPCRecImlSegment_t* finalSegment = imlSegment->nextSegmentBranchTaken; - if(imlSegment->nextSegmentBranchTaken != imlSegment->nextSegmentBranchNotTaken->nextSegmentBranchNotTaken) - continue; - if (imlSegment->nextSegmentBranchNotTaken->imlListCount > 4) - continue; - if(conditionalSegment->list_prevSegments.size() != 1) - continue; // the reduced segment must not be the target of any other branch - if(conditionalSegment->isEnterable) - continue; - // check if the segment contains only iml instructions that can be turned into conditional moves (Value assignment, register assignment) - bool canReduceSegment = true; - for (sint32 f = 0; f < conditionalSegment->imlListCount; f++) - { - PPCRecImlInstruction_t* imlInstruction = conditionalSegment->imlList+f; - if( imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - continue; - // todo: Register to register copy - canReduceSegment = false; - break; - } - - if( canReduceSegment == false ) - continue; - - // remove the branch instruction - uint8 branchCond_crRegisterIndex = lastInstruction->op_conditionalJump.crRegisterIndex; - uint8 branchCond_crBitIndex = lastInstruction->op_conditionalJump.crBitIndex; - bool branchCond_bitMustBeSet = lastInstruction->op_conditionalJump.bitMustBeSet; - - PPCRecompilerImlGen_generateNewInstruction_noOp(&ppcImlGenContext, lastInstruction); - - // append conditional moves based on branch condition - for (sint32 f = 0; f < conditionalSegment->imlListCount; f++) - { - PPCRecImlInstruction_t* imlInstruction = conditionalSegment->imlList + f; - if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - PPCRecompilerImlGen_generateNewInstruction_conditional_r_s32(&ppcImlGenContext, PPCRecompiler_appendInstruction(imlSegment), PPCREC_IML_OP_ASSIGN, imlInstruction->op_r_immS32.registerIndex, imlInstruction->op_r_immS32.immS32, branchCond_crRegisterIndex, branchCond_crBitIndex, !branchCond_bitMustBeSet); - else - assert_dbg(); - } - // update segment links - // source 
segment: imlSegment, conditional/removed segment: conditionalSegment, final segment: finalSegment - PPCRecompilerIML_removeLink(imlSegment, conditionalSegment); - PPCRecompilerIML_removeLink(imlSegment, finalSegment); - PPCRecompilerIML_removeLink(conditionalSegment, finalSegment); - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, finalSegment); - // remove all instructions from conditional segment - conditionalSegment->imlListCount = 0; - - // if possible, merge imlSegment with finalSegment - if (finalSegment->isEnterable == false && finalSegment->list_prevSegments.size() == 1) - { - // todo: Clean this up and move into separate function PPCRecompilerIML_mergeSegments() - PPCRecompilerIML_removeLink(imlSegment, finalSegment); - if (finalSegment->nextSegmentBranchNotTaken) - { - PPCRecImlSegment_t* tempSegment = finalSegment->nextSegmentBranchNotTaken; - PPCRecompilerIML_removeLink(finalSegment, tempSegment); - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, tempSegment); - } - if (finalSegment->nextSegmentBranchTaken) - { - PPCRecImlSegment_t* tempSegment = finalSegment->nextSegmentBranchTaken; - PPCRecompilerIML_removeLink(finalSegment, tempSegment); - PPCRecompilerIml_setLinkBranchTaken(imlSegment, tempSegment); - } - // copy IML instructions - for (sint32 f = 0; f < finalSegment->imlListCount; f++) - { - memcpy(PPCRecompiler_appendInstruction(imlSegment), finalSegment->imlList + f, sizeof(PPCRecImlInstruction_t)); - } - finalSegment->imlListCount = 0; - - //PPCRecompiler_dumpIML(ppcRecFunc, &ppcImlGenContext); - } - - // todo: If possible, merge with the segment following conditionalSegment (merging is only possible if the segment is not an entry point or has no other jump sources) - } - - // insert cycle counter instruction in every segment that has a cycle count greater zero - for(sint32 s=0; sppcAddrMin == 0 ) - continue; - // count number of PPC instructions in segment - // note: This algorithm correctly counts inlined functions but it doesn't count NO-OP instructions like ISYNC - uint32 lastPPCInstAddr = 0; - uint32 ppcCount2 = 0; - for (sint32 i = 0; i < imlSegment->imlListCount; i++) - { - if (imlSegment->imlList[i].associatedPPCAddress == 0) - continue; - if (imlSegment->imlList[i].associatedPPCAddress == lastPPCInstAddr) - continue; - lastPPCInstAddr = imlSegment->imlList[i].associatedPPCAddress; - ppcCount2++; - } - //uint32 ppcCount = imlSegment->ppcAddrMax-imlSegment->ppcAddrMin+4; -> No longer works with inlined functions - uint32 cycleCount = ppcCount2;// ppcCount / 4; - if( cycleCount > 0 ) - { - PPCRecompiler_pushBackIMLInstructions(imlSegment, 0, 1); - imlSegment->imlList[0].type = PPCREC_IML_TYPE_MACRO; - imlSegment->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - imlSegment->imlList[0].operation = PPCREC_IML_MACRO_COUNT_CYCLES; - imlSegment->imlList[0].op_macro.param = cycleCount; - } - } - - // find segments that have a (conditional) jump instruction that points in reverse direction of code flow - // for these segments there is a risk that the recompiler could get trapped in an infinite busy loop. - // todo: We should do a loop-detection prepass where we flag segments that are actually in a loop. 
We can then use this information below to avoid generating the scheduler-exit code for segments that aren't actually in a loop despite them referencing an earlier segment (which could be an exit segment for example) - uint32 currentLoopEscapeJumpMarker = 0xFF000000; // start in an area where no valid code can be located - for(sint32 s=0; sppcAddrMin which isn't really reliable. (We already had a problem where function inlining would generate falsified segment ranges by omitting the branch instruction). Find a better solution (use jumpmark/enterable offsets?) - PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s]; - if( imlSegment->imlListCount == 0 ) - continue; - if (imlSegment->imlList[imlSegment->imlListCount - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlListCount - 1].op_conditionalJump.jumpmarkAddress > imlSegment->ppcAddrMin) - continue; - if (imlSegment->imlList[imlSegment->imlListCount - 1].type != PPCREC_IML_TYPE_CJUMP || imlSegment->imlList[imlSegment->imlListCount - 1].op_conditionalJump.jumpAccordingToSegment) - continue; - // exclude non-infinite tight loops - if (PPCRecompilerImlAnalyzer_isTightFiniteLoop(imlSegment)) - continue; - // potential loop segment found, split this segment into four: - // P0: This segment checks if the remaining cycles counter is still above zero. If yes, it jumps to segment P2 (it's also the jump destination for other segments) - // P1: This segment consists only of a single ppc_leave instruction and is usually skipped. Register unload instructions are later inserted here. - // P2: This segment contains the iml instructions of the original segment - // PEntry: This segment is used to enter the function, it jumps to P0 - // All segments are considered to be part of the same PPC instruction range - // The first segment also retains the jump destination and enterable properties from the original segment. 
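// ---- Editor's annotation (not part of this diff) ----
// The removed P0/P1/P2/PEntry scheme described above is covered in the reworked generator
// by PPCRecompiler_HandleCycleCheckCount() (added earlier in this file), which only acts on
// basic blocks that branch backwards (potential loops). Rough shape of the segment graph it
// produces, using the names from that function:
//
//   [basic block body] -> splitSeg: cjump_cycle_check
//                           branch taken     -> exitSegment: LEAVE macro (exit to scheduler, re-enter at block start)
//                           branch not taken -> continued segment of the original block
//
// The block's first segment is additionally marked enterable so execution can resume there
// after the cycle check forced an exit.
// ---- end of editor's annotation ----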
- //debug_printf("--- Insert cycle counter check ---\n"); - //PPCRecompiler_dumpIML(ppcRecFunc, &ppcImlGenContext); - - PPCRecompilerIml_insertSegments(&ppcImlGenContext, s, 2); - imlSegment = NULL; - PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext.segmentList[s+0]; - PPCRecImlSegment_t* imlSegmentP1 = ppcImlGenContext.segmentList[s+1]; - PPCRecImlSegment_t* imlSegmentP2 = ppcImlGenContext.segmentList[s+2]; - // create entry point segment - PPCRecompilerIml_insertSegments(&ppcImlGenContext, ppcImlGenContext.segmentListCount, 1); - PPCRecImlSegment_t* imlSegmentPEntry = ppcImlGenContext.segmentList[ppcImlGenContext.segmentListCount-1]; - // relink segments - PPCRecompilerIML_relinkInputSegment(imlSegmentP2, imlSegmentP0); - PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); - PPCRecompilerIml_setLinkBranchTaken(imlSegmentP0, imlSegmentP2); - PPCRecompilerIml_setLinkBranchTaken(imlSegmentPEntry, imlSegmentP0); - // update segments - uint32 enterPPCAddress = imlSegmentP2->ppcAddrMin; - if (imlSegmentP2->isEnterable) - enterPPCAddress = imlSegmentP2->enterPPCAddress; - imlSegmentP0->ppcAddress = 0xFFFFFFFF; - imlSegmentP1->ppcAddress = 0xFFFFFFFF; - imlSegmentP2->ppcAddress = 0xFFFFFFFF; - cemu_assert_debug(imlSegmentP2->ppcAddrMin != 0); - // move segment properties from segment P2 to segment P0 - imlSegmentP0->isJumpDestination = imlSegmentP2->isJumpDestination; - imlSegmentP0->jumpDestinationPPCAddress = imlSegmentP2->jumpDestinationPPCAddress; - imlSegmentP0->isEnterable = false; - //imlSegmentP0->enterPPCAddress = imlSegmentP2->enterPPCAddress; - imlSegmentP0->ppcAddrMin = imlSegmentP2->ppcAddrMin; - imlSegmentP0->ppcAddrMax = imlSegmentP2->ppcAddrMax; - imlSegmentP2->isJumpDestination = false; - imlSegmentP2->jumpDestinationPPCAddress = 0; - imlSegmentP2->isEnterable = false; - imlSegmentP2->enterPPCAddress = 0; - imlSegmentP2->ppcAddrMin = 0; - imlSegmentP2->ppcAddrMax = 0; - // setup enterable segment - if( enterPPCAddress != 0 && enterPPCAddress != 0xFFFFFFFF ) - { - imlSegmentPEntry->isEnterable = true; - imlSegmentPEntry->ppcAddress = enterPPCAddress; - imlSegmentPEntry->enterPPCAddress = enterPPCAddress; - } - // assign new jumpmark to segment P2 - imlSegmentP2->isJumpDestination = true; - imlSegmentP2->jumpDestinationPPCAddress = currentLoopEscapeJumpMarker; - currentLoopEscapeJumpMarker++; - // create ppc_leave instruction in segment P1 - PPCRecompiler_pushBackIMLInstructions(imlSegmentP1, 0, 1); - imlSegmentP1->imlList[0].type = PPCREC_IML_TYPE_MACRO; - imlSegmentP1->imlList[0].operation = PPCREC_IML_MACRO_LEAVE; - imlSegmentP1->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - imlSegmentP1->imlList[0].op_macro.param = imlSegmentP0->ppcAddrMin; - imlSegmentP1->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; - // create cycle-based conditional instruction in segment P0 - PPCRecompiler_pushBackIMLInstructions(imlSegmentP0, 0, 1); - imlSegmentP0->imlList[0].type = PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK; - imlSegmentP0->imlList[0].operation = 0; - imlSegmentP0->imlList[0].crRegister = PPC_REC_INVALID_REGISTER; - imlSegmentP0->imlList[0].op_conditionalJump.jumpmarkAddress = imlSegmentP2->jumpDestinationPPCAddress; - imlSegmentP0->imlList[0].associatedPPCAddress = imlSegmentP0->ppcAddrMin; - // jump instruction for PEntry - PPCRecompiler_pushBackIMLInstructions(imlSegmentPEntry, 0, 1); - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(&ppcImlGenContext, imlSegmentPEntry->imlList + 0); - - // skip the newly created segments - s += 2; - } - - // 
isolate entry points from function flow (enterable segments must not be the target of any other segment) - // this simplifies logic during register allocation - PPCRecompilerIML_isolateEnterableSegments(&ppcImlGenContext); - - // if GQRs can be predicted, optimize PSQ load/stores - PPCRecompiler_optimizePSQLoadAndStore(&ppcImlGenContext); - - // count number of used registers - uint32 numLoadedFPRRegisters = 0; - for(uint32 i=0; i<255; i++) - { - if( ppcImlGenContext.mappedFPRRegister[i] ) - numLoadedFPRRegisters++; - } - - // insert name store instructions at the end of each segment but before branch instructions - for(sint32 s=0; s<ppcImlGenContext.segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext.segmentList[s]; - if( imlSegment->imlListCount == 0 ) - continue; // ignore empty segments - // analyze segment for register usage - PPCImlOptimizerUsedRegisters_t registersUsed; - for(sint32 i=0; i<imlSegment->imlListCount; i++) - { - PPCRecompiler_checkRegisterUsage(&ppcImlGenContext, imlSegment->imlList+i, &registersUsed); - //PPCRecompilerImlGen_findRegisterByMappedName(ppcImlGenContext, registersUsed.readGPR1); - sint32 accessedTempReg[5]; - // intermediate FPRs - accessedTempReg[0] = registersUsed.readFPR1; - accessedTempReg[1] = registersUsed.readFPR2; - accessedTempReg[2] = registersUsed.readFPR3; - accessedTempReg[3] = registersUsed.readFPR4; - accessedTempReg[4] = registersUsed.writtenFPR1; - for(sint32 f=0; f<5; f++) - { - if( accessedTempReg[f] == -1 ) - continue; - uint32 regName = ppcImlGenContext.mappedFPRRegister[accessedTempReg[f]]; - if( regName >= PPCREC_NAME_FPR0 && regName < PPCREC_NAME_FPR0+32 ) - { - imlSegment->ppcFPRUsed[regName - PPCREC_NAME_FPR0] = true; - } - } - } - } - - // merge certain float load+store patterns (must happen before FPR register remapping) - PPCRecompiler_optimizeDirectFloatCopies(&ppcImlGenContext); - // delay byte swapping for certain load+store patterns - PPCRecompiler_optimizeDirectIntegerCopies(&ppcImlGenContext); - - if (numLoadedFPRRegisters > 0) - { - if (PPCRecompiler_manageFPRRegisters(&ppcImlGenContext) == false) - { - PPCRecompiler_freeContext(&ppcImlGenContext); - return false; - } - } - - PPCRecompilerImm_allocateRegisters(&ppcImlGenContext); - - // remove redundant name load and store instructions - PPCRecompiler_reorderConditionModifyInstructions(&ppcImlGenContext); - PPCRecompiler_removeRedundantCRUpdates(&ppcImlGenContext); return true; } diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp index e30a9683..6e602b47 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlGenFPU.cpp @@ -1,198 +1,85 @@ +#include "Cafe/HW/Espresso/EspressoISA.h" #include "../Interpreter/PPCInterpreterInternal.h" #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" #include "Cafe/GameProfile/GameProfile.h" +#include "IML/IML.h" -bool hasSSE1Support = true; -bool hasSSE2Support = true; -bool hasSSE3Support = true; -bool hasLZCNTSupport = false; -bool hasMOVBESupport = false; -bool hasBMI2Support = false; -bool hasAVXSupport = false; +ATTR_MS_ABI double frsqrte_espresso(double input); +ATTR_MS_ABI double fres_espresso(double input); -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) +IMLReg _GetRegCR(ppcImlGenContext_t* ppcImlGenContext, uint8 crReg, uint8 crBit); + +#define DefinePS0(name, regIndex) IMLReg name = 
_GetFPRRegPS0(ppcImlGenContext, regIndex); +#define DefinePS1(name, regIndex) IMLReg name = _GetFPRRegPS1(ppcImlGenContext, regIndex); +#define DefinePSX(name, regIndex, isPS1) IMLReg name = isPS1 ? _GetFPRRegPS1(ppcImlGenContext, regIndex) : _GetFPRRegPS0(ppcImlGenContext, regIndex); +#define DefineTempFPR(name, index) IMLReg name = _GetFPRTemp(ppcImlGenContext, index); + +IMLReg _GetFPRRegPS0(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex) { - // load from memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->operation = 0; - imlInstruction->op_storeLoad.registerData = registerDestination; - imlInstruction->op_storeLoad.registerMem = registerMemory; - imlInstruction->op_storeLoad.registerGQR = registerGQR; - imlInstruction->op_storeLoad.immS32 = immS32; - imlInstruction->op_storeLoad.mode = mode; - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; + cemu_assert_debug(regIndex < 32); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR_HALF + regIndex * 2 + 0, IMLRegFormat::F64); } -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerDestination, uint8 registerMemory1, uint8 registerMemory2, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) +IMLReg _GetFPRRegPS1(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex) { - // load from memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_LOAD_INDEXED; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->operation = 0; - imlInstruction->op_storeLoad.registerData = registerDestination; - imlInstruction->op_storeLoad.registerMem = registerMemory1; - imlInstruction->op_storeLoad.registerMem2 = registerMemory2; - imlInstruction->op_storeLoad.registerGQR = registerGQR; - imlInstruction->op_storeLoad.immS32 = 0; - imlInstruction->op_storeLoad.mode = mode; - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; + cemu_assert_debug(regIndex < 32); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR_HALF + regIndex * 2 + 1, IMLRegFormat::F64); } -void PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = PPC_REC_INVALID_REGISTER) +IMLReg _GetFPRTemp(ppcImlGenContext_t* ppcImlGenContext, uint32 index) { - // store to memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->operation = 0; - imlInstruction->op_storeLoad.registerData = registerSource; - imlInstruction->op_storeLoad.registerMem = registerMemory; - imlInstruction->op_storeLoad.registerGQR = registerGQR; - imlInstruction->op_storeLoad.immS32 = immS32; - imlInstruction->op_storeLoad.mode = mode; - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; + cemu_assert_debug(index < 4); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + index, IMLRegFormat::F64); } -void 
PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext_t* ppcImlGenContext, uint8 registerSource, uint8 registerMemory1, uint8 registerMemory2, sint32 immS32, uint32 mode, bool switchEndian, uint8 registerGQR = 0) +IMLReg _GetFPRReg(ppcImlGenContext_t* ppcImlGenContext, uint32 regIndex, bool selectPS1) { - // store to memory - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_STORE_INDEXED; - imlInstruction->crRegister = PPC_REC_INVALID_REGISTER; - imlInstruction->operation = 0; - imlInstruction->op_storeLoad.registerData = registerSource; - imlInstruction->op_storeLoad.registerMem = registerMemory1; - imlInstruction->op_storeLoad.registerMem2 = registerMemory2; - imlInstruction->op_storeLoad.registerGQR = registerGQR; - imlInstruction->op_storeLoad.immS32 = immS32; - imlInstruction->op_storeLoad.mode = mode; - imlInstruction->op_storeLoad.flags2.swapEndian = switchEndian; + cemu_assert_debug(regIndex < 32); + return PPCRecompilerImlGen_LookupReg(ppcImlGenContext, PPCREC_NAME_FPR_HALF + regIndex * 2 + (selectPS1 ? 1 : 0), IMLRegFormat::F64); } -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, uint8 registerResult, uint8 registerOperand, sint32 crRegister=PPC_REC_INVALID_REGISTER) +void PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, IMLReg fprRegister, bool flushDenormals=false) { - // fpr OP fpr - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_R_R; - imlInstruction->operation = operation; - imlInstruction->op_fpr_r_r.registerResult = registerResult; - imlInstruction->op_fpr_r_r.registerOperand = registerOperand; - imlInstruction->crRegister = crRegister; - imlInstruction->op_fpr_r_r.flags = 0; -} - -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, uint8 registerResult, uint8 registerOperand1, uint8 registerOperand2, sint32 crRegister=PPC_REC_INVALID_REGISTER) -{ - // fpr = OP (fpr,fpr) - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_R_R_R; - imlInstruction->operation = operation; - imlInstruction->op_fpr_r_r_r.registerResult = registerResult; - imlInstruction->op_fpr_r_r_r.registerOperandA = registerOperand1; - imlInstruction->op_fpr_r_r_r.registerOperandB = registerOperand2; - imlInstruction->crRegister = crRegister; - imlInstruction->op_fpr_r_r_r.flags = 0; -} - -void PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext_t* ppcImlGenContext, sint32 operation, uint8 registerResult, uint8 registerOperandA, uint8 registerOperandB, uint8 registerOperandC, sint32 crRegister=PPC_REC_INVALID_REGISTER) -{ - // fpr = OP (fpr,fpr,fpr) - PPCRecImlInstruction_t* imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_R_R_R_R; - imlInstruction->operation = operation; - imlInstruction->op_fpr_r_r_r_r.registerResult = registerResult; - imlInstruction->op_fpr_r_r_r_r.registerOperandA = registerOperandA; - imlInstruction->op_fpr_r_r_r_r.registerOperandB = registerOperandB; - imlInstruction->op_fpr_r_r_r_r.registerOperandC = registerOperandC; - imlInstruction->crRegister = crRegister; - imlInstruction->op_fpr_r_r_r_r.flags 
= 0; -} - -void PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 operation, uint8 registerResult, sint32 crRegister) -{ - // OP (fpr) - if(imlInstruction == NULL) - imlInstruction = PPCRecompilerImlGen_generateNewEmptyInstruction(ppcImlGenContext); - imlInstruction->type = PPCREC_IML_TYPE_FPR_R; - imlInstruction->operation = operation; - imlInstruction->op_fpr_r.registerResult = registerResult; - imlInstruction->crRegister = crRegister; -} - -/* - * Rounds the bottom double to single precision (if single precision accuracy is emulated) - */ -void PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, uint32 fprRegister, bool flushDenormals=false) -{ - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL, PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegister); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegister); if( flushDenormals ) assert_dbg(); } -/* - * Rounds pair of doubles to single precision (if single precision accuracy is emulated) - */ -void PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext_t* ppcImlGenContext, uint32 fprRegister, bool flushDenormals=false) +bool PPCRecompilerImlGen_LFS_LFSU_LFD_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL, PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR, fprRegister); - if( flushDenormals ) - assert_dbg(); -} - -bool PPCRecompilerImlGen_LFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - if( hasSSE1Support == false ) - return false; sint32 rA, frD; uint32 imm; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( ppcImlGenContext->LSQE ) + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + if (withUpdate) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); + // add imm to memory register + cemu_assert_debug(rA != 0); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); + imm = 0; // set imm to 0 so we dont add it twice + } + DefinePS0(fpPs0, frD); + if (isDouble) + { + // LFD/LFDU + ppcImlGenContext->emitInst().make_fpr_r_memory(fpPs0, gprRegister, imm, PPCREC_FPR_LD_MODE_DOUBLE, true); } else { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); + // LFS/LFSU + ppcImlGenContext->emitInst().make_fpr_r_memory(fpPs0, gprRegister, imm, PPCREC_FPR_LD_MODE_SINGLE, true); + if( ppcImlGenContext->LSQE ) + { + DefinePS1(fpPs1, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fpPs1, fpPs0); + } } return true; } -bool PPCRecompilerImlGen_LFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_LFSX_LFSUX_LFDX_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble) { - if( hasSSE1Support == false ) - return false; - sint32 rA, frD; - uint32 imm; - 
PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( ppcImlGenContext->LSQE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); - } - return true; -} - -bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - if( hasSSE2Support == false ) - return false; sint32 rA, frD, rB; PPC_OPC_TEMPL_X(opcode, frD, rA, rB); if( rA == 0 ) @@ -201,171 +88,54 @@ bool PPCRecompilerImlGen_LFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( ppcImlGenContext->LSQE ) + IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + if (withUpdate) + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); + DefinePS0(fpPs0, frD); + if (isDouble) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); + if (withUpdate) + ppcImlGenContext->emitInst().make_fpr_r_memory(fpPs0, gprRegister1, 0, PPCREC_FPR_LD_MODE_DOUBLE, true); + else + ppcImlGenContext->emitInst().make_fpr_r_memory_indexed(fpPs0, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_DOUBLE, true); } else { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); + if (withUpdate) + ppcImlGenContext->emitInst().make_fpr_r_memory( fpPs0, gprRegister1, 0, PPCREC_FPR_LD_MODE_SINGLE, true); + else + ppcImlGenContext->emitInst().make_fpr_r_memory_indexed( fpPs0, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_SINGLE, true); + if( ppcImlGenContext->LSQE ) + { + DefinePS1(fpPs1, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fpPs1, fpPs0); + } } return true; } -bool PPCRecompilerImlGen_LFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_STFS_STFSU_STFD_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate, bool isDouble) { - if( hasSSE2Support == false ) - return false; - sint32 rA, frD, rB; - PPC_OPC_TEMPL_X(opcode, frD, rA, rB); - if( rA == 0 ) + sint32 rA, frD; + uint32 imm; + PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); + IMLReg gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + DefinePS0(fpPs0, frD); + if 
(withUpdate) { - debugBreakpoint(); - return false; - } - // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // add rB to rA (if rA != 0) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegister1, gprRegister2); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( ppcImlGenContext->LSQE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1, true); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprRegister, gprRegister, (sint32)imm); + imm = 0; } + if (isDouble) + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister, imm, PPCREC_FPR_ST_MODE_DOUBLE, true); else - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0, true); - } + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister, imm, PPCREC_FPR_ST_MODE_SINGLE, true); return true; } -bool PPCRecompilerImlGen_LFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompilerImlGen_STFSX_STFSUX_STFDX_STFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool hasUpdate, bool isDouble) { - if( hasSSE1Support == false ) - return false; - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - if( rA == 0 ) - { - assert_dbg(); - } - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_LFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - if( hasSSE1Support == false ) - return false; - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - if( rA == 0 ) - { - assert_dbg(); - } - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // emit load iml - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_LFDX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - if( hasSSE2Support == false ) - return false; - sint32 rA, frD, rB; - PPC_OPC_TEMPL_X(opcode, frD, rA, rB); - if( rA == 0 ) - { - debugBreakpoint(); - return false; - } - // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // get fpr register index - 
uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_LFDUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - if( hasSSE2Support == false ) - return false; - sint32 rA, frD, rB; - PPC_OPC_TEMPL_X(opcode, frD, rA, rB); - if( rA == 0 ) - { - debugBreakpoint(); - return false; - } - // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // add rB to rA (if rA != 0) - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegister1, gprRegister2); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_STFS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - if( hasSSE1Support == false ) - return false; - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_STFSU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - if( hasSSE1Support == false ) - return false; - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - if( hasSSE2Support == false ) - return false; sint32 rA, frS, rB; PPC_OPC_TEMPL_X(opcode, frS, rA, rB); if( rA == 0 ) @@ -374,141 +144,53 @@ bool PPCRecompilerImlGen_STFSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opco return false; } // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); + IMLReg gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + IMLReg gprRegister2 = 
PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + if (hasUpdate) + { + ppcImlGenContext->emitInst().make_r_r_r(PPCREC_IML_OP_ADD, gprRegister1, gprRegister1, gprRegister2); + } + DefinePS0(fpPs0, frS); + auto mode = isDouble ? PPCREC_FPR_ST_MODE_DOUBLE : PPCREC_FPR_ST_MODE_SINGLE; if( ppcImlGenContext->LSQE ) { - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); + if (hasUpdate) + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister1, 0, mode, true); + else + ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fpPs0, gprRegister1, gprRegister2, 0, mode, true); } else { - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); - } - return true; -} - - -bool PPCRecompilerImlGen_STFSUX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - if( hasSSE2Support == false ) - return false; - sint32 rA, frS, rB; - PPC_OPC_TEMPL_X(opcode, frS, rA, rB); - if( rA == 0 ) - { - debugBreakpoint(); - return false; - } - // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); - // calculate EA in rA - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, NULL, PPCREC_IML_OP_ADD, gprRegister1, gprRegister2); - - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_STFD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - if( hasSSE1Support == false ) - return false; - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - if( rA == 0 ) - { - debugBreakpoint(); - return false; - } - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, imm, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_STFDU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - if( hasSSE1Support == false ) - return false; - sint32 rA, frD; - uint32 imm; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, imm); - if( rA == 0 ) - { - debugBreakpoint(); - return false; - } - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, 0, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); - return true; -} - -bool PPCRecompilerImlGen_STFDX(ppcImlGenContext_t* 
ppcImlGenContext, uint32 opcode) -{ - if( hasSSE2Support == false ) - return false; - sint32 rA, frS, rB; - PPC_OPC_TEMPL_X(opcode, frS, rA, rB); - if( rA == 0 ) - { - debugBreakpoint(); - return false; - } - // get memory gpr registers - uint32 gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - uint32 gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); - if( ppcImlGenContext->LSQE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); - } - else - { - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0, true); + if (hasUpdate) + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister1, 0, mode, true); + else + ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fpPs0, gprRegister1, gprRegister2, 0, mode, true); } return true; } bool PPCRecompilerImlGen_STFIWX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - if( hasSSE2Support == false ) - return false; sint32 rA, frS, rB; PPC_OPC_TEMPL_X(opcode, frS, rA, rB); - // get memory gpr registers - uint32 gprRegister1; - uint32 gprRegister2; + DefinePS0(fpPs0, frS); + IMLReg gprRegister1; + IMLReg gprRegister2; if( rA != 0 ) { - gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA, false); - gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); + gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + gprRegister2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); } else { // rA is not used - gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB, false); - gprRegister2 = 0; + gprRegister1 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rB); + gprRegister2 = IMLREG_INVALID; } - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frS); if( rA != 0 ) - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r_indexed(ppcImlGenContext, fprRegister, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); + ppcImlGenContext->emitInst().make_fpr_memory_r_indexed(fpPs0, gprRegister1, gprRegister2, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); else - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister1, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); + ppcImlGenContext->emitInst().make_fpr_memory_r(fpPs0, gprRegister1, 0, PPCREC_FPR_ST_MODE_UI32_FROM_PS0, true); return true; } @@ -517,13 +199,10 @@ bool PPCRecompilerImlGen_FADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_ASSERT(frC==0); - - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_BOTTOM, 
fprRegisterD, fprRegisterA, fprRegisterB); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprA, fprB); return true; } @@ -532,13 +211,10 @@ bool PPCRecompilerImlGen_FSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_ASSERT(frC==0); - - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // subtract bottom double of frB from bottom double of frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprA, fprB); return true; } @@ -553,15 +229,14 @@ bool PPCRecompilerImlGen_FMUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod frA = frC; frC = temp; } - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); return true; } @@ -570,27 +245,25 @@ bool PPCRecompilerImlGen_FDIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB, frC_unused; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC_unused); PPC_ASSERT(frB==0); - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); if( frB == frD && frA != frB ) { - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + DefineTempFPR(fprTemp, 0); // move frA to temporary register - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); // divide bottom double of temporary register by bottom double of frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprRegisterTemp, fprRegisterB); + 
ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprTemp, fprB); // move result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); return true; } // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterA); // copy ps0 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // copy ps0 // divide bottom double of frD by bottom double of frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprD, fprB); return true; } @@ -598,38 +271,37 @@ bool PPCRecompilerImlGen_FMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // if frB is already in frD we need a temporary register to store the product of frA*frC if( frB == frD ) { - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + DefineTempFPR(fprTemp, 0); // move frA to temporary register - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); // multiply bottom double of temporary register with bottom double of frC - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp, fprC); // add result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprTemp); return true; } // if frC == frD -> swap registers, we assume that frC != frD - if( fprRegisterD == fprRegisterC ) + if( frD == frC ) { // swap frA and frC - sint32 temp = fprRegisterA; - fprRegisterA = fprRegisterC; - fprRegisterC = temp; + IMLReg temp = fprA; + fprA = fprC; + fprC = temp; } // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // always copy ps0 and ps1 // multiply bottom double of frD with bottom double of frC - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, 
fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprB); return true; } @@ -637,32 +309,34 @@ bool PPCRecompilerImlGen_FMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opco { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // if frB is already in frD we need a temporary register to store the product of frA*frC + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); if( frB == frD ) { - // not implemented + // if frB is already in frD we need a temporary register to store the product of frA*frC + DefineTempFPR(fprTemp, 0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp, fprB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); return false; } - // if frC == frD -> swap registers, we assume that frC != frD - if( fprRegisterD == fprRegisterC ) + if( frD == frC ) { // swap frA and frC - sint32 temp = fprRegisterA; - fprRegisterA = fprRegisterC; - fprRegisterC = temp; + IMLReg temp = fprA; + fprA = fprC; + fprC = temp; } - // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + // move frA to frD + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frC - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // sub frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprB); return true; } @@ -670,51 +344,52 @@ bool PPCRecompilerImlGen_FNMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // if frB is already in frD we 
need a temporary register to store the product of frA*frC if( frB == frD ) { - // hCPU->fpr[frD].fpr = -(hCPU->fpr[frA].fpr * hCPU->fpr[frC].fpr - hCPU->fpr[frD].fpr); - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - //// negate frB/frD - //PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterD, true); + DefineTempFPR(fprTemp, 0); // move frA to temporary register - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); // multiply bottom double of temporary register with bottom double of frC - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp, fprC); // sub frB from temporary register - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp, fprB); // negate result - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprTemp); // move result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); return true; } // if frC == frD -> swap registers, we assume that frC != frD - if( fprRegisterD == fprRegisterC ) + if( frD == frC ) { // swap frA and frC - sint32 temp = fprRegisterA; - fprRegisterA = fprRegisterC; - fprRegisterC = temp; + IMLReg temp = fprA; + fprA = fprC; + fprC = temp; } // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frC - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // sub frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprB); // negate result - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterD); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprD); return true; } +#define PSE_CopyResultToPs1() if( ppcImlGenContext->PSE ) \ + { \ + DefinePS1(fprDPS1, frD); \ + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDPS1, fprD); \ + } + bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { sint32 frD, frA, frB_unused, frC; @@ -727,24 +402,18 @@ bool PPCRecompilerImlGen_FMULS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco frA = frC; 
frC = temp; } - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 - + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // multiply bottom double of frD with bottom double of frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterD, fprRegisterC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprD, fprC); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } - + PSE_CopyResultToPs1(); return true; } @@ -753,44 +422,31 @@ bool PPCRecompilerImlGen_FDIVS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 frD, frA, frB, frC_unused; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC_unused); PPC_ASSERT(frB==0); - /*hCPU->fpr[frD].fpr = (float)(hCPU->fpr[frA].fpr / hCPU->fpr[frB].fpr); - if( hCPU->PSE ) - hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0;*/ - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); if( frB == frD && frA != frB ) { - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); + DefineTempFPR(fprTemp, 0); // move frA to temporary register - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp, fprA); // divide bottom double of temporary register by bottom double of frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprTemp, fprB); // move result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprTemp); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, 
fprRegisterD); - } + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // subtract bottom double of frB from bottom double of frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprD, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } @@ -806,22 +462,17 @@ bool PPCRecompilerImlGen_FADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco frA = frB; frB = temp; } - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); // move frA to frD (if different register) - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_ASSIGN, fprRegisterD, fprRegisterA); // always copy ps0 and ps1 + if( frD != frA ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprA); // add bottom double of frD and bottom double of frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprD, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } @@ -830,20 +481,12 @@ bool PPCRecompilerImlGen_FSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco int frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); PPC_ASSERT(frB==0); - - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // subtract bottom - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB); - // adjust accuracy - 
PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - // if paired single mode, copy frD ps0 to ps1 - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprD, fprA, fprB); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } @@ -851,34 +494,26 @@ bool PPCRecompilerImlGen_FMADDS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - //FPRD(RD) = FPRD(RA) * FPRD(RC) + FPRD(RB); - //hCPU->fpr[frD].fpr = hCPU->fpr[frA].fpr * hCPU->fpr[frC].fpr + hCPU->fpr[frB].fpr; - //if( hCPU->PSE ) - // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - uint32 fprRegisterTemp; + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register - if( fprRegisterD != fprRegisterA && fprRegisterD != fprRegisterB && fprRegisterD != fprRegisterC ) - fprRegisterTemp = fprRegisterD; + IMLReg fprRegisterTemp; + if( frD != frA && frD != frB && frD != frC ) + fprRegisterTemp = fprD; else - fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterA, fprRegisterC); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_BOTTOM, fprRegisterTemp, fprRegisterB); + fprRegisterTemp = _GetFPRTemp(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprRegisterTemp, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterTemp); - } - else if( fprRegisterD != fprRegisterTemp ) + if( fprD != fprRegisterTemp ) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprRegisterTemp); } + PSE_CopyResultToPs1(); return true; } @@ -886,33 +521,27 @@ bool PPCRecompilerImlGen_FMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - //hCPU->fpr[frD].fp0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - 
hCPU->fpr[frB].fp0); - //if( hCPU->PSE ) - // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - uint32 fprRegisterTemp; + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); + + IMLReg fprRegisterTemp; // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register - if( fprRegisterD != fprRegisterA && fprRegisterD != fprRegisterB && fprRegisterD != fprRegisterC ) - fprRegisterTemp = fprRegisterD; + if( frD != frA && frD != frB && frD != frC ) + fprRegisterTemp = fprD; else - fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterA, fprRegisterC); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterTemp, fprRegisterB); + fprRegisterTemp = _GetFPRTemp(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprRegisterTemp, fprB); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterTemp); - } - else if( fprRegisterD != fprRegisterTemp ) + if( fprD != fprRegisterTemp ) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprRegisterTemp); } + PSE_CopyResultToPs1(); return true; } @@ -920,53 +549,32 @@ bool PPCRecompilerImlGen_FNMSUBS(ppcImlGenContext_t* ppcImlGenContext, uint32 op { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - - //[FP1(RD) = ]FP0(RD) = -(FP0(RA) * FP0(RC) - FP0(RB)); - //hCPU->fpr[frD].fp0 = (float)-(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0); - //if( PPC_PSE ) - // hCPU->fpr[frD].fp1 = hCPU->fpr[frD].fp0; - - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - uint32 fprRegisterTemp; + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + DefinePS0(fprD, frD); + IMLReg fprRegisterTemp; // if none of the operand registers overlap with the result register then we can avoid the usage of a temporary register - if( fprRegisterD != fprRegisterA && 
fprRegisterD != fprRegisterB && fprRegisterD != fprRegisterC ) - fprRegisterTemp = fprRegisterD; + if( frD != frA && frD != frB && frD != frC ) + fprRegisterTemp = fprD; else - fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM, fprRegisterTemp, fprRegisterA, fprRegisterC); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_BOTTOM, fprRegisterTemp, fprRegisterB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterTemp); + fprRegisterTemp = _GetFPRTemp(ppcImlGenContext, 0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprRegisterTemp, fprA, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprRegisterTemp, fprB); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprRegisterTemp); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterTemp); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprRegisterTemp); // set result - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterTemp); - } - else if( fprRegisterD != fprRegisterTemp ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterTemp); - } + if( fprD != fprRegisterTemp ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprRegisterTemp); + PSE_CopyResultToPs1(); return true; } bool PPCRecompilerImlGen_FCMPO(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - sint32 crfD, frA, frB; - PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); - crfD >>= 2; - if( hasSSE2Support == false ) - { - return false; - } - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); - return true; + // Not implemented + return false; } bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) @@ -974,13 +582,21 @@ bool PPCRecompilerImlGen_FCMPU(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 crfD, frA, frB; PPC_OPC_TEMPL_X(opcode, crfD, frA, frB); crfD >>= 2; - if( hasSSE2Support == false ) - { - return false; - } - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPU_BOTTOM, fprRegisterA, fprRegisterB, crfD); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_fpr_compare(fprA, 
fprB, crBitRegLT, IMLCondition::UNORDERED_LT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegGT, IMLCondition::UNORDERED_GT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); + + // todo: set fpscr + return true; } @@ -988,9 +604,9 @@ bool PPCRecompilerImlGen_FMR(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode { sint32 frD, rA, frB; PPC_OPC_TEMPL_X(opcode, frD, rA, frB); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); return true; } @@ -999,14 +615,11 @@ bool PPCRecompilerImlGen_FABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); - // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // move frB to frD (if different register) - if( fprRegisterD != fprRegisterB ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - // abs frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_ABS_BOTTOM, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + if( frD != frB ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS, fprD); return true; } @@ -1015,14 +628,11 @@ bool PPCRecompilerImlGen_FNABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); - // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // move frB to frD (if different register) - if( fprRegisterD != fprRegisterB ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - // abs frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + if( frD != frB ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATIVE_ABS, fprD); return true; } @@ -1031,12 +641,12 @@ bool PPCRecompilerImlGen_FRES(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); - // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - 
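Review note: FCMPU is now lowered into four make_fpr_compare instructions, one per bit of the target CR field. For reference, the bits those compares are expected to produce (standard fcmpu semantics; the precise meaning of the UNORDERED_* IML conditions is an assumption here):

    #include <cmath>

    static void fcmpu_model(double a, double b, bool& lt, bool& gt, bool& eq, bool& so)
    {
        bool unordered = std::isnan(a) || std::isnan(b); // either operand NaN
        lt = !unordered && (a < b);
        gt = !unordered && (a > b);
        eq = !unordered && (a == b);
        so = unordered; // FU/"unordered" bit; the matching FPSCR update is still marked as todo above
    }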
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterB); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); + PSE_CopyResultToPs1(); return true; } @@ -1045,17 +655,12 @@ bool PPCRecompilerImlGen_FRSP(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( fprRegisterD != fprRegisterB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - } - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprRegisterD); - if( ppcImlGenContext->PSE ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterD); - } + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + if( fprD != fprB ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM, fprD); + PSE_CopyResultToPs1(); return true; } @@ -1065,17 +670,12 @@ bool PPCRecompilerImlGen_FNEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod PPC_OPC_TEMPL_X(opcode, frD, frA, frB); PPC_ASSERT(frA==0); if( opcode&PPC_OPC_RC ) - { return false; - } - // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // move frB to frD (if different register) - if( fprRegisterD != fprRegisterB ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - // negate frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_NEGATE_BOTTOM, fprRegisterD); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + if( frD != frB ) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprD, fprB); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprD); return true; } @@ -1087,11 +687,11 @@ bool PPCRecompilerImlGen_FSEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opcod { return false; } - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SELECT_BOTTOM, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + DefinePS0(fprC, frC); + 
DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT, fprD, fprA, fprB, fprC); return true; } @@ -1099,12 +699,11 @@ bool PPCRecompilerImlGen_FRSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 op { sint32 frD, frA, frB, frC; PPC_OPC_TEMPL_A(opcode, frD, frA, frB, frC); - // hCPU->fpr[frD].fpr = 1.0 / sqrt(hCPU->fpr[frB].fpr); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT, fprRegisterD, fprRegisterB); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprB, IMLREG_INVALID, IMLREG_INVALID, fprD); // adjust accuracy - PPRecompilerImmGen_optionalRoundBottomFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprD); return true; } @@ -1112,69 +711,242 @@ bool PPCRecompilerImlGen_FCTIWZ(ppcImlGenContext_t* ppcImlGenContext, uint32 opc { sint32 frD, frA, frB; PPC_OPC_TEMPL_X(opcode, frD, frA, frB); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ, fprRegisterD, fprRegisterB); + DefinePS0(fprB, frB); + DefinePS0(fprD, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FCTIWZ, fprD, fprB); return true; } -bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, uint32& gqrValue); + +void PPCRecompilerImlGen_ClampInteger(ppcImlGenContext_t* ppcImlGenContext, IMLReg reg, sint32 clampMin, sint32 clampMax) +{ + IMLReg regTmpCondBool = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 1); + // min(reg, clampMax) + ppcImlGenContext->emitInst().make_compare_s32(reg, clampMax, regTmpCondBool, IMLCondition::SIGNED_GT); + ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, false); // condition needs to be inverted because we skip if the condition is true + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken */ + genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMax); + } + ); + // max(reg, clampMin) + ppcImlGenContext->emitInst().make_compare_s32(reg, clampMin, regTmpCondBool, IMLCondition::SIGNED_LT); + ppcImlGenContext->emitInst().make_conditional_jump(regTmpCondBool, false); + PPCIMLGen_CreateSegmentBranchedPath(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, + [&](ppcImlGenContext_t& genCtx) + { + /* branch not taken */ + genCtx.emitInst().make_r_s32(PPCREC_IML_OP_ASSIGN, reg, clampMin); + } + ); +} + +void PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext_t* ppcImlGenContext, IMLReg gqrRegister, IMLReg fprRegScaleOut, bool isLoad) +{ + IMLReg gprTmp2 = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 2); + // extract scale factor and sign extend it + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_LEFT_SHIFT, gprTmp2, gqrRegister, 32 - ((isLoad ? 
24 : 8)+7)); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_S, gprTmp2, gprTmp2, (32-23)-7); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, gprTmp2, gprTmp2, 0x1FF<<23); + if (isLoad) + ppcImlGenContext->emitInst().make_r_r(PPCREC_IML_OP_NEG, gprTmp2, gprTmp2); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprTmp2, gprTmp2, 0x7F<<23); + // gprTmp2 now holds the scale float bits, bitcast to float + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_BITCAST_INT_TO_FLOAT, fprRegScaleOut, gprTmp2); +} + +void PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, Espresso::PSQ_LOAD_TYPE loadType, bool readPS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1) +{ + if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_F32) + { + ppcImlGenContext->emitInst().make_fpr_r_memory(fprDPS0, gprA, imm, PPCREC_FPR_LD_MODE_SINGLE, true); + if(readPS1) + { + ppcImlGenContext->emitInst().make_fpr_r_memory(fprDPS1, gprA, imm + 4, PPCREC_FPR_LD_MODE_SINGLE, true); + } + } + if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16) + { + // get scale factor + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2); + PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, true); + + bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S16); + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 16, isSigned, true); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp); + + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS0, fprDPS0, fprScaleReg); + + if(readPS1) + { + ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 2, 16, isSigned, true); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS1, fprDPS1, fprScaleReg); + } + } + else if (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8) + { + // get scale factor + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2); + PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, true); + + bool isSigned = (loadType == Espresso::PSQ_LOAD_TYPE::TYPE_S8); + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm, 8, isSigned, true); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS0, gprTmp); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS0, fprDPS0, fprScaleReg); + if(readPS1) + { + ppcImlGenContext->emitInst().make_r_memory(gprTmp, gprA, imm + 1, 8, isSigned, true); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_INT_TO_FLOAT, fprDPS1, gprTmp); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDPS1, fprDPS1, fprScaleReg); + } + } +} + +// PSQ_L and PSQ_LU +bool PPCRecompilerImlGen_PSQ_L(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate) { - if (hasSSE2Support == false) - return false; 
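Review note: PPCRecompilerIMLGen_GetPSQScale above avoids a scale lookup table by shifting the 6-bit two's-complement scale field of the GQR into the IEEE-754 exponent position, negating it for loads (dequantization multiplies by 2^-scale), adding the exponent bias (0x7F<<23) and bitcasting the result to float. Equivalent host-side arithmetic, assuming the usual GQR layout (load scale in bits 24-29, store scale in bits 8-13); this is a sketch of the math, not the emitted IML:

    #include <cstdint>
    #include <cstring>

    static float psq_scale_model(uint32_t gqr, bool isLoad)
    {
        // sign-extend the 6-bit scale field (relies on arithmetic right shift)
        int32_t scale = (int32_t)(gqr << (isLoad ? 2 : 18)) >> 26;
        // build 2^scale (stores) or 2^-scale (loads) directly from its bit pattern
        int32_t biasedExponent = 127 + (isLoad ? -scale : scale);
        uint32_t bits = (uint32_t)biasedExponent << 23;
        float result;
        std::memcpy(&result, &bits, sizeof(result));
        return result;
    }

Since the multiplier is always a power of two, the subsequent FPR_MULTIPLY by the scale factor should be exact.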
int rA, frD; uint32 immUnused; PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); - sint32 gqrIndex = ((opcode >> 12) & 7); uint32 imm = opcode & 0xFFF; if (imm & 0x800) imm |= ~0xFFF; - bool readPS1 = (opcode & 0x8000) == false; - // get gqr register - uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex, false); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // psq load - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, imm, readPS1 ? PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, true, gqrRegister); + IMLReg gprA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + DefinePS0(fprDPS0, frD); + DefinePS1(fprDPS1, frD); + if (!readPS1) + { + // if PS1 is not explicitly read then set it to 1.0 + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_LOAD_ONE, fprDPS1); + } + if (withUpdate) + { + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprA, gprA, (sint32)imm); + imm = 0; + } + uint32 knownGQRValue = 0; + if ( !PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, knownGQRValue) ) + { + // generate complex dynamic handler when we don't know the GQR value ahead of time + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + // extract the load type from the GQR register + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_RIGHT_SHIFT_U, loadTypeReg, gqrRegister, 16); + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, loadTypeReg, 0x7); + IMLSegment* caseSegment[6]; + sint32 compareValues[6] = {0, 4, 5, 6, 7}; + PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5, 0); + for (sint32 i=0; i<5; i++) + { + IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] + PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, gqrIndex, static_cast<Espresso::PSQ_LOAD_TYPE>(compareValues[i]), readPS1, gprA, imm, fprDPS0, fprDPS1); + // create the case jump instruction here because we need to add it last + caseSegment[i]->AppendInstruction()->make_jump(); + } + return true; + } + + Espresso::PSQ_LOAD_TYPE type = static_cast<Espresso::PSQ_LOAD_TYPE>((knownGQRValue >> 0) & 0x7); + sint32 scale = (knownGQRValue >> 8) & 0x3F; + cemu_assert_debug(scale == 0); // known GQR values always use a scale of 0 (1.0f) + if (scale != 0) + return false; + + if (type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED1 || + type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED2 || + type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED3) + { + return false; + } + + PPCRecompilerImlGen_EmitPSQLoadCase(ppcImlGenContext, gqrIndex, type, readPS1, gprA, imm, fprDPS0, fprDPS1); return true; } -bool PPCRecompilerImlGen_PSQ_LU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +void PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, Espresso::PSQ_LOAD_TYPE storeType, bool storePS1, IMLReg gprA, sint32 imm, IMLReg fprDPS0, IMLReg fprDPS1) { - if (hasSSE2Support == false) - return false; - int rA, frD; -
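Review note: PSQ_L now has two paths: when PPCRecompiler_isUGQRValueKnown() can prove the GQR contents at compile time a single typed load sequence is emitted, otherwise the 3-bit load type is read out of the GQR at runtime and dispatched over five case segments ({0, 4, 5, 6, 7}). Per element, each dispatched case boils down to the following (the numeric type mapping follows the usual Gekko/Espresso quantization types and is an assumption here; big-endian byte swapping is omitted for brevity):

    #include <cstdint>

    // 'scale' is 2^-LD_SCALE, as produced by the helper sketched earlier
    static double psq_dequantize_model(const void* mem, int type, float scale)
    {
        switch (type)
        {
        case 0: return *(const float*)mem;                       // TYPE_F32, stored as-is
        case 4: return (double)(*(const uint8_t*)mem) * scale;   // TYPE_U8
        case 5: return (double)(*(const uint16_t*)mem) * scale;  // TYPE_U16
        case 6: return (double)(*(const int8_t*)mem) * scale;    // TYPE_S8
        case 7: return (double)(*(const int16_t*)mem) * scale;   // TYPE_S16
        default: return 0.0;                                      // reserved types are rejected above
        }
    }

When ps1 is not read, the handler first loads 1.0 into ps1 explicitly, matching the comment in the emitted code.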
uint32 immUnused; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); - if (rA == 0) - return false; + cemu_assert_debug(!storePS1 || fprDPS1.IsValid()); + if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_F32) + { + ppcImlGenContext->emitInst().make_fpr_memory_r(fprDPS0, gprA, imm, PPCREC_FPR_ST_MODE_SINGLE, true); + if(storePS1) + { + ppcImlGenContext->emitInst().make_fpr_memory_r(fprDPS1, gprA, imm + 4, PPCREC_FPR_ST_MODE_SINGLE, true); + } + } + else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U16 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16) + { + // get scale factor + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2); + PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, false); - sint32 gqrIndex = ((opcode >> 12) & 7); - uint32 imm = opcode & 0xFFF; - if (imm & 0x800) - imm |= ~0xFFF; + bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S16); + IMLReg fprTmp = _GetFPRTemp(ppcImlGenContext, 0); - bool readPS1 = (opcode & 0x8000) == false; - - // get gqr register - uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex, false); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // paired load - PPCRecompilerImlGen_generateNewInstruction_fpr_r_memory(ppcImlGenContext, fprRegister, gprRegister, 0, readPS1 ? 
PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0, true, gqrRegister); - return true; + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS0, fprScaleReg); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp); + + if (isSigned) + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767); + else + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 65535); + ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 16, true); + if(storePS1) + { + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS1, fprScaleReg); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp); + if (isSigned) + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -32768, 32767); + else + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 65535); + ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm + 2, 16, true); + } + } + else if (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_U8 || storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8) + { + // get scale factor + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg fprScaleReg = _GetFPRTemp(ppcImlGenContext, 2); + PPCRecompilerIMLGen_GetPSQScale(ppcImlGenContext, gqrRegister, fprScaleReg, false); + + bool isSigned = (storeType == Espresso::PSQ_LOAD_TYPE::TYPE_S8); + IMLReg fprTmp = _GetFPRTemp(ppcImlGenContext, 0); + IMLReg gprTmp = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS0, fprScaleReg); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp); + if (isSigned) + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127); + else + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 255); + ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm, 8, true); + if(storePS1) + { + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp, fprDPS1, fprScaleReg); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_FLOAT_TO_INT, gprTmp, fprTmp); + if (isSigned) + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, -128, 127); + else + PPCRecompilerImlGen_ClampInteger(ppcImlGenContext, gprTmp, 0, 255); + ppcImlGenContext->emitInst().make_memory_r(gprTmp, gprA, imm + 1, 8, true); + } + } } -bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// PSQ_ST and PSQ_STU +bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withUpdate) { int rA, frD; uint32 immUnused; @@ -1183,181 +955,133 @@ bool PPCRecompilerImlGen_PSQ_ST(ppcImlGenContext_t* ppcImlGenContext, uint32 opc if (imm & 0x800) imm |= ~0xFFF; sint32 gqrIndex = ((opcode >> 12) & 7); - bool storePS1 = (opcode & 0x8000) == false; - // get gqr register - uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex, false); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // paired 
store - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, imm, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, true, gqrRegister); - return true; -} + IMLReg gprA = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0+rA); + DefinePS0(fprDPS0, frD); + IMLReg fprDPS1 = storePS1 ? _GetFPRRegPS1(ppcImlGenContext, frD) : IMLREG_INVALID; -bool PPCRecompilerImlGen_PSQ_STU(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - int rA, frD; - uint32 immUnused; - PPC_OPC_TEMPL_D_SImm(opcode, frD, rA, immUnused); - if (rA == 0) + if (withUpdate) + { + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_ADD, gprA, gprA, (sint32)imm); + imm = 0; + } + + uint32 gqrValue = 0; + if ( !PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue) ) + { + // generate complex dynamic handler when we dont know the GQR value ahead of time + IMLReg gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex); + IMLReg loadTypeReg = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY + 0); + // extract the load type from the GQR register + ppcImlGenContext->emitInst().make_r_r_s32(PPCREC_IML_OP_AND, loadTypeReg, gqrRegister, 0x7); + + IMLSegment* caseSegment[5]; + sint32 compareValues[5] = {0, 4, 5, 6, 7}; + PPCIMLGen_CreateSegmentBranchedPathMultiple(*ppcImlGenContext, *ppcImlGenContext->currentBasicBlock, caseSegment, loadTypeReg, compareValues, 5, 0); + for (sint32 i=0; i<5; i++) + { + IMLRedirectInstOutput outputToCase(ppcImlGenContext, caseSegment[i]); // while this is in scope, instructions go to caseSegment[i] + PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, gqrIndex, static_cast(compareValues[i]), storePS1, gprA, imm, fprDPS0, fprDPS1); + ppcImlGenContext->emitInst().make_jump(); // finalize case + } + return true; + } + + Espresso::PSQ_LOAD_TYPE type = static_cast((gqrValue >> 16) & 0x7); + sint32 scale = (gqrValue >> 24) & 0x3F; + cemu_assert_debug(scale == 0); // known GQR values always use a scale of 0 (1.0f) + + if (type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED1 || + type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED2 || + type == Espresso::PSQ_LOAD_TYPE::TYPE_UNUSED3) + { return false; + } - uint32 imm = opcode & 0xFFF; - if (imm & 0x800) - imm |= ~0xFFF; - sint32 gqrIndex = ((opcode >> 12) & 7); - - bool storePS1 = (opcode & 0x8000) == false; - - // get gqr register - uint32 gqrRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_SPR0 + SPR_UGQR0 + gqrIndex, false); - // get memory gpr register index - uint32 gprRegister = PPCRecompilerImlGen_loadRegister(ppcImlGenContext, PPCREC_NAME_R0 + rA, false); - // add imm to memory register - PPCRecompilerImlGen_generateNewInstruction_r_s32(ppcImlGenContext, PPCREC_IML_OP_ADD, gprRegister, (sint32)imm, 0, false, false, PPC_REC_INVALID_REGISTER, 0); - // get fpr register index - uint32 fprRegister = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // paired store - PPCRecompilerImlGen_generateNewInstruction_fpr_memory_r(ppcImlGenContext, fprRegister, gprRegister, 0, storePS1 ? 
PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0, true, gqrRegister); + PPCRecompilerImlGen_EmitPSQStoreCase(ppcImlGenContext, gqrIndex, type, storePS1, gprA, imm, fprDPS0, fprDPS1); return true; } -bool PPCRecompilerImlGen_PS_MULS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// PS_MULS0 and PS_MULS1 +bool PPCRecompilerImlGen_PS_MULSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1) { sint32 frD, frA, frC; frC = (opcode>>6)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); - // if frD == frA we can multiply frD immediately and safe a copy instruction - if( frD == frA ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - } - else - { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePSX(fprC, frC, isVariant1); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + DefineTempFPR(fprTmp0, 0); + DefineTempFPR(fprTmp1, 1); + + // todo - optimize cases where a temporary is not necessary + // todo - round fprC to 25bit accuracy + + // copy ps0 and ps1 to temporary + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTmp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTmp1, fprAps1); + + // multiply + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp0, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp1, fprC); + + // copy back to result + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTmp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTmp1); + + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); + return true; } -bool PPCRecompilerImlGen_PS_MULS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 frD, frA, frC; - frC = (opcode>>6)&0x1F; - frA = (opcode>>16)&0x1F; - frD = (opcode>>21)&0x1F; - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary 
register to store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); - // if frD == frA we can multiply frD immediately and safe a copy instruction - if( frD == frA ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - } - else - { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - return true; -} - -bool PPCRecompilerImlGen_PS_MADDS0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// PS_MADDS0 and PS_MADDS1 +bool PPCRecompilerImlGen_PS_MADDSX(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool isVariant1) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp0); - //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp1); - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction - if( frD == frA && frD != frB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); - } - else - { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - return true; -} -bool PPCRecompilerImlGen_PS_MADDS1(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - sint32 frD, frA, frB, frC; - frC = (opcode>>6)&0x1F; - frB = (opcode>>11)&0x1F; - frA = 
(opcode>>16)&0x1F; - frD = (opcode>>21)&0x1F; - //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp0); - //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp1); - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp1 in bottom and top half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterTemp, fprRegisterC); - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction - if( frD == frA && frD != frB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); - } - else - { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePSX(fprC, frC, isVariant1); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + DefineTempFPR(fprTmp0, 0); + DefineTempFPR(fprTmp1, 1); + + // todo - round C to 25bit + // todo - optimize cases where a temporary is not necessary + + // copy ps0 and ps1 to temporary + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTmp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTmp1, fprAps1); + + // multiply + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp0, fprC); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTmp1, fprC); + + // add + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTmp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTmp1, fprBps1); + + // copy back to result + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTmp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTmp1); + + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1369,25 +1093,34 @@ bool PPCRecompilerImlGen_PS_ADD(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frD = (opcode>>21)&0x1F; //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 + 
hCPU->fpr[frB].fp1; - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + if( frD == frA ) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } else if( frD == frB ) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprAps1); } else { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterA); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1399,13 +1132,20 @@ bool PPCRecompilerImlGen_PS_SUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frD = (opcode>>21)&0x1F; //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 - hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 - hCPU->fpr[frB].fp1; - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterD, fprRegisterA, fprRegisterB); + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprAps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprAps1, fprBps1); + // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1415,28 +1155,37 @@ bool PPCRecompilerImlGen_PS_MUL(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frC = (opcode >> 6) & 0x1F; frA 
= (opcode >> 16) & 0x1F; frD = (opcode >> 21) & 0x1F; - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // we need a temporary register - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + 0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA we can multiply frD immediately and safe a copy instruction + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + + // todo: Optimize for when a temporary isnt necessary + // todo: Round to 25bit? + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); if (frD == frA) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); } else { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1448,28 +1197,35 @@ bool PPCRecompilerImlGen_PS_DIV(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frD = (opcode >> 21) & 0x1F; //hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0 / hCPU->fpr[frB].fp0; //hCPU->fpr[frD].fp1 = hCPU->fpr[frA].fp1 / hCPU->fpr[frB].fp1; - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frD); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA we can divide frD immediately and safe a copy instruction + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, 
frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + if (frD == frA) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprDps1, fprBps1); } else { - // we need a temporary register - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0 + 0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterA); + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprAps1); // we divide temporary by frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_DIVIDE_PAIR, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_DIVIDE, fprTemp1, fprBps1); // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1483,33 +1239,61 @@ bool PPCRecompilerImlGen_PS_MADD(ppcImlGenContext_t* ppcImlGenContext, uint32 op //float s0 = (float)(hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 + hCPU->fpr[frB].fp0); //float s1 = (float)(hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 + hCPU->fpr[frB].fp1); - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA and frD != frB we can multiply frD immediately and save a copy instruction - if( frD == frA && frD != frB ) + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + + if (frD != frA && frD != frB) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // add frB - 
PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + if (frD == frC) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps1, fprAps1); + } + else + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprCps1); + } + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } else { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); + if( frD == frA && frD != frB ) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); + } + else + { + // we multiply temporary by frA + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); + // add frB + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp1, fprBps1); + // copy result to frD + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); + } } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1521,81 +1305,54 @@ bool PPCRecompilerImlGen_PS_NMADD(ppcImlGenContext_t* ppcImlGenContext, uint32 o frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - 
uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction + // if frD == frA and frD != frB we can multiply frD immediately and save a copy instruction if( frD == frA && frD != frB ) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); } else { // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); // add frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ADD_PAIR, fprRegisterTemp, fprRegisterB); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprTemp1, fprBps1); // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); } + // negate - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_NEGATE_PAIR, fprRegisterD, fprRegisterD); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1); // adjust accuracy //PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); // Splatoon requires that we emulate flush-to-denormals for this instruction - //PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, NULL,PPCREC_IML_OP_FPR_ROUND_FLDN_TO_SINGLE_PRECISION_PAIR, fprRegisterD, false); + //ppcImlGenContext->emitInst().make_fpr_r(NULL,PPCREC_IML_OP_FPR_ROUND_FLDN_TO_SINGLE_PRECISION_PAIR, fprRegisterD, false); return true; } -bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) -{ - 
sint32 frD, frA, frB, frC; - frC = (opcode>>6)&0x1F; - frB = (opcode>>11)&0x1F; - frA = (opcode>>16)&0x1F; - frD = (opcode>>21)&0x1F; - //hCPU->fpr[frD].fp0 = (hCPU->fpr[frA].fp0 * hCPU->fpr[frC].fp0 - hCPU->fpr[frB].fp0); - //hCPU->fpr[frD].fp1 = (hCPU->fpr[frA].fp1 * hCPU->fpr[frC].fp1 - hCPU->fpr[frB].fp1); - - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA and frD != frB we can multiply frD immediately and safe a copy instruction - if( frD == frA && frD != frB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // sub frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterD, fprRegisterB); - } - else - { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // sub frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); - } - // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); - return true; -} - -bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) +// PS_MSUB and PS_NMSUB +bool PPCRecompilerImlGen_PS_MSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode, bool withNegative) { sint32 frD, frA, frB, frC; frC = (opcode>>6)&0x1F; @@ -1603,35 +1360,64 @@ bool PPCRecompilerImlGen_PS_NMSUB(ppcImlGenContext_t* ppcImlGenContext, uint32 o frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // we need a temporary register to store frC.fp0 in low and high half - uint32 fprRegisterTemp = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_TEMPORARY_FPR0+0); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterTemp, fprRegisterC); - // todo-optimize: This instruction can be optimized so that it doesn't always use a temporary register - // if frD == frA and frD != frB we can multiply frD 
immediately and safe a copy instruction - if( frD == frA && frD != frB ) + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + + if (frD != frA && frD != frB) { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterD, fprRegisterTemp); - // sub frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterD, fprRegisterB); + if (frD == frC) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprCps1, fprAps1); + } + else + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprCps1); + } + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprBps1); } else { - // we multiply temporary by frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_MULTIPLY_PAIR, fprRegisterTemp, fprRegisterA); - // sub frB - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUB_PAIR, fprRegisterTemp, fprRegisterB); - // copy result to frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterTemp); + DefineTempFPR(fprTemp0, 0); + DefineTempFPR(fprTemp1, 1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprTemp1, fprCps1); + if( frD == frA && frD != frB ) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprDps1, fprTemp1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprDps1, fprBps1); + } + else + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_MULTIPLY, fprTemp1, fprAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_SUB, fprTemp1, fprBps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprTemp0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprTemp1); + } } // negate result - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_NEGATE_PAIR, fprRegisterD, fprRegisterD); + if (withNegative) + { + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1); + } // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + 
PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1642,18 +1428,27 @@ bool PPCRecompilerImlGen_PS_SUM0(ppcImlGenContext_t* ppcImlGenContext, uint32 op frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - //float s0 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); - //float s1 = (float)hCPU->fpr[frC].fp1; - //hCPU->fpr[frD].fp0 = s0; - //hCPU->fpr[frD].fp1 = s1; - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUM0, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprBps1, frB); + DefinePS1(fprCps1, frC); + + if( frD == frA ) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps1); + } + else + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps0, fprBps1); + } + if (fprDps1 != fprCps1) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprCps1); // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1664,18 +1459,26 @@ bool PPCRecompilerImlGen_PS_SUM1(ppcImlGenContext_t* ppcImlGenContext, uint32 op frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - //float s0 = (float)hCPU->fpr[frC].fp0; - //float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); - //hCPU->fpr[frD].fp0 = s0; - //hCPU->fpr[frD].fp1 = s1; - // load registers - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SUM1, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); + + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + DefinePS0(fprAps0, frA); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + + if (frB != frD) + { + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprBps1); + } + else + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ADD, fprDps1, fprAps0); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprCps0); + // adjust accuracy - PPRecompilerImmGen_optionalRoundPairFPRToSinglePrecision(ppcImlGenContext, fprRegisterD); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, 
fprDps1); return true; } @@ -1684,12 +1487,20 @@ bool PPCRecompilerImlGen_PS_NEG(ppcImlGenContext_t* ppcImlGenContext, uint32 opc sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; - //hCPU->fpr[frD].fp0 = -hCPU->fpr[frB].fp0; - //hCPU->fpr[frD].fp1 = -hCPU->fpr[frB].fp1; - // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_NEGATE_PAIR, fprRegisterD, fprRegisterB); + + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + if (frB != frD) + { + // copy + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprBps1); + } + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_NEGATE, fprDps1); return true; } @@ -1698,10 +1509,17 @@ bool PPCRecompilerImlGen_PS_ABS(ppcImlGenContext_t* ppcImlGenContext, uint32 opc sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; - // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_ABS_PAIR, fprRegisterD, fprRegisterB); + + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprBps1); + + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS, fprDps0); + ppcImlGenContext->emitInst().make_fpr_r(PPCREC_IML_OP_FPR_ABS, fprDps1); return true; } @@ -1713,11 +1531,16 @@ bool PPCRecompilerImlGen_PS_RES(ppcImlGenContext_t* ppcImlGenContext, uint32 opc //hCPU->fpr[frD].fp0 = (float)(1.0f / (float)hCPU->fpr[frB].fp0); //hCPU->fpr[frD].fp1 = (float)(1.0f / (float)hCPU->fpr[frB].fp1); - // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FRES_PAIR, fprRegisterD, fprRegisterB); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)fres_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1); + // adjust accuracy + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1726,13 +1549,17 @@ bool PPCRecompilerImlGen_PS_RSQRTE(ppcImlGenContext_t* ppcImlGenContext, uint32 sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; - //hCPU->fpr[frD].fp0 = (float)(1.0f / (float)sqrt(hCPU->fpr[frB].fp0)); - //hCPU->fpr[frD].fp1 = (float)(1.0f / 
(float)sqrt(hCPU->fpr[frB].fp1)); - // load registers - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FRSQRTE_PAIR, fprRegisterD, fprRegisterB); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps0, IMLREG_INVALID, IMLREG_INVALID, fprDps0); + ppcImlGenContext->emitInst().make_call_imm((uintptr_t)frsqrte_espresso, fprBps1, IMLREG_INVALID, IMLREG_INVALID, fprDps1); + // adjust accuracy + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps0); + PPRecompilerImmGen_roundToSinglePrecision(ppcImlGenContext, fprDps1); return true; } @@ -1741,14 +1568,15 @@ bool PPCRecompilerImlGen_PS_MR(ppcImlGenContext_t* ppcImlGenContext, uint32 opco sint32 frD, frB; frB = (opcode>>11)&0x1F; frD = (opcode>>21)&0x1F; - //hCPU->fpr[frD].fp0 = hCPU->fpr[frB].fp0; - //hCPU->fpr[frD].fp1 = hCPU->fpr[frB].fp1; - // load registers if( frB != frD ) { - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_PAIR, fprRegisterD, fprRegisterB); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps0, fprBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, fprDps1, fprBps1); } return true; } @@ -1761,11 +1589,17 @@ bool PPCRecompilerImlGen_PS_SEL(ppcImlGenContext_t* ppcImlGenContext, uint32 opc frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterC = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frC); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_SELECT_PAIR, fprRegisterD, fprRegisterA, fprRegisterB, fprRegisterC); + DefinePS0(fprAps0, frA); + DefinePS1(fprAps1, frA); + DefinePS0(fprBps0, frB); + DefinePS1(fprBps1, frB); + DefinePS0(fprCps0, frC); + DefinePS1(fprCps1, frC); + DefinePS0(fprDps0, frD); + DefinePS1(fprDps1, frD); + + ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT, fprDps0, fprAps0, fprBps0, fprCps0); + ppcImlGenContext->emitInst().make_fpr_r_r_r_r(PPCREC_IML_OP_FPR_SELECT, fprDps1, fprAps1, fprBps1, fprCps1); return true; } @@ -1775,26 +1609,13 @@ bool PPCRecompilerImlGen_PS_MERGE00(ppcImlGenContext_t* ppcImlGenContext, uint32 frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - //float s0 = (float)hCPU->fpr[frA].fp0; - //float s1 = (float)hCPU->fpr[frB].fp0; - //hCPU->fpr[frD].fp0 = s0; - //hCPU->fpr[frD].fp1 = s1; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = 
PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - // unpcklpd - if( frA == frB ) - { - // simply duplicate bottom into bottom and top of destination register - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); - } - else - { - // copy bottom of frB to top first - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP, fprRegisterD, fprRegisterB); - // copy bottom of frA - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterA); - } + DefinePS0(frpAps0, frA); + DefinePS0(frpBps0, frB); + DefinePS0(frpDps0, frD); + DefinePS1(frpDps1, frD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps0); + if (frpDps0 != frpAps0) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps0); return true; } @@ -1804,17 +1625,14 @@ bool PPCRecompilerImlGen_PS_MERGE01(ppcImlGenContext_t* ppcImlGenContext, uint32 frB = (opcode>>11)&0x1F; frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - // hCPU->fpr[frD].fp0 = hCPU->fpr[frA].fp0; - // hCPU->fpr[frD].fp1 = hCPU->fpr[frB].fp1; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - - if( fprRegisterD != fprRegisterB ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, fprRegisterD, fprRegisterB); - if( fprRegisterD != fprRegisterA ) - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterA); + DefinePS0(frpAps0, frA); + DefinePS1(frpBps1, frB); + DefinePS0(frpDps0, frD); + DefinePS1(frpDps1, frD); + if (frpDps0 != frpAps0) + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps1); return true; } @@ -1825,33 +1643,22 @@ bool PPCRecompilerImlGen_PS_MERGE10(ppcImlGenContext_t* ppcImlGenContext, uint32 frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( frA == frB ) + DefinePS1(frpAps1, frA); + DefinePS0(frpBps0, frB); + DefinePS0(frpDps0, frD); + DefinePS1(frpDps1, frD); + + if (frD != frB) { - // swap bottom and top - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, fprRegisterD, fprRegisterA); - } - else if( frA == frD ) - { - // copy frB bottom to frD bottom - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - // swap lower and upper half of frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, 
fprRegisterD, fprRegisterD); - } - else if( frB == frD ) - { - // copy upper half of frA to upper half of frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, fprRegisterD, fprRegisterA); - // swap lower and upper half of frD - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, fprRegisterD, fprRegisterD); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps0); } else { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM, fprRegisterD, fprRegisterB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED, fprRegisterD, fprRegisterD); + DefineTempFPR(frpTemp, 0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpTemp, frpBps0); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpTemp); } return true; } @@ -1863,42 +1670,20 @@ bool PPCRecompilerImlGen_PS_MERGE11(ppcImlGenContext_t* ppcImlGenContext, uint32 frA = (opcode>>16)&0x1F; frD = (opcode>>21)&0x1F; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - uint32 fprRegisterD = PPCRecompilerImlGen_loadOverwriteFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frD); - if( fprRegisterA == fprRegisterB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); - } - else if( fprRegisterD != fprRegisterB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP, fprRegisterD, fprRegisterA); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP, fprRegisterD, fprRegisterB); - } - else if( fprRegisterD == fprRegisterB ) - { - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM, fprRegisterD, fprRegisterA); - } - else - { - debugBreakpoint(); - return false; - } + DefinePS1(frpAps1, frA); + DefinePS1(frpBps1, frB); + DefinePS0(frpDps0, frD); + DefinePS1(frpDps1, frD); + + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps0, frpAps1); + ppcImlGenContext->emitInst().make_fpr_r_r(PPCREC_IML_OP_FPR_ASSIGN, frpDps1, frpBps1); return true; } bool PPCRecompilerImlGen_PS_CMPO0(ppcImlGenContext_t* ppcImlGenContext, uint32 opcode) { - sint32 crfD, frA, frB; - uint32 c=0; - frB = (opcode>>11)&0x1F; - frA = (opcode>>16)&0x1F; - crfD = (opcode>>23)&0x7; - - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0+frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPO_BOTTOM, fprRegisterA, fprRegisterB, crfD); - return true; + // Not implemented + return false; } bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* 
ppcImlGenContext, uint32 opcode) @@ -1907,9 +1692,21 @@ bool PPCRecompilerImlGen_PS_CMPU0(ppcImlGenContext_t* ppcImlGenContext, uint32 o frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; crfD = (opcode >> 23) & 0x7; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPU_BOTTOM, fprRegisterA, fprRegisterB, crfD); + + DefinePS0(fprA, frA); + DefinePS0(fprB, frB); + + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegLT, IMLCondition::UNORDERED_LT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegGT, IMLCondition::UNORDERED_GT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); + + // todo: set fpscr return true; } @@ -1919,8 +1716,18 @@ bool PPCRecompilerImlGen_PS_CMPU1(ppcImlGenContext_t* ppcImlGenContext, uint32 o frB = (opcode >> 11) & 0x1F; frA = (opcode >> 16) & 0x1F; crfD = (opcode >> 23) & 0x7; - uint32 fprRegisterA = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frA); - uint32 fprRegisterB = PPCRecompilerImlGen_loadFPRRegister(ppcImlGenContext, PPCREC_NAME_FPR0 + frB); - PPCRecompilerImlGen_generateNewInstruction_fpr_r_r(ppcImlGenContext, PPCREC_IML_OP_FPR_FCMPU_TOP, fprRegisterA, fprRegisterB, crfD); + + DefinePS1(fprA, frA); + DefinePS1(fprB, frB); + + IMLReg crBitRegLT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_LT); + IMLReg crBitRegGT = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_GT); + IMLReg crBitRegEQ = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_EQ); + IMLReg crBitRegSO = _GetRegCR(ppcImlGenContext, crfD, Espresso::CR_BIT::CR_BIT_INDEX_SO); + + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegLT, IMLCondition::UNORDERED_LT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegGT, IMLCondition::UNORDERED_GT); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegEQ, IMLCondition::UNORDERED_EQ); + ppcImlGenContext->emitInst().make_fpr_compare(fprA, fprB, crBitRegSO, IMLCondition::UNORDERED_U); return true; -} \ No newline at end of file +} diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp deleted file mode 100644 index 1a15bd22..00000000 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlOptimizer.cpp +++ /dev/null @@ -1,2175 +0,0 @@ -#include "../Interpreter/PPCInterpreterInternal.h" -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" - -void PPCRecompiler_checkRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, PPCImlOptimizerUsedRegisters_t* registersUsed) -{ - registersUsed->readNamedReg1 = -1; - registersUsed->readNamedReg2 = -1; - registersUsed->readNamedReg3 = -1; - 
registersUsed->writtenNamedReg1 = -1; - registersUsed->readFPR1 = -1; - registersUsed->readFPR2 = -1; - registersUsed->readFPR3 = -1; - registersUsed->readFPR4 = -1; - registersUsed->writtenFPR1 = -1; - if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME ) - { - registersUsed->writtenNamedReg1 = imlInstruction->op_r_name.registerIndex; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_NAME_R ) - { - registersUsed->readNamedReg1 = imlInstruction->op_r_name.registerIndex; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R ) - { - if (imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED || imlInstruction->operation == PPCREC_IML_OP_DCBZ) - { - // both operands are read only - registersUsed->readNamedReg1 = imlInstruction->op_r_r.registerResult; - registersUsed->readNamedReg2 = imlInstruction->op_r_r.registerA; - } - else if ( - imlInstruction->operation == PPCREC_IML_OP_OR || - imlInstruction->operation == PPCREC_IML_OP_AND || - imlInstruction->operation == PPCREC_IML_OP_XOR || - imlInstruction->operation == PPCREC_IML_OP_ADD || - imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY || - imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_ME || - imlInstruction->operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY) - { - // result is read and written, operand is read - registersUsed->writtenNamedReg1 = imlInstruction->op_r_r.registerResult; - registersUsed->readNamedReg1 = imlInstruction->op_r_r.registerResult; - registersUsed->readNamedReg2 = imlInstruction->op_r_r.registerA; - } - else if ( - imlInstruction->operation == PPCREC_IML_OP_ASSIGN || - imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP || - imlInstruction->operation == PPCREC_IML_OP_CNTLZW || - imlInstruction->operation == PPCREC_IML_OP_NOT || - imlInstruction->operation == PPCREC_IML_OP_NEG || - imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32 || - imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32) - { - // result is written, operand is read - registersUsed->writtenNamedReg1 = imlInstruction->op_r_r.registerResult; - registersUsed->readNamedReg1 = imlInstruction->op_r_r.registerA; - } - else - cemu_assert_unimplemented(); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) - { - if (imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED || imlInstruction->operation == PPCREC_IML_OP_MTCRF) - { - // operand register is read only - registersUsed->readNamedReg1 = imlInstruction->op_r_immS32.registerIndex; - } - else if (imlInstruction->operation == PPCREC_IML_OP_ADD || - imlInstruction->operation == PPCREC_IML_OP_SUB || - imlInstruction->operation == PPCREC_IML_OP_AND || - imlInstruction->operation == PPCREC_IML_OP_OR || - imlInstruction->operation == PPCREC_IML_OP_XOR || - imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE) - { - // operand register is read and write - registersUsed->readNamedReg1 = imlInstruction->op_r_immS32.registerIndex; - registersUsed->writtenNamedReg1 = imlInstruction->op_r_immS32.registerIndex; - } - else - { - // operand register is write only - // todo - use explicit lists, avoid default cases - registersUsed->writtenNamedReg1 = imlInstruction->op_r_immS32.registerIndex; - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - { - // result is written, but also considered read (in case the condition fails) - registersUsed->readNamedReg1 = 
imlInstruction->op_conditional_r_s32.registerIndex; - registersUsed->writtenNamedReg1 = imlInstruction->op_conditional_r_s32.registerIndex; - } - else - cemu_assert_unimplemented(); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_S32 ) - { - if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI ) - { - // result and operand register are both read, result is written - registersUsed->writtenNamedReg1 = imlInstruction->op_r_r_s32.registerResult; - registersUsed->readNamedReg1 = imlInstruction->op_r_r_s32.registerResult; - registersUsed->readNamedReg2 = imlInstruction->op_r_r_s32.registerA; - } - else - { - // result is write only and operand is read only - registersUsed->writtenNamedReg1 = imlInstruction->op_r_r_s32.registerResult; - registersUsed->readNamedReg1 = imlInstruction->op_r_r_s32.registerA; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_R ) - { - // in all cases result is written and other operands are read only - registersUsed->writtenNamedReg1 = imlInstruction->op_r_r_r.registerResult; - registersUsed->readNamedReg1 = imlInstruction->op_r_r_r.registerA; - registersUsed->readNamedReg2 = imlInstruction->op_r_r_r.registerB; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) - { - // no effect on registers - } - else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) - { - // no effect on registers - } - else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) - { - if( imlInstruction->operation == PPCREC_IML_MACRO_BL || imlInstruction->operation == PPCREC_IML_MACRO_B_FAR || imlInstruction->operation == PPCREC_IML_MACRO_BLR || imlInstruction->operation == PPCREC_IML_MACRO_BLRL || imlInstruction->operation == PPCREC_IML_MACRO_BCTR || imlInstruction->operation == PPCREC_IML_MACRO_BCTRL || imlInstruction->operation == PPCREC_IML_MACRO_LEAVE || imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK || imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES || imlInstruction->operation == PPCREC_IML_MACRO_HLE || imlInstruction->operation == PPCREC_IML_MACRO_MFTB ) - { - // no effect on registers - } - else - cemu_assert_unimplemented(); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD) - { - registersUsed->writtenNamedReg1 = imlInstruction->op_storeLoad.registerData; - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; - } - else if (imlInstruction->type == PPCREC_IML_TYPE_MEM2MEM) - { - registersUsed->readNamedReg1 = imlInstruction->op_mem2mem.src.registerMem; - registersUsed->readNamedReg2 = imlInstruction->op_mem2mem.dst.registerMem; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED ) - { - registersUsed->writtenNamedReg1 = imlInstruction->op_storeLoad.registerData; - if( imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; - if( imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerMem2; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_STORE ) - { - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerData; - if( imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerMem; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED ) - { - 
registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerData; - if( imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerMem; - if( imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg3 = imlInstruction->op_storeLoad.registerMem2; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) - { - // only affects cr register - } - else if( imlInstruction->type == PPCREC_IML_TYPE_JUMPMARK ) - { - // no effect on registers - } - else if( imlInstruction->type == PPCREC_IML_TYPE_PPC_ENTER ) - { - // no op - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) - { - // fpr operation - registersUsed->writtenFPR1 = imlInstruction->op_r_name.registerIndex; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R ) - { - // fpr operation - registersUsed->readFPR1 = imlInstruction->op_r_name.registerIndex; - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD ) - { - // fpr load operation - registersUsed->writtenFPR1 = imlInstruction->op_storeLoad.registerData; - // address is in gpr register - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; - // determine partially written result - switch (imlInstruction->op_storeLoad.mode) - { - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerGQR; - break; - case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: - // PS1 remains the same - registersUsed->readFPR4 = imlInstruction->op_storeLoad.registerData; - break; - case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S8_PS0: - break; - default: - cemu_assert_unimplemented(); - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED ) - { - // fpr load operation - registersUsed->writtenFPR1 = imlInstruction->op_storeLoad.registerData; - // address is in gpr registers - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; - if (imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerMem2; - // determine partially written result - switch (imlInstruction->op_storeLoad.mode) - { - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - registersUsed->readNamedReg3 = imlInstruction->op_storeLoad.registerGQR; - break; - case PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0: - // PS1 remains the same - registersUsed->readFPR4 = imlInstruction->op_storeLoad.registerData; - break; - case PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S16_PS0: - case 
PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U16_PS0: - case PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1: - case PPCREC_FPR_LD_MODE_PSQ_U8_PS0: - break; - default: - cemu_assert_unimplemented(); - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE ) - { - // fpr store operation - registersUsed->readFPR1 = imlInstruction->op_storeLoad.registerData; - if( imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; - // PSQ generic stores also access GQR - switch (imlInstruction->op_storeLoad.mode) - { - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerGQR; - break; - default: - break; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED ) - { - // fpr store operation - registersUsed->readFPR1 = imlInstruction->op_storeLoad.registerData; - // address is in gpr registers - if( imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg1 = imlInstruction->op_storeLoad.registerMem; - if( imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER ) - registersUsed->readNamedReg2 = imlInstruction->op_storeLoad.registerMem2; - // PSQ generic stores also access GQR - switch (imlInstruction->op_storeLoad.mode) - { - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0: - case PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1: - cemu_assert_debug(imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - registersUsed->readNamedReg3 = imlInstruction->op_storeLoad.registerGQR; - break; - default: - break; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R ) - { - // fpr operation - if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED || - imlInstruction->operation == PPCREC_IML_OP_ASSIGN || - imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR ) - { - // operand read, result written - registersUsed->readFPR1 = imlInstruction->op_fpr_r_r.registerOperand; - registersUsed->writtenFPR1 = imlInstruction->op_fpr_r_r.registerResult; - } - else if( - imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 || - imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ || - imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT - ) - { - // operand read, result read and (partially) written - registersUsed->readFPR1 = imlInstruction->op_fpr_r_r.registerOperand; - registersUsed->readFPR4 = imlInstruction->op_fpr_r_r.registerResult; - registersUsed->writtenFPR1 = 
imlInstruction->op_fpr_r_r.registerResult; - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR || - imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) - { - // operand read, result read and written - registersUsed->readFPR1 = imlInstruction->op_fpr_r_r.registerOperand; - registersUsed->readFPR2 = imlInstruction->op_fpr_r_r.registerResult; - registersUsed->writtenFPR1 = imlInstruction->op_fpr_r_r.registerResult; - - } - else if(imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_TOP || - imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPO_BOTTOM) - { - // operand read, result read - registersUsed->readFPR1 = imlInstruction->op_fpr_r_r.registerOperand; - registersUsed->readFPR2 = imlInstruction->op_fpr_r_r.registerResult; - } - else - cemu_assert_unimplemented(); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R ) - { - // fpr operation - registersUsed->readFPR1 = imlInstruction->op_fpr_r_r_r.registerOperandA; - registersUsed->readFPR2 = imlInstruction->op_fpr_r_r_r.registerOperandB; - registersUsed->writtenFPR1 = imlInstruction->op_fpr_r_r_r.registerResult; - // handle partially written result - switch (imlInstruction->operation) - { - case PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM: - case PPCREC_IML_OP_FPR_ADD_BOTTOM: - case PPCREC_IML_OP_FPR_SUB_BOTTOM: - registersUsed->readFPR4 = imlInstruction->op_fpr_r_r_r.registerResult; - break; - case PPCREC_IML_OP_FPR_SUB_PAIR: - break; - default: - cemu_assert_unimplemented(); - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R ) - { - // fpr operation - registersUsed->readFPR1 = imlInstruction->op_fpr_r_r_r_r.registerOperandA; - registersUsed->readFPR2 = imlInstruction->op_fpr_r_r_r_r.registerOperandB; - registersUsed->readFPR3 = imlInstruction->op_fpr_r_r_r_r.registerOperandC; - registersUsed->writtenFPR1 = imlInstruction->op_fpr_r_r_r_r.registerResult; - // handle partially written result - switch (imlInstruction->operation) - { - case PPCREC_IML_OP_FPR_SELECT_BOTTOM: - registersUsed->readFPR4 = imlInstruction->op_fpr_r_r_r_r.registerResult; - break; - case PPCREC_IML_OP_FPR_SUM0: - case PPCREC_IML_OP_FPR_SUM1: - case PPCREC_IML_OP_FPR_SELECT_PAIR: - break; - default: - cemu_assert_unimplemented(); - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R ) - { - // fpr operation - if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64 || - imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR ) - { - registersUsed->readFPR1 = imlInstruction->op_fpr_r.registerResult; - registersUsed->writtenFPR1 = imlInstruction->op_fpr_r.registerResult; - } - else - cemu_assert_unimplemented(); - } - else - { - cemu_assert_unimplemented(); - } -} - -#define replaceRegister(__x,__r,__n) (((__x)==(__r))?(__n):(__x)) - 
-sint32 replaceRegisterMultiple(sint32 reg, sint32 match[4], sint32 replaced[4]) -{ - for (sint32 i = 0; i < 4; i++) - { - if(match[i] < 0) - continue; - if (reg == match[i]) - { - return replaced[i]; - } - } - return reg; -} - -void PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]) -{ - if (imlInstruction->type == PPCREC_IML_TYPE_R_NAME) - { - imlInstruction->op_r_name.registerIndex = replaceRegisterMultiple(imlInstruction->op_r_name.registerIndex, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_NAME_R) - { - imlInstruction->op_r_name.registerIndex = replaceRegisterMultiple(imlInstruction->op_r_name.registerIndex, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_R) - { - imlInstruction->op_r_r.registerResult = replaceRegisterMultiple(imlInstruction->op_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced); - imlInstruction->op_r_r.registerA = replaceRegisterMultiple(imlInstruction->op_r_r.registerA, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) - { - imlInstruction->op_r_immS32.registerIndex = replaceRegisterMultiple(imlInstruction->op_r_immS32.registerIndex, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - imlInstruction->op_conditional_r_s32.registerIndex = replaceRegisterMultiple(imlInstruction->op_conditional_r_s32.registerIndex, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) - { - // in all cases result is written and other operand is read only - imlInstruction->op_r_r_s32.registerResult = replaceRegisterMultiple(imlInstruction->op_r_r_s32.registerResult, gprRegisterSearched, gprRegisterReplaced); - imlInstruction->op_r_r_s32.registerA = replaceRegisterMultiple(imlInstruction->op_r_r_s32.registerA, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) - { - // in all cases result is written and other operands are read only - imlInstruction->op_r_r_r.registerResult = replaceRegisterMultiple(imlInstruction->op_r_r_r.registerResult, gprRegisterSearched, gprRegisterReplaced); - imlInstruction->op_r_r_r.registerA = replaceRegisterMultiple(imlInstruction->op_r_r_r.registerA, gprRegisterSearched, gprRegisterReplaced); - imlInstruction->op_r_r_r.registerB = replaceRegisterMultiple(imlInstruction->op_r_r_r.registerB, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - { - // no effect on registers - } - else if (imlInstruction->type == PPCREC_IML_TYPE_NO_OP) - { - // no effect on registers - } - else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) - { - if (imlInstruction->operation == PPCREC_IML_MACRO_BL || imlInstruction->operation == PPCREC_IML_MACRO_B_FAR || imlInstruction->operation == PPCREC_IML_MACRO_BLR || imlInstruction->operation == PPCREC_IML_MACRO_BLRL || imlInstruction->operation == PPCREC_IML_MACRO_BCTR || imlInstruction->operation == PPCREC_IML_MACRO_BCTRL || imlInstruction->operation == PPCREC_IML_MACRO_LEAVE || imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK || imlInstruction->operation == PPCREC_IML_MACRO_HLE || imlInstruction->operation == PPCREC_IML_MACRO_MFTB || 
imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES ) - { - // no effect on registers - } - else - { - cemu_assert_unimplemented(); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - if (imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - imlInstruction->op_storeLoad.registerMem2 = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_STORE) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, gprRegisterSearched, gprRegisterReplaced); - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - if (imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - imlInstruction->op_storeLoad.registerMem2 = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CR) - { - // only affects cr register - } - else if (imlInstruction->type == PPCREC_IML_TYPE_JUMPMARK) - { - // no effect on registers - } - else if (imlInstruction->type == PPCREC_IML_TYPE_PPC_ENTER) - { - // no op - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) - { - - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R) - { - - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD) - { - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - } - if (imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerGQR = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) - { - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) 
- { - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - } - if (imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem2 = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); - } - if (imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerGQR = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE) - { - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - } - if (imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerGQR = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) - { - if (imlInstruction->op_storeLoad.registerMem != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem, gprRegisterSearched, gprRegisterReplaced); - } - if (imlInstruction->op_storeLoad.registerMem2 != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerMem2 = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerMem2, gprRegisterSearched, gprRegisterReplaced); - } - if (imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER) - { - imlInstruction->op_storeLoad.registerGQR = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerGQR, gprRegisterSearched, gprRegisterReplaced); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R) - { - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R) - { - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R) - { - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R) - { - } - else - { - cemu_assert_unimplemented(); - } -} - -void PPCRecompiler_replaceFPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 fprRegisterSearched[4], sint32 fprRegisterReplaced[4]) -{ - if (imlInstruction->type == PPCREC_IML_TYPE_R_NAME) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_NAME_R) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_R) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_S32) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_R_R) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK) - { - // no effect on registers - } - else if (imlInstruction->type == PPCREC_IML_TYPE_NO_OP) - { - // no effect on registers - } - else if (imlInstruction->type == PPCREC_IML_TYPE_MACRO) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_LOAD) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_MEM2MEM) - { - // not affected - } - else if (imlInstruction->type == 
PPCREC_IML_TYPE_LOAD_INDEXED) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_STORE) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CR) - { - // only affects cr register - } - else if (imlInstruction->type == PPCREC_IML_TYPE_JUMPMARK) - { - // no effect on registers - } - else if (imlInstruction->type == PPCREC_IML_TYPE_PPC_ENTER) - { - // no op - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) - { - imlInstruction->op_r_name.registerIndex = replaceRegisterMultiple(imlInstruction->op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R) - { - imlInstruction->op_r_name.registerIndex = replaceRegisterMultiple(imlInstruction->op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) - { - imlInstruction->op_storeLoad.registerData = replaceRegisterMultiple(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R) - { - imlInstruction->op_fpr_r_r.registerResult = replaceRegisterMultiple(imlInstruction->op_fpr_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r.registerOperand = replaceRegisterMultiple(imlInstruction->op_fpr_r_r.registerOperand, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R) - { - imlInstruction->op_fpr_r_r_r.registerResult = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r.registerOperandA = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r.registerOperandB = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R) - { - imlInstruction->op_fpr_r_r_r_r.registerResult = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r_r.registerOperandA = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r_r.registerOperandB = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r_r.registerOperandC = replaceRegisterMultiple(imlInstruction->op_fpr_r_r_r_r.registerOperandC, fprRegisterSearched, fprRegisterReplaced); - } - else 
if (imlInstruction->type == PPCREC_IML_TYPE_FPR_R) - { - imlInstruction->op_fpr_r.registerResult = replaceRegisterMultiple(imlInstruction->op_fpr_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - } - else - { - cemu_assert_unimplemented(); - } -} - -void PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 fprRegisterSearched, sint32 fprRegisterReplaced) -{ - if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_NAME_R ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_S32 ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_S32 ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_R ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) - { - // no effect on registers - } - else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) - { - // no effect on registers - } - else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD ) - { - // not affected - } - else if (imlInstruction->type == PPCREC_IML_TYPE_MEM2MEM) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_STORE ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED ) - { - // not affected - } - else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) - { - // only affects cr register - } - else if( imlInstruction->type == PPCREC_IML_TYPE_JUMPMARK ) - { - // no effect on registers - } - else if( imlInstruction->type == PPCREC_IML_TYPE_PPC_ENTER ) - { - // no op - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) - { - imlInstruction->op_r_name.registerIndex = replaceRegister(imlInstruction->op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R ) - { - imlInstruction->op_r_name.registerIndex = replaceRegister(imlInstruction->op_r_name.registerIndex, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD ) - { - imlInstruction->op_storeLoad.registerData = replaceRegister(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED ) - { - imlInstruction->op_storeLoad.registerData = replaceRegister(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE ) - { - imlInstruction->op_storeLoad.registerData = replaceRegister(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED ) - { - imlInstruction->op_storeLoad.registerData = replaceRegister(imlInstruction->op_storeLoad.registerData, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R ) - { - imlInstruction->op_fpr_r_r.registerResult = replaceRegister(imlInstruction->op_fpr_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - 
imlInstruction->op_fpr_r_r.registerOperand = replaceRegister(imlInstruction->op_fpr_r_r.registerOperand, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R ) - { - imlInstruction->op_fpr_r_r_r.registerResult = replaceRegister(imlInstruction->op_fpr_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r.registerOperandA = replaceRegister(imlInstruction->op_fpr_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r.registerOperandB = replaceRegister(imlInstruction->op_fpr_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R ) - { - imlInstruction->op_fpr_r_r_r_r.registerResult = replaceRegister(imlInstruction->op_fpr_r_r_r_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r_r.registerOperandA = replaceRegister(imlInstruction->op_fpr_r_r_r_r.registerOperandA, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r_r.registerOperandB = replaceRegister(imlInstruction->op_fpr_r_r_r_r.registerOperandB, fprRegisterSearched, fprRegisterReplaced); - imlInstruction->op_fpr_r_r_r_r.registerOperandC = replaceRegister(imlInstruction->op_fpr_r_r_r_r.registerOperandC, fprRegisterSearched, fprRegisterReplaced); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R ) - { - imlInstruction->op_fpr_r.registerResult = replaceRegister(imlInstruction->op_fpr_r.registerResult, fprRegisterSearched, fprRegisterReplaced); - } - else - { - cemu_assert_unimplemented(); - } -} - -typedef struct -{ - struct - { - sint32 instructionIndex; - sint32 registerPreviousName; - sint32 registerNewName; - sint32 index; // new index - sint32 previousIndex; // previous index (always out of range) - bool nameMustBeMaintained; // must be stored before replacement and loaded after replacement ends - }replacedRegisterEntry[PPC_X64_GPR_USABLE_REGISTERS]; - sint32 count; -}replacedRegisterTracker_t; - -bool PPCRecompiler_checkIfGPRRegisterIsAccessed(PPCImlOptimizerUsedRegisters_t* registersUsed, sint32 gprRegister) -{ - if( registersUsed->readNamedReg1 == gprRegister ) - return true; - if( registersUsed->readNamedReg2 == gprRegister ) - return true; - if( registersUsed->readNamedReg3 == gprRegister ) - return true; - if( registersUsed->writtenNamedReg1 == gprRegister ) - return true; - return false; -} - -/* - * Returns index of register to replace - * If no register needs to be replaced, -1 is returned - */ -sint32 PPCRecompiler_getNextRegisterToReplace(PPCImlOptimizerUsedRegisters_t* registersUsed) -{ - // get index of register to replace - sint32 gprToReplace = -1; - if( registersUsed->readNamedReg1 >= PPC_X64_GPR_USABLE_REGISTERS ) - gprToReplace = registersUsed->readNamedReg1; - else if( registersUsed->readNamedReg2 >= PPC_X64_GPR_USABLE_REGISTERS ) - gprToReplace = registersUsed->readNamedReg2; - else if( registersUsed->readNamedReg3 >= PPC_X64_GPR_USABLE_REGISTERS ) - gprToReplace = registersUsed->readNamedReg3; - else if( registersUsed->writtenNamedReg1 >= PPC_X64_GPR_USABLE_REGISTERS ) - gprToReplace = registersUsed->writtenNamedReg1; - // return - return gprToReplace; -} - -bool PPCRecompiler_findAvailableRegisterDepr(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 imlIndexStart, replacedRegisterTracker_t* replacedRegisterTracker, sint32* registerIndex, sint32* registerName, bool* isUsed) -{ - 
PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+imlIndexStart, &registersUsed); - // mask all registers used by this instruction - uint32 instructionReservedRegisterMask = 0;//(1<<(PPC_X64_GPR_USABLE_REGISTERS+1))-1; - if( registersUsed.readNamedReg1 != -1 ) - instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg1)); - if( registersUsed.readNamedReg2 != -1 ) - instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg2)); - if( registersUsed.readNamedReg3 != -1 ) - instructionReservedRegisterMask |= (1<<(registersUsed.readNamedReg3)); - if( registersUsed.writtenNamedReg1 != -1 ) - instructionReservedRegisterMask |= (1<<(registersUsed.writtenNamedReg1)); - // mask all registers that are reserved for other replacements - uint32 replacementReservedRegisterMask = 0; - for(sint32 i=0; i<replacedRegisterTracker->count; i++) - { - replacementReservedRegisterMask |= (1<<replacedRegisterTracker->replacedRegisterEntry[i].index); - } - - // potential improvement: Scan ahead a few instructions and look for registers that are the least used (or ideally never used) - - // pick available register - const uint32 allRegisterMask = (1<<(PPC_X64_GPR_USABLE_REGISTERS+1))-1; // mask with set bit for every register - uint32 reservedRegisterMask = instructionReservedRegisterMask | replacementReservedRegisterMask; - cemu_assert(instructionReservedRegisterMask != allRegisterMask); // no usable register! (Need to store a register from the replacedRegisterTracker) - sint32 usedRegisterIndex = -1; - for(sint32 i=0; i<PPC_X64_GPR_USABLE_REGISTERS; i++) - { - if( (reservedRegisterMask&(1<<i)) == 0 ) - { - if( ppcImlGenContext->mappedRegister[i] != -1 ) - { - // register is reserved by segment -> In use - *isUsed = true; - *registerName = ppcImlGenContext->mappedRegister[i]; - } - else - { - *isUsed = false; - *registerName = -1; - } - *registerIndex = i; - return true; - } - } - return false; - -} - -bool PPCRecompiler_hasSuffixInstruction(PPCRecImlSegment_t* imlSegment) -{ - if( imlSegment->imlListCount == 0 ) - return false; - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+imlSegment->imlListCount-1; - if( imlInstruction->type == PPCREC_IML_TYPE_MACRO && (imlInstruction->operation == PPCREC_IML_MACRO_BLR || imlInstruction->operation == PPCREC_IML_MACRO_BCTR) || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_BL || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_B_FAR || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_BLRL || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_BCTRL || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_LEAVE || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_HLE || - imlInstruction->type == PPCREC_IML_TYPE_MACRO && imlInstruction->operation == PPCREC_IML_MACRO_MFTB || - imlInstruction->type == PPCREC_IML_TYPE_PPC_ENTER || - imlInstruction->type == PPCREC_IML_TYPE_CJUMP || - imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) - return true; - return false; -} - -void PPCRecompiler_storeReplacedRegister(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, replacedRegisterTracker_t* replacedRegisterTracker, sint32 registerTrackerIndex, sint32* imlIndex) -{ - // store register - sint32 imlIndexEdit = *imlIndex; - PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndexEdit, 1); - // name_unusedRegister = unusedRegister - 
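// --- Illustrative sketch (editor's addition, not part of the original patch) ---
// The code above builds a bitmask of host registers that must not be touched (registers
// read or written by the current instruction plus registers reserved for other replacements)
// and then walks the usable range looking for a free index. A minimal standalone model of
// that selection follows; kUsableRegisters is a stand-in constant, not a name from Cemu.
#include <cstdint>
#include <cstdio>

constexpr int kUsableRegisters = 19; // assumed count of allocatable host GPRs

// Returns the lowest register index whose bit is clear in reservedMask, or -1 if all are reserved.
int PickFreeRegister(uint32_t reservedMask)
{
	for (int i = 0; i < kUsableRegisters; i++)
	{
		if ((reservedMask & (1u << i)) == 0)
			return i;
	}
	return -1;
}

int main()
{
	// registers 0, 1 and 3 are in use -> the first free one is 2
	uint32_t reserved = (1u << 0) | (1u << 1) | (1u << 3);
	printf("first free register: %d\n", PickFreeRegister(reserved));
	return 0;
}
// --- end of illustrative sketch ---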
PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList+(imlIndexEdit+0); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R; - imlInstructionItr->crRegister = PPC_REC_INVALID_REGISTER; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index; - imlInstructionItr->op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerNewName; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; - imlIndexEdit++; - // load new register if required - if( replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].nameMustBeMaintained ) - { - PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndexEdit, 1); - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList+(imlIndexEdit+0); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; - imlInstructionItr->crRegister = PPC_REC_INVALID_REGISTER; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].index; - imlInstructionItr->op_r_name.name = replacedRegisterTracker->replacedRegisterEntry[registerTrackerIndex].registerPreviousName;//ppcImlGenContext->mappedRegister[replacedRegisterTracker.replacedRegisterEntry[i].index]; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; - imlIndexEdit += 1; - } - // move last entry to current one - memcpy(replacedRegisterTracker->replacedRegisterEntry+registerTrackerIndex, replacedRegisterTracker->replacedRegisterEntry+replacedRegisterTracker->count-1, sizeof(replacedRegisterTracker->replacedRegisterEntry[0])); - replacedRegisterTracker->count--; - *imlIndex = imlIndexEdit; -} - -bool PPCRecompiler_reduceNumberOfFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) -{ - // only xmm0 to xmm14 may be used, xmm15 is reserved - // this method will reduce the number of fpr registers used - // inefficient algorithm for optimizing away excess registers - // we simply load, use and store excess registers into other unused registers when we need to - // first we remove all name load and store instructions that involve out-of-bounds registers - for(sint32 s=0; ssegmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - sint32 imlIndex = 0; - while( imlIndex < imlSegment->imlListCount ) - { - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList+imlIndex; - if( imlInstructionItr->type == PPCREC_IML_TYPE_FPR_R_NAME || imlInstructionItr->type == PPCREC_IML_TYPE_FPR_NAME_R ) - { - if( imlInstructionItr->op_r_name.registerIndex >= PPC_X64_FPR_USABLE_REGISTERS ) - { - // convert to NO-OP instruction - imlInstructionItr->type = PPCREC_IML_TYPE_NO_OP; - imlInstructionItr->associatedPPCAddress = 0; - } - } - imlIndex++; - } - } - // replace registers - for(sint32 s=0; ssegmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - sint32 imlIndex = 0; - while( imlIndex < imlSegment->imlListCount ) - { - PPCImlOptimizerUsedRegisters_t registersUsed; - while( true ) - { - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+imlIndex, ®istersUsed); - if( registersUsed.readFPR1 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR2 >= 
PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR3 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.readFPR4 >= PPC_X64_FPR_USABLE_REGISTERS || registersUsed.writtenFPR1 >= PPC_X64_FPR_USABLE_REGISTERS ) - { - // get index of register to replace - sint32 fprToReplace = -1; - if( registersUsed.readFPR1 >= PPC_X64_FPR_USABLE_REGISTERS ) - fprToReplace = registersUsed.readFPR1; - else if( registersUsed.readFPR2 >= PPC_X64_FPR_USABLE_REGISTERS ) - fprToReplace = registersUsed.readFPR2; - else if (registersUsed.readFPR3 >= PPC_X64_FPR_USABLE_REGISTERS) - fprToReplace = registersUsed.readFPR3; - else if (registersUsed.readFPR4 >= PPC_X64_FPR_USABLE_REGISTERS) - fprToReplace = registersUsed.readFPR4; - else if( registersUsed.writtenFPR1 >= PPC_X64_FPR_USABLE_REGISTERS ) - fprToReplace = registersUsed.writtenFPR1; - // generate mask of useable registers - uint8 useableRegisterMask = 0x7F; // lowest bit is fpr register 0 - if( registersUsed.readFPR1 != -1 ) - useableRegisterMask &= ~(1<<(registersUsed.readFPR1)); - if( registersUsed.readFPR2 != -1 ) - useableRegisterMask &= ~(1<<(registersUsed.readFPR2)); - if (registersUsed.readFPR3 != -1) - useableRegisterMask &= ~(1 << (registersUsed.readFPR3)); - if (registersUsed.readFPR4 != -1) - useableRegisterMask &= ~(1 << (registersUsed.readFPR4)); - if( registersUsed.writtenFPR1 != -1 ) - useableRegisterMask &= ~(1<<(registersUsed.writtenFPR1)); - // get highest unused register index (0-6 range) - sint32 unusedRegisterIndex = -1; - for(sint32 f=0; f<PPC_X64_FPR_USABLE_REGISTERS; f++) - { - if( useableRegisterMask&(1<<f) ) - unusedRegisterIndex = f; - } - uint32 unusedRegisterName = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; - bool replacedRegisterIsUsed = true; - if( unusedRegisterName >= PPCREC_NAME_FPR0 && unusedRegisterName < (PPCREC_NAME_FPR0+32) ) - { - replacedRegisterIsUsed = imlSegment->ppcFPRUsed[unusedRegisterName-PPCREC_NAME_FPR0]; - } - // replace registers that are out of range - PPCRecompiler_replaceFPRRegisterUsage(ppcImlGenContext, imlSegment->imlList+imlIndex, fprToReplace, unusedRegisterIndex); - // add load/store name after instruction - PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndex+1, 2); - // add load/store before current instruction - PPCRecompiler_pushBackIMLInstructions(imlSegment, imlIndex, 2); - // name_unusedRegister = unusedRegister - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList+(imlIndex+0); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); - if( replacedRegisterIsUsed ) - { - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = unusedRegisterIndex; - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; - } - else - imlInstructionItr->type = PPCREC_IML_TYPE_NO_OP; - imlInstructionItr = imlSegment->imlList+(imlIndex+1); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = unusedRegisterIndex; - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; - // name_gprToReplace = unusedRegister - imlInstructionItr = imlSegment->imlList+(imlIndex+3); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionItr->operation = 
PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = unusedRegisterIndex; - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[fprToReplace]; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; - // unusedRegister = name_unusedRegister - imlInstructionItr = imlSegment->imlList+(imlIndex+4); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); - if( replacedRegisterIsUsed ) - { - imlInstructionItr->type = PPCREC_IML_TYPE_FPR_R_NAME; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = unusedRegisterIndex; - imlInstructionItr->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unusedRegisterIndex]; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; - } - else - imlInstructionItr->type = PPCREC_IML_TYPE_NO_OP; - } - else - break; - } - imlIndex++; - } - } - return true; -} - -typedef struct -{ - bool isActive; - uint32 virtualReg; - sint32 lastUseIndex; -}ppcRecRegisterMapping_t; - -typedef struct -{ - ppcRecRegisterMapping_t currentMapping[PPC_X64_FPR_USABLE_REGISTERS]; - sint32 ppcRegToMapping[64]; - sint32 currentUseIndex; -}ppcRecManageRegisters_t; - -ppcRecRegisterMapping_t* PPCRecompiler_findAvailableRegisterDepr(ppcRecManageRegisters_t* rCtx, PPCImlOptimizerUsedRegisters_t* instructionUsedRegisters) -{ - // find free register - for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) - { - if (rCtx->currentMapping[i].isActive == false) - { - rCtx->currentMapping[i].isActive = true; - rCtx->currentMapping[i].virtualReg = -1; - rCtx->currentMapping[i].lastUseIndex = rCtx->currentUseIndex; - return rCtx->currentMapping + i; - } - } - // all registers are used - return nullptr; -} - -ppcRecRegisterMapping_t* PPCRecompiler_findUnloadableRegister(ppcRecManageRegisters_t* rCtx, PPCImlOptimizerUsedRegisters_t* instructionUsedRegisters, uint32 unloadLockedMask) -{ - // find unloadable register (with lowest lastUseIndex) - sint32 unloadIndex = -1; - sint32 unloadIndexLastUse = 0x7FFFFFFF; - for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) - { - if (rCtx->currentMapping[i].isActive == false) - continue; - if( (unloadLockedMask&(1<currentMapping[i].virtualReg; - bool isReserved = false; - for (sint32 f = 0; f < 4; f++) - { - if (virtualReg == (sint32)instructionUsedRegisters->fpr[f]) - { - isReserved = true; - break; - } - } - if (isReserved) - continue; - if (rCtx->currentMapping[i].lastUseIndex < unloadIndexLastUse) - { - unloadIndexLastUse = rCtx->currentMapping[i].lastUseIndex; - unloadIndex = i; - } - } - cemu_assert(unloadIndex != -1); - return rCtx->currentMapping + unloadIndex; -} - -bool PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext_t* ppcImlGenContext, sint32 segmentIndex) -{ - ppcRecManageRegisters_t rCtx = { 0 }; - for (sint32 i = 0; i < 64; i++) - rCtx.ppcRegToMapping[i] = -1; - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[segmentIndex]; - sint32 idx = 0; - sint32 currentUseIndex = 0; - PPCImlOptimizerUsedRegisters_t registersUsed; - while (idx < imlSegment->imlListCount) - { - if ( PPCRecompiler_isSuffixInstruction(imlSegment->imlList + idx) ) - break; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList + idx, ®istersUsed); - sint32 fprMatch[4]; - sint32 fprReplace[4]; - fprMatch[0] = -1; - fprMatch[1] = -1; - fprMatch[2] = -1; - fprMatch[3] = -1; - fprReplace[0] = -1; - fprReplace[1] = -1; - fprReplace[2] = -1; - fprReplace[3] = -1; - // generate 
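// --- Illustrative sketch (editor's addition, not part of the original patch) ---
// ppcRecRegisterMapping_t above tracks, per physical FPR slot, which virtual register it
// currently holds and when it was last used. When no slot is free, the slot with the
// smallest lastUseIndex that is not locked for the current instruction is evicted. A
// compact standalone model of that policy, with hypothetical names:
#include <cstdint>
#include <cstdio>

struct Mapping
{
	bool isActive;
	int virtualReg;
	int lastUseIndex;
};

constexpr int kSlots = 15; // assumption: xmm0..xmm14 usable, xmm15 reserved

// Returns the index of the least recently used active slot whose bit is clear in lockedMask, or -1.
int FindEvictionSlot(const Mapping (&slots)[kSlots], uint32_t lockedMask)
{
	int best = -1;
	int bestUse = INT32_MAX;
	for (int i = 0; i < kSlots; i++)
	{
		if (!slots[i].isActive || (lockedMask & (1u << i)))
			continue;
		if (slots[i].lastUseIndex < bestUse)
		{
			bestUse = slots[i].lastUseIndex;
			best = i;
		}
	}
	return best;
}

int main()
{
	Mapping slots[kSlots] = {};
	slots[0] = { true, 7, 10 };
	slots[1] = { true, 9, 3 };
	slots[2] = { true, 4, 8 };
	printf("evict slot: %d\n", FindEvictionSlot(slots, /*lockedMask*/ 1u << 1)); // prints 2
	return 0;
}
// --- end of illustrative sketch ---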
a mask of registers that we may not free - sint32 numReplacedOperands = 0; - uint32 unloadLockedMask = 0; - for (sint32 f = 0; f < 5; f++) - { - sint32 virtualFpr; - if (f == 0) - virtualFpr = registersUsed.readFPR1; - else if (f == 1) - virtualFpr = registersUsed.readFPR2; - else if (f == 2) - virtualFpr = registersUsed.readFPR3; - else if (f == 3) - virtualFpr = registersUsed.readFPR4; - else if (f == 4) - virtualFpr = registersUsed.writtenFPR1; - if( virtualFpr < 0 ) - continue; - cemu_assert_debug(virtualFpr < 64); - // check if this virtual FPR is already loaded in any real register - ppcRecRegisterMapping_t* regMapping; - if (rCtx.ppcRegToMapping[virtualFpr] == -1) - { - // not loaded - // find available register - while (true) - { - regMapping = PPCRecompiler_findAvailableRegisterDepr(&rCtx, ®istersUsed); - if (regMapping == NULL) - { - // unload least recently used register and try again - ppcRecRegisterMapping_t* unloadRegMapping = PPCRecompiler_findUnloadableRegister(&rCtx, ®istersUsed, unloadLockedMask); - // mark as locked - unloadLockedMask |= (1<<(unloadRegMapping- rCtx.currentMapping)); - // create unload instruction - PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); - PPCRecImlInstruction_t* imlInstructionTemp = imlSegment->imlList + idx; - memset(imlInstructionTemp, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.registerIndex = (uint8)(unloadRegMapping - rCtx.currentMapping); - imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[unloadRegMapping->virtualReg]; - imlInstructionTemp->op_r_name.copyWidth = 32; - imlInstructionTemp->op_r_name.flags = 0; - idx++; - // update mapping - unloadRegMapping->isActive = false; - rCtx.ppcRegToMapping[unloadRegMapping->virtualReg] = -1; - } - else - break; - } - // create load instruction - PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, 1); - PPCRecImlInstruction_t* imlInstructionTemp = imlSegment->imlList + idx; - memset(imlInstructionTemp, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_R_NAME; - imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.registerIndex = (uint8)(regMapping-rCtx.currentMapping); - imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[virtualFpr]; - imlInstructionTemp->op_r_name.copyWidth = 32; - imlInstructionTemp->op_r_name.flags = 0; - idx++; - // update mapping - regMapping->virtualReg = virtualFpr; - rCtx.ppcRegToMapping[virtualFpr] = (sint32)(regMapping - rCtx.currentMapping); - regMapping->lastUseIndex = rCtx.currentUseIndex; - rCtx.currentUseIndex++; - } - else - { - regMapping = rCtx.currentMapping + rCtx.ppcRegToMapping[virtualFpr]; - regMapping->lastUseIndex = rCtx.currentUseIndex; - rCtx.currentUseIndex++; - } - // replace FPR - bool entryFound = false; - for (sint32 t = 0; t < numReplacedOperands; t++) - { - if (fprMatch[t] == virtualFpr) - { - cemu_assert_debug(fprReplace[t] == (regMapping - rCtx.currentMapping)); - entryFound = true; - break; - } - } - if (entryFound == false) - { - cemu_assert_debug(numReplacedOperands != 4); - fprMatch[numReplacedOperands] = virtualFpr; - fprReplace[numReplacedOperands] = (sint32)(regMapping - rCtx.currentMapping); - numReplacedOperands++; - } - } - if (numReplacedOperands > 0) - { - PPCRecompiler_replaceFPRRegisterUsageMultiple(ppcImlGenContext, imlSegment->imlList + idx, fprMatch, fprReplace); - 
} - // next - idx++; - } - // count loaded registers - sint32 numLoadedRegisters = 0; - for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) - { - if (rCtx.currentMapping[i].isActive) - numLoadedRegisters++; - } - // store all loaded registers - if (numLoadedRegisters > 0) - { - PPCRecompiler_pushBackIMLInstructions(imlSegment, idx, numLoadedRegisters); - for (sint32 i = 0; i < PPC_X64_FPR_USABLE_REGISTERS; i++) - { - if (rCtx.currentMapping[i].isActive == false) - continue; - PPCRecImlInstruction_t* imlInstructionTemp = imlSegment->imlList + idx; - memset(imlInstructionTemp, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstructionTemp->type = PPCREC_IML_TYPE_FPR_NAME_R; - imlInstructionTemp->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionTemp->op_r_name.registerIndex = i; - imlInstructionTemp->op_r_name.name = ppcImlGenContext->mappedFPRRegister[rCtx.currentMapping[i].virtualReg]; - imlInstructionTemp->op_r_name.copyWidth = 32; - imlInstructionTemp->op_r_name.flags = 0; - idx++; - } - } - return true; -} - -bool PPCRecompiler_manageFPRRegisters(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - if (PPCRecompiler_manageFPRRegistersForSegment(ppcImlGenContext, s) == false) - return false; - } - return true; -} - - -/* - * Returns true if the loaded value is guaranteed to be overwritten - */ -bool PPCRecompiler_trackRedundantNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) -{ - sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; - for(sint32 i=startIndex; iimlListCount; i++) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; - //nameStoreInstruction->op_r_name.registerIndex - PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+i, ®istersUsed); - if( registersUsed.readNamedReg1 == registerIndex || registersUsed.readNamedReg2 == registerIndex || registersUsed.readNamedReg3 == registerIndex ) - return false; - if( registersUsed.writtenNamedReg1 == registerIndex ) - return true; - } - // todo: Scan next segment(s) - return false; -} - -/* - * Returns true if the loaded value is guaranteed to be overwritten - */ -bool PPCRecompiler_trackRedundantFPRNameLoadInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) -{ - sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; - for(sint32 i=startIndex; iimlListCount; i++) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; - PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+i, ®istersUsed); - if( registersUsed.readFPR1 == registerIndex || registersUsed.readFPR2 == registerIndex || registersUsed.readFPR3 == registerIndex || registersUsed.readFPR4 == registerIndex) - return false; - if( registersUsed.writtenFPR1 == registerIndex ) - return true; - } - // todo: Scan next segment(s) - return false; -} - -/* - * Returns true if the loaded name is never changed - */ -bool PPCRecompiler_trackRedundantNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) -{ - sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; - for(sint32 
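// --- Illustrative sketch (editor's addition, not part of the original patch) ---
// The tracking helpers above answer a simple liveness question: starting after a given
// instruction, is the register read again before it is overwritten? If the first access is
// a write, the earlier load of that register was redundant. A toy version of that forward
// scan over a simplified instruction list (names here are hypothetical):
#include <cstdio>
#include <vector>

struct ToyInstruction
{
	int readReg;    // -1 if the instruction reads no register of interest
	int writtenReg; // -1 if it writes none
};

// Returns true if 'reg' is guaranteed to be overwritten before being read again.
bool IsOverwrittenBeforeRead(const std::vector<ToyInstruction>& code, size_t startIndex, int reg)
{
	for (size_t i = startIndex; i < code.size(); i++)
	{
		if (code[i].readReg == reg)
			return false; // value is still needed
		if (code[i].writtenReg == reg)
			return true; // overwritten without being read -> previous value was dead
	}
	return false; // unknown beyond this block, stay conservative
}

int main()
{
	std::vector<ToyInstruction> code = { { -1, 5 }, { 5, -1 } };
	printf("%d\n", IsOverwrittenBeforeRead(code, 0, 5)); // 1: first access writes r5
	return 0;
}
// --- end of illustrative sketch ---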
i=startIndex; i>=0; i--) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; - PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+i, ®istersUsed); - if( registersUsed.writtenNamedReg1 == registerIndex ) - { - if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME ) - return true; - return false; - } - } - return false; -} - -sint32 debugCallCounter1 = 0; - -/* - * Returns true if the name is overwritten in the current or any following segments - */ -bool PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) -{ - //sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; - uint32 name = nameStoreInstruction->op_r_name.name; - for(sint32 i=startIndex; iimlListCount; i++) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; - if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_R_NAME ) - { - // name is loaded before being written - if( imlSegment->imlList[i].op_r_name.name == name ) - return false; - } - else if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_NAME_R ) - { - // name is written before being loaded - if( imlSegment->imlList[i].op_r_name.name == name ) - return true; - } - } - if( scanDepth >= 2 ) - return false; - if( imlSegment->nextSegmentIsUncertain ) - return false; - if( imlSegment->nextSegmentBranchTaken && PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, 0, nameStoreInstruction, scanDepth+1) == false ) - return false; - if( imlSegment->nextSegmentBranchNotTaken && PPCRecompiler_trackOverwrittenNameStoreInstruction(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, 0, nameStoreInstruction, scanDepth+1) == false ) - return false; - if( imlSegment->nextSegmentBranchTaken == NULL && imlSegment->nextSegmentBranchNotTaken == NULL ) - return false; - - return true; -} - -/* - * Returns true if the loaded FPR name is never changed - */ -bool PPCRecompiler_trackRedundantFPRNameStoreInstruction(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, PPCRecImlInstruction_t* nameStoreInstruction, sint32 scanDepth) -{ - sint16 registerIndex = nameStoreInstruction->op_r_name.registerIndex; - for(sint32 i=startIndex; i>=0; i--) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; - PPCImlOptimizerUsedRegisters_t registersUsed; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlSegment->imlList+i, ®istersUsed); - if( registersUsed.writtenFPR1 == registerIndex ) - { - if( imlSegment->imlList[i].type == PPCREC_IML_TYPE_FPR_R_NAME ) - return true; - return false; - } - } - // todo: Scan next segment(s) - return false; -} - -uint32 _PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, uint32 currentOverwriteMask, uint32 currentReadMask, uint32 scanDepth) -{ - // is any bit overwritten but not read? 
- uint32 overwriteMask = imlSegment->crBitsWritten&~imlSegment->crBitsInput; - currentOverwriteMask |= overwriteMask; - // next segment - if( imlSegment->nextSegmentIsUncertain == false && scanDepth < 3 ) - { - uint32 nextSegmentOverwriteMask = 0; - if( imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchNotTaken ) - { - uint32 mask0 = _PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, 0, 0, scanDepth+1); - uint32 mask1 = _PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, 0, 0, scanDepth+1); - nextSegmentOverwriteMask = mask0&mask1; - } - else if( imlSegment->nextSegmentBranchNotTaken) - { - nextSegmentOverwriteMask = _PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, 0, 0, scanDepth+1); - } - nextSegmentOverwriteMask &= ~imlSegment->crBitsRead; - currentOverwriteMask |= nextSegmentOverwriteMask; - } - else if (imlSegment->nextSegmentIsUncertain) - { - if (ppcImlGenContext->segmentListCount >= 5) - { - return 7; // for more complex functions we assume that CR is not passed on - } - } - return currentOverwriteMask; -} - -/* - * Returns a mask of all CR bits that are overwritten (written but not read) in the segment and all it's following segments - * If the write state of a CR bit cannot be determined, it is returned as 0 (not overwritten) - */ -uint32 PPCRecompiler_getCROverwriteMask(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - if (imlSegment->nextSegmentIsUncertain) - { - return 0; - } - if( imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchNotTaken ) - { - uint32 mask0 = _PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, 0, 0, 0); - uint32 mask1 = _PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, 0, 0, 0); - return mask0&mask1; // only return bits that are overwritten in both branches - } - else if( imlSegment->nextSegmentBranchNotTaken ) - { - uint32 mask = _PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, 0, 0, 0); - return mask; - } - else - { - // not implemented - } - return 0; -} - -void PPCRecompiler_removeRedundantCRUpdates(ppcImlGenContext_t* ppcImlGenContext) -{ - for(sint32 s=0; ssegmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - - for(sint32 i=0; iimlListCount; i++) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; - if (imlInstruction->type == PPCREC_IML_TYPE_CJUMP) - { - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - { - uint32 crBitFlag = 1 << (imlInstruction->op_conditionalJump.crRegisterIndex * 4 + imlInstruction->op_conditionalJump.crBitIndex); - imlSegment->crBitsInput |= (crBitFlag&~imlSegment->crBitsWritten); // flag bits that have not already been written - imlSegment->crBitsRead |= (crBitFlag); - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - uint32 crBitFlag = 1 << (imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex); - imlSegment->crBitsInput |= (crBitFlag&~imlSegment->crBitsWritten); // flag bits that have not already been written - imlSegment->crBitsRead |= (crBitFlag); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MFCR) - { - imlSegment->crBitsRead |= 0xFFFFFFFF; - } - else if (imlInstruction->type == 
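// --- Illustrative sketch (editor's addition, not part of the original patch) ---
// The mask logic above can be read as plain set algebra: within one segment the bits that
// count as overwritten are those written but not first read (written & ~input); across a
// branch a bit only counts as overwritten if both successors overwrite it (intersection).
// A short standalone check of that arithmetic:
#include <cstdint>
#include <cstdio>

uint32_t SegmentOverwriteMask(uint32_t crBitsWritten, uint32_t crBitsInput)
{
	return crBitsWritten & ~crBitsInput;
}

uint32_t CombineSuccessors(uint32_t maskBranchTaken, uint32_t maskBranchNotTaken)
{
	return maskBranchTaken & maskBranchNotTaken;
}

int main()
{
	// segment writes cr bits 0..2 but reads bit 2 first
	uint32_t overwrite = SegmentOverwriteMask(0b0111, 0b0100); // -> 0b0011
	// one successor overwrites bits 0..3, the other only bits 0..1
	uint32_t combined = CombineSuccessors(0b1111, 0b0011);     // -> 0b0011
	printf("%u %u\n", overwrite, combined);
	return 0;
}
// --- end of illustrative sketch ---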
PPCREC_IML_TYPE_R_S32 && imlInstruction->operation == PPCREC_IML_OP_MTCRF) - { - imlSegment->crBitsWritten |= ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) - { - if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR || - imlInstruction->operation == PPCREC_IML_OP_CR_SET) - { - uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - imlSegment->crBitsWritten |= (crBitFlag & ~imlSegment->crBitsWritten); - } - else if (imlInstruction->operation == PPCREC_IML_OP_CR_OR || - imlInstruction->operation == PPCREC_IML_OP_CR_ORC || - imlInstruction->operation == PPCREC_IML_OP_CR_AND || - imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) - { - uint32 crBitFlag = 1 << (imlInstruction->op_cr.crD); - imlSegment->crBitsWritten |= (crBitFlag & ~imlSegment->crBitsWritten); - crBitFlag = 1 << (imlInstruction->op_cr.crA); - imlSegment->crBitsRead |= (crBitFlag & ~imlSegment->crBitsRead); - crBitFlag = 1 << (imlInstruction->op_cr.crB); - imlSegment->crBitsRead |= (crBitFlag & ~imlSegment->crBitsRead); - } - else - cemu_assert_unimplemented(); - } - else if( PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7 ) - { - imlSegment->crBitsWritten |= (0xF<<(imlInstruction->crRegister*4)); - } - else if( (imlInstruction->type == PPCREC_IML_TYPE_STORE || imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED) && imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER ) - { - // overwrites CR0 - imlSegment->crBitsWritten |= (0xF<<0); - } - } - } - // flag instructions that write to CR where we can ignore individual CR bits - for(sint32 s=0; ssegmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - for(sint32 i=0; iimlListCount; i++) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; - if( PPCRecompilerImlAnalyzer_canTypeWriteCR(imlInstruction) && imlInstruction->crRegister >= 0 && imlInstruction->crRegister <= 7 ) - { - uint32 crBitFlags = 0xF<<((uint32)imlInstruction->crRegister*4); - uint32 crOverwriteMask = PPCRecompiler_getCROverwriteMask(ppcImlGenContext, imlSegment); - uint32 crIgnoreMask = crOverwriteMask & ~imlSegment->crBitsRead; - imlInstruction->crIgnoreMask = crIgnoreMask; - } - } - } -} - -bool PPCRecompiler_checkIfGPRIsModifiedInRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex, sint32 vreg) -{ - PPCImlOptimizerUsedRegisters_t registersUsed; - for (sint32 i = startIndex; i <= endIndex; i++) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); - if (registersUsed.writtenNamedReg1 == vreg) - return true; - } - return false; -} - -sint32 PPCRecompiler_scanBackwardsForReusableRegister(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* startSegment, sint32 startIndex, sint32 name) -{ - // current segment - sint32 currentIndex = startIndex; - PPCRecImlSegment_t* currentSegment = startSegment; - sint32 segmentIterateCount = 0; - sint32 foundRegister = -1; - while (true) - { - // stop scanning if segment is enterable - if (currentSegment->isEnterable) - return -1; - while (currentIndex >= 0) - { - if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) - { - foundRegister = currentSegment->imlList[currentIndex].op_r_name.registerIndex; - 
break; - } - // previous instruction - currentIndex--; - } - if (foundRegister >= 0) - break; - // continue at previous segment (if there is only one) - if (segmentIterateCount >= 1) - return -1; - if (currentSegment->list_prevSegments.size() != 1) - return -1; - currentSegment = currentSegment->list_prevSegments[0]; - currentIndex = currentSegment->imlListCount - 1; - segmentIterateCount++; - } - // scan again to make sure the register is not modified inbetween - currentIndex = startIndex; - currentSegment = startSegment; - segmentIterateCount = 0; - PPCImlOptimizerUsedRegisters_t registersUsed; - while (true) - { - while (currentIndex >= 0) - { - // check if register is modified - PPCRecompiler_checkRegisterUsage(ppcImlGenContext, currentSegment->imlList+currentIndex, ®istersUsed); - if (registersUsed.writtenNamedReg1 == foundRegister) - return -1; - // check if end of scan reached - if (currentSegment->imlList[currentIndex].type == PPCREC_IML_TYPE_NAME_R && currentSegment->imlList[currentIndex].op_r_name.name == name) - { - //foundRegister = currentSegment->imlList[currentIndex].op_r_name.registerIndex; - return foundRegister; - } - // previous instruction - currentIndex--; - } - // continue at previous segment (if there is only one) - if (segmentIterateCount >= 1) - return -1; - if (currentSegment->list_prevSegments.size() != 1) - return -1; - currentSegment = currentSegment->list_prevSegments[0]; - currentIndex = currentSegment->imlListCount - 1; - segmentIterateCount++; - } - return -1; -} - -void PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 imlIndexLoad, sint32 fprIndex) -{ - PPCRecImlInstruction_t* imlInstructionLoad = imlSegment->imlList + imlIndexLoad; - if (imlInstructionLoad->op_storeLoad.flags2.notExpanded) - return; - - PPCImlOptimizerUsedRegisters_t registersUsed; - sint32 scanRangeEnd = std::min(imlIndexLoad + 25, imlSegment->imlListCount); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances) - bool foundMatch = false; - sint32 lastStore = -1; - for (sint32 i = imlIndexLoad + 1; i < scanRangeEnd; i++) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; - if (PPCRecompiler_isSuffixInstruction(imlInstruction)) - { - break; - } - - // check if FPR is stored - if ((imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0) || - (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0)) - { - if (imlInstruction->op_storeLoad.registerData == fprIndex) - { - if (foundMatch == false) - { - // flag the load-single instruction as "don't expand" (leave single value as-is) - imlInstructionLoad->op_storeLoad.flags2.notExpanded = true; - } - // also set the flag for the store instruction - PPCRecImlInstruction_t* imlInstructionStore = imlInstruction; - imlInstructionStore->op_storeLoad.flags2.notExpanded = true; - - foundMatch = true; - lastStore = i + 1; - - continue; - } - } - - // check if FPR is overwritten (we can actually ignore read operations?) 
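// --- Illustrative sketch (editor's addition, not part of the original patch) ---
// PPCRecompiler_scanBackwardsForReusableRegister above walks the current segment backwards
// and, if nothing is found, follows at most one unambiguous predecessor, giving up on
// enterable segments or joins with several predecessors. The control flow reduces to a
// bounded backwards search; a simplified model over plain vectors (hypothetical names):
#include <cstdio>
#include <vector>

struct ToySegment
{
	std::vector<int> assignedNames;      // per instruction: name assigned to a register, or -1
	const ToySegment* singlePredecessor; // nullptr if there are 0 or more than 1 predecessors
};

// Returns the index of the latest assignment of 'name', searching the segment and at most
// one predecessor, or -1 if not found.
int FindLatestAssignment(const ToySegment* seg, int startIndex, int name)
{
	for (int hops = 0; hops < 2 && seg != nullptr; hops++)
	{
		for (int i = startIndex; i >= 0; i--)
		{
			if (seg->assignedNames[i] == name)
				return i;
		}
		seg = seg->singlePredecessor;
		startIndex = seg ? (int)seg->assignedNames.size() - 1 : -1;
	}
	return -1;
}

int main()
{
	ToySegment pred{ { 3, 7, -1 }, nullptr };
	ToySegment cur{ { -1, -1 }, &pred };
	printf("%d\n", FindLatestAssignment(&cur, 1, 7)); // 1 (found in the predecessor)
	return 0;
}
// --- end of illustrative sketch ---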
- PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); - if (registersUsed.writtenFPR1 == fprIndex) - break; - if (registersUsed.readFPR1 == fprIndex) - break; - if (registersUsed.readFPR2 == fprIndex) - break; - if (registersUsed.readFPR3 == fprIndex) - break; - if (registersUsed.readFPR4 == fprIndex) - break; - } - - if (foundMatch) - { - // insert expand instruction after store - PPCRecImlInstruction_t* newExpand = PPCRecompiler_insertInstruction(imlSegment, lastStore); - PPCRecompilerImlGen_generateNewInstruction_fpr_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64, fprIndex); - } -} - -/* -* Scans for patterns: -* -* -* -* For these patterns the store and load is modified to work with un-extended values (float remains as float, no double conversion) -* The float->double extension is then executed later -* Advantages: -* Keeps denormals and other special float values intact -* Slightly improves performance -*/ -void PPCRecompiler_optimizeDirectFloatCopies(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - - for (sint32 i = 0; i < imlSegment->imlListCount; i++) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) - { - PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, imlSegment, i, imlInstruction->op_storeLoad.registerData); - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED && imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1) - { - PPCRecompiler_optimizeDirectFloatCopiesScanForward(ppcImlGenContext, imlSegment, i, imlInstruction->op_storeLoad.registerData); - } - } - } -} - -void PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 imlIndexLoad, sint32 gprIndex) -{ - PPCRecImlInstruction_t* imlInstructionLoad = imlSegment->imlList + imlIndexLoad; - if ( imlInstructionLoad->op_storeLoad.flags2.swapEndian == false ) - return; - bool foundMatch = false; - PPCImlOptimizerUsedRegisters_t registersUsed; - sint32 scanRangeEnd = std::min(imlIndexLoad + 25, imlSegment->imlListCount); // don't scan too far (saves performance and also the chances we can merge the load+store become low at high distances) - sint32 i = imlIndexLoad + 1; - for (; i < scanRangeEnd; i++) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; - if (PPCRecompiler_isSuffixInstruction(imlInstruction)) - { - break; - } - // check if GPR is stored - if ((imlInstruction->type == PPCREC_IML_TYPE_STORE && imlInstruction->op_storeLoad.copyWidth == 32 ) ) - { - if (imlInstruction->op_storeLoad.registerMem == gprIndex) - break; - if (imlInstruction->op_storeLoad.registerData == gprIndex) - { - PPCRecImlInstruction_t* imlInstructionStore = imlInstruction; - if (foundMatch == false) - { - // switch the endian swap flag for the load instruction - imlInstructionLoad->op_storeLoad.flags2.swapEndian = !imlInstructionLoad->op_storeLoad.flags2.swapEndian; - foundMatch = true; - } - // switch the endian swap flag for the store instruction - imlInstructionStore->op_storeLoad.flags2.swapEndian = !imlInstructionStore->op_storeLoad.flags2.swapEndian; - // keep scanning - continue; - } - } - // check if GPR is accessed - 
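// --- Illustrative sketch (editor's addition, not part of the original patch) ---
// The float-copy pass above pairs a single-precision load with the stores that forward the
// same register: as long as every use between them is such a store, both sides get flagged
// so the value is kept as a raw 32-bit float and the float->double expansion is deferred.
// The pairing condition itself is just a forward scan with an early exit; sketched here
// with hypothetical names:
#include <cstdio>
#include <vector>

struct ToyFprOp
{
	bool isSingleStoreOfReg; // stores the watched register as single precision
	bool touchesReg;         // any other read/write of the watched register
};

// Returns the indices of stores that can be paired with the load; stops at the first other use.
std::vector<size_t> CollectPairableStores(const std::vector<ToyFprOp>& ops)
{
	std::vector<size_t> stores;
	for (size_t i = 0; i < ops.size(); i++)
	{
		if (ops[i].isSingleStoreOfReg)
		{
			stores.push_back(i);
			continue;
		}
		if (ops[i].touchesReg)
			break; // register used in some other way -> stop pairing here
	}
	return stores;
}

int main()
{
	std::vector<ToyFprOp> ops = { { true, false }, { false, false }, { true, false }, { false, true } };
	printf("pairable stores: %zu\n", CollectPairableStores(ops).size()); // 2
	return 0;
}
// --- end of illustrative sketch ---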
PPCRecompiler_checkRegisterUsage(ppcImlGenContext, imlInstruction, ®istersUsed); - if (registersUsed.readNamedReg1 == gprIndex || - registersUsed.readNamedReg2 == gprIndex || - registersUsed.readNamedReg3 == gprIndex) - { - break; - } - if (registersUsed.writtenNamedReg1 == gprIndex) - return; // GPR overwritten, we don't need to byte swap anymore - } - if (foundMatch) - { - // insert expand instruction - PPCRecImlInstruction_t* newExpand = PPCRecompiler_insertInstruction(imlSegment, i); - PPCRecompilerImlGen_generateNewInstruction_r_r(ppcImlGenContext, newExpand, PPCREC_IML_OP_ENDIAN_SWAP, gprIndex, gprIndex); - } -} - -/* -* Scans for patterns: -* -* -* -* For these patterns the store and load is modified to work with non-swapped values -* The big_endian->little_endian conversion is then executed later -* Advantages: -* Slightly improves performance -*/ -void PPCRecompiler_optimizeDirectIntegerCopies(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - - for (sint32 i = 0; i < imlSegment->imlListCount; i++) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; - if (imlInstruction->type == PPCREC_IML_TYPE_LOAD && imlInstruction->op_storeLoad.copyWidth == 32 && imlInstruction->op_storeLoad.flags2.swapEndian ) - { - PPCRecompiler_optimizeDirectIntegerCopiesScanForward(ppcImlGenContext, imlSegment, i, imlInstruction->op_storeLoad.registerData); - } - } - } -} - -sint32 _getGQRIndexFromRegister(ppcImlGenContext_t* ppcImlGenContext, sint32 registerIndex) -{ - if (registerIndex == PPC_REC_INVALID_REGISTER) - return -1; - sint32 namedReg = ppcImlGenContext->mappedRegister[registerIndex]; - if (namedReg >= (PPCREC_NAME_SPR0 + SPR_UGQR0) && namedReg <= (PPCREC_NAME_SPR0 + SPR_UGQR7)) - { - return namedReg - (PPCREC_NAME_SPR0 + SPR_UGQR0); - } - return -1; -} - -bool PPCRecompiler_isUGQRValueKnown(ppcImlGenContext_t* ppcImlGenContext, sint32 gqrIndex, uint32& gqrValue) -{ - // UGQR 2 to 7 are initialized by the OS and we assume that games won't ever permanently touch those - // todo - hack - replace with more accurate solution - if (gqrIndex == 2) - gqrValue = 0x00040004; - else if (gqrIndex == 3) - gqrValue = 0x00050005; - else if (gqrIndex == 4) - gqrValue = 0x00060006; - else if (gqrIndex == 5) - gqrValue = 0x00070007; - else - return false; - return true; -} - -/* - * If value of GQR can be predicted for a given PSQ load or store instruction then replace it with an optimized version - */ -void PPCRecompiler_optimizePSQLoadAndStore(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - for (sint32 i = 0; i < imlSegment->imlListCount; i++) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; - if (imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD || imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED) - { - if(imlInstruction->op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0 && - imlInstruction->op_storeLoad.mode != PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 ) - continue; - // get GQR value - cemu_assert_debug(imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, imlInstruction->op_storeLoad.registerGQR); - cemu_assert(gqrIndex >= 0); - if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex]) - continue; - //uint32 gqrValue = 
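// --- Illustrative sketch (editor's addition, not part of the original patch) ---
// The integer-copy pass above relies on byte swaps cancelling out: if a big-endian 32-bit
// value is loaded with a swap and then only stored back with another swap, both swaps can
// be dropped and the value travels through the GPR in its original byte order. The identity
// being exploited is simply swap(swap(x)) == x:
#include <cstdint>
#include <cstdio>

uint32_t SwapEndian32(uint32_t v)
{
	return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) | (v << 24);
}

int main()
{
	uint32_t bigEndianValue = 0x11223344u;
	// load with swap + store with swap writes back the original byte pattern...
	uint32_t roundTrip = SwapEndian32(SwapEndian32(bigEndianValue));
	// ...so the recompiler may omit both swaps when nothing else reads the register.
	printf("0x%08X -> 0x%08X\n", bigEndianValue, roundTrip);
	return 0;
}
// --- end of illustrative sketch ---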
ppcInterpreterCurrentInstance->sprNew.UGQR[gqrIndex]; - uint32 gqrValue; - if (!PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue)) - continue; - - uint32 formatType = (gqrValue >> 16) & 7; - uint32 scale = (gqrValue >> 24) & 0x3F; - if (scale != 0) - continue; // only generic handler supports scale - if (imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) - { - if (formatType == 0) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0; - else if (formatType == 4) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0; - else if (formatType == 5) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0; - else if (formatType == 6) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0; - else if (formatType == 7) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0; - } - else if (imlInstruction->op_storeLoad.mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1) - { - if (formatType == 0) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1; - else if (formatType == 4) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1; - else if (formatType == 5) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1; - else if (formatType == 6) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1; - else if (formatType == 7) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1; - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE || imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED) - { - if(imlInstruction->op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0 && - imlInstruction->op_storeLoad.mode != PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) - continue; - // get GQR value - cemu_assert_debug(imlInstruction->op_storeLoad.registerGQR != PPC_REC_INVALID_REGISTER); - sint32 gqrIndex = _getGQRIndexFromRegister(ppcImlGenContext, imlInstruction->op_storeLoad.registerGQR); - cemu_assert(gqrIndex >= 0); - if (ppcImlGenContext->tracking.modifiesGQR[gqrIndex]) - continue; - uint32 gqrValue; - if(!PPCRecompiler_isUGQRValueKnown(ppcImlGenContext, gqrIndex, gqrValue)) - continue; - uint32 formatType = (gqrValue >> 16) & 7; - uint32 scale = (gqrValue >> 24) & 0x3F; - if (scale != 0) - continue; // only generic handler supports scale - if (imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) - { - if (formatType == 0) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0; - else if (formatType == 4) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0; - else if (formatType == 5) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0; - else if (formatType == 6) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0; - else if (formatType == 7) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0; - } - else if (imlInstruction->op_storeLoad.mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1) - { - if (formatType == 0) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1; - else if (formatType == 4) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1; - else if (formatType == 5) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1; - else if (formatType == 6) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1; - else if (formatType == 7) - imlInstruction->op_storeLoad.mode = PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1; - } - } - } - } 
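// --- Illustrative sketch (editor's addition, not part of the original patch) ---
// The PSQ specialization above only needs two fields of a (known) GQR value: the type field
// in bits 16-18 and the scale field in bits 24-29, as read by the code above; a non-zero
// scale forces the generic handler. The same field extraction in isolation, with
// hypothetical helper names:
#include <cstdint>
#include <cstdio>

struct GqrLoadFields
{
	uint32_t formatType; // 0 = float, 4/5 = u8/u16, 6/7 = s8/s16
	uint32_t scale;
};

GqrLoadFields DecodeGqrLoadFields(uint32_t gqrValue)
{
	GqrLoadFields f;
	f.formatType = (gqrValue >> 16) & 7;
	f.scale = (gqrValue >> 24) & 0x3F;
	return f;
}

int main()
{
	// 0x00040004 is one of the UGQR values assumed constant above (u8 format, scale 0)
	GqrLoadFields f = DecodeGqrLoadFields(0x00040004u);
	printf("formatType=%u scale=%u\n", f.formatType, f.scale); // formatType=4 scale=0
	return 0;
}
// --- end of illustrative sketch ---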
-} - -/* - * Returns true if registerWrite overwrites any of the registers read by registerRead - */ -bool PPCRecompilerAnalyzer_checkForGPROverwrite(PPCImlOptimizerUsedRegisters_t* registerRead, PPCImlOptimizerUsedRegisters_t* registerWrite) -{ - if (registerWrite->writtenNamedReg1 < 0) - return false; - - if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg1) - return true; - if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg2) - return true; - if (registerWrite->writtenNamedReg1 == registerRead->readNamedReg3) - return true; - return false; -} - -void _reorderConditionModifyInstructions(PPCRecImlSegment_t* imlSegment) -{ - PPCRecImlInstruction_t* lastInstruction = PPCRecompilerIML_getLastInstruction(imlSegment); - // last instruction a conditional branch? - if (lastInstruction == nullptr || lastInstruction->type != PPCREC_IML_TYPE_CJUMP) - return; - if (lastInstruction->op_conditionalJump.crRegisterIndex >= 8) - return; - // get CR bitmask of bit required for conditional jump - PPCRecCRTracking_t crTracking; - PPCRecompilerImlAnalyzer_getCRTracking(lastInstruction, &crTracking); - uint32 requiredCRBits = crTracking.readCRBits; - - // scan backwards until we find the instruction that sets the CR - sint32 crSetterInstructionIndex = -1; - sint32 unsafeInstructionIndex = -1; - for (sint32 i = imlSegment->imlListCount-2; i >= 0; i--) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList + i; - PPCRecompilerImlAnalyzer_getCRTracking(imlInstruction, &crTracking); - if (crTracking.readCRBits != 0) - return; // dont handle complex cases for now - if (crTracking.writtenCRBits != 0) - { - if ((crTracking.writtenCRBits&requiredCRBits) != 0) - { - crSetterInstructionIndex = i; - break; - } - else - { - return; // other CR bits overwritten (dont handle complex cases) - } - } - // is safe? (no risk of overwriting x64 eflags) - if ((imlInstruction->type == PPCREC_IML_TYPE_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_R_NAME || imlInstruction->type == PPCREC_IML_TYPE_NO_OP) || - (imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R || imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME) || - (imlInstruction->type == PPCREC_IML_TYPE_R_S32 && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) || - (imlInstruction->type == PPCREC_IML_TYPE_R_R && (imlInstruction->operation == PPCREC_IML_OP_ASSIGN)) ) - continue; - // not safe - //hasUnsafeInstructions = true; - if (unsafeInstructionIndex == -1) - unsafeInstructionIndex = i; - } - if (crSetterInstructionIndex < 0) - return; - if (unsafeInstructionIndex < 0) - return; // no danger of overwriting eflags, don't reorder - // check if we can move the CR setter instruction to after unsafeInstructionIndex - PPCRecCRTracking_t crTrackingSetter = crTracking; - PPCImlOptimizerUsedRegisters_t regTrackingCRSetter; - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList+crSetterInstructionIndex, &regTrackingCRSetter); - if (regTrackingCRSetter.writtenFPR1 >= 0 || regTrackingCRSetter.readFPR1 >= 0 || regTrackingCRSetter.readFPR2 >= 0 || regTrackingCRSetter.readFPR3 >= 0 || regTrackingCRSetter.readFPR4 >= 0) - return; // we don't handle FPR dependency yet so just ignore FPR instructions - PPCImlOptimizerUsedRegisters_t registerTracking; - if (regTrackingCRSetter.writtenNamedReg1 >= 0) - { - // CR setter does write GPR - for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) - { - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + i, &registerTracking); - // reads register written by CR setter?
- if (PPCRecompilerAnalyzer_checkForGPROverwrite(&registerTracking, &regTrackingCRSetter)) - { - return; // cant move CR setter because of dependency - } - // writes register read by CR setter? - if (PPCRecompilerAnalyzer_checkForGPROverwrite(&regTrackingCRSetter, &registerTracking)) - { - return; // cant move CR setter because of dependency - } - // overwrites register written by CR setter? - if (regTrackingCRSetter.writtenNamedReg1 == registerTracking.writtenNamedReg1) - return; - } - } - else - { - // CR setter does not write GPR - for (sint32 i = crSetterInstructionIndex + 1; i <= unsafeInstructionIndex; i++) - { - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + i, &registerTracking); - // writes register read by CR setter? - if (PPCRecompilerAnalyzer_checkForGPROverwrite(&regTrackingCRSetter, &registerTracking)) - { - return; // cant move CR setter because of dependency - } - } - } - - // move CR setter instruction -#ifndef PUBLIC_RELEASE - if ((unsafeInstructionIndex + 1) <= crSetterInstructionIndex) - assert_dbg(); -#endif - PPCRecImlInstruction_t* newCRSetterInstruction = PPCRecompiler_insertInstruction(imlSegment, unsafeInstructionIndex+1); - memcpy(newCRSetterInstruction, imlSegment->imlList + crSetterInstructionIndex, sizeof(PPCRecImlInstruction_t)); - PPCRecompilerImlGen_generateNewInstruction_noOp(NULL, imlSegment->imlList + crSetterInstructionIndex); -} - -/* - * Move instructions which update the condition flags closer to the instruction that consumes them - * On x64 this improves performance since we often can avoid storing CR in memory - */ -void PPCRecompiler_reorderConditionModifyInstructions(ppcImlGenContext_t* ppcImlGenContext) -{ - // check if this segment has a conditional branch - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - _reorderConditionModifyInstructions(imlSegment); - } -} diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp deleted file mode 100644 index f6370d8c..00000000 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.cpp +++ /dev/null @@ -1,399 +0,0 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" -#include "PPCRecompilerImlRanges.h" -#include "util/helpers/MemoryPool.h" - -void PPCRecRARange_addLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) -{ -#ifndef PUBLIC_RELEASE - if ((*root) && (*root)->range->virtualRegister != subrange->range->virtualRegister) - assert_dbg(); -#endif - subrange->link_sameVirtualRegisterGPR.next = *root; - if (*root) - (*root)->link_sameVirtualRegisterGPR.prev = subrange; - subrange->link_sameVirtualRegisterGPR.prev = nullptr; - *root = subrange; -} - -void PPCRecRARange_addLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) -{ - subrange->link_segmentSubrangesGPR.next = *root; - if (*root) - (*root)->link_segmentSubrangesGPR.prev = subrange; - subrange->link_segmentSubrangesGPR.prev = nullptr; - *root = subrange; -} - -void PPCRecRARange_removeLink_perVirtualGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) -{ - raLivenessSubrange_t* tempPrev = subrange->link_sameVirtualRegisterGPR.prev; - if (subrange->link_sameVirtualRegisterGPR.prev) - subrange->link_sameVirtualRegisterGPR.prev->link_sameVirtualRegisterGPR.next = subrange->link_sameVirtualRegisterGPR.next; - else - (*root) = subrange->link_sameVirtualRegisterGPR.next; - if
(subrange->link_sameVirtualRegisterGPR.next) - subrange->link_sameVirtualRegisterGPR.next->link_sameVirtualRegisterGPR.prev = tempPrev; -#ifndef PUBLIC_RELEASE - subrange->link_sameVirtualRegisterGPR.prev = (raLivenessSubrange_t*)1; - subrange->link_sameVirtualRegisterGPR.next = (raLivenessSubrange_t*)1; -#endif -} - -void PPCRecRARange_removeLink_allSubrangesGPR(raLivenessSubrange_t** root, raLivenessSubrange_t* subrange) -{ - raLivenessSubrange_t* tempPrev = subrange->link_segmentSubrangesGPR.prev; - if (subrange->link_segmentSubrangesGPR.prev) - subrange->link_segmentSubrangesGPR.prev->link_segmentSubrangesGPR.next = subrange->link_segmentSubrangesGPR.next; - else - (*root) = subrange->link_segmentSubrangesGPR.next; - if (subrange->link_segmentSubrangesGPR.next) - subrange->link_segmentSubrangesGPR.next->link_segmentSubrangesGPR.prev = tempPrev; -#ifndef PUBLIC_RELEASE - subrange->link_segmentSubrangesGPR.prev = (raLivenessSubrange_t*)1; - subrange->link_segmentSubrangesGPR.next = (raLivenessSubrange_t*)1; -#endif -} - -MemoryPoolPermanentObjects<raLivenessRange_t> memPool_livenessRange(4096); -MemoryPoolPermanentObjects<raLivenessSubrange_t> memPool_livenessSubrange(4096); - -raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name) -{ - raLivenessRange_t* livenessRange = memPool_livenessRange.acquireObj(); - livenessRange->list_subranges.resize(0); - livenessRange->virtualRegister = virtualRegister; - livenessRange->name = name; - livenessRange->physicalRegister = -1; - ppcImlGenContext->raInfo.list_ranges.push_back(livenessRange); - return livenessRange; -} - -raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex) -{ - raLivenessSubrange_t* livenessSubrange = memPool_livenessSubrange.acquireObj(); - livenessSubrange->list_locations.resize(0); - livenessSubrange->range = range; - livenessSubrange->imlSegment = imlSegment; - PPCRecompilerIml_setSegmentPoint(&livenessSubrange->start, imlSegment, startIndex); - PPCRecompilerIml_setSegmentPoint(&livenessSubrange->end, imlSegment, endIndex); - // default values - livenessSubrange->hasStore = false; - livenessSubrange->hasStoreDelayed = false; - livenessSubrange->lastIterationIndex = 0; - livenessSubrange->subrangeBranchNotTaken = nullptr; - livenessSubrange->subrangeBranchTaken = nullptr; - livenessSubrange->_noLoad = false; - // add to range - range->list_subranges.push_back(livenessSubrange); - // add to segment - PPCRecRARange_addLink_perVirtualGPR(&(imlSegment->raInfo.linkedList_perVirtualGPR[range->virtualRegister]), livenessSubrange); - PPCRecRARange_addLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, livenessSubrange); - return livenessSubrange; -} - -void _unlinkSubrange(raLivenessSubrange_t* subrange) -{ - PPCRecImlSegment_t* imlSegment = subrange->imlSegment; - PPCRecRARange_removeLink_perVirtualGPR(&imlSegment->raInfo.linkedList_perVirtualGPR[subrange->range->virtualRegister], subrange); - PPCRecRARange_removeLink_allSubrangesGPR(&imlSegment->raInfo.linkedList_allSubranges, subrange); -} - -void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange) -{ - _unlinkSubrange(subrange); - subrange->range->list_subranges.erase(std::find(subrange->range->list_subranges.begin(), subrange->range->list_subranges.end(), subrange)); - subrange->list_locations.clear(); - PPCRecompilerIml_removeSegmentPoint(&subrange->start); -
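The addLink/removeLink helpers above are plain intrusive doubly-linked lists anchored at a head pointer. A minimal self-contained restatement of that pattern (demo types only, not the original structures):

#include <cassert>

// Demo node with the same prev/next layout the subrange link fields use.
struct DemoNode
{
	DemoNode* prev = nullptr;
	DemoNode* next = nullptr;
};

// Insert at the head of the list, mirroring PPCRecRARange_addLink_*.
static void demoAddLink(DemoNode** root, DemoNode* node)
{
	node->next = *root;
	if (*root)
		(*root)->prev = node;
	node->prev = nullptr;
	*root = node;
}

// Unlink a node, mirroring PPCRecRARange_removeLink_*.
static void demoRemoveLink(DemoNode** root, DemoNode* node)
{
	if (node->prev)
		node->prev->next = node->next;
	else
		*root = node->next;
	if (node->next)
		node->next->prev = node->prev;
}

int main()
{
	DemoNode a, b;
	DemoNode* root = nullptr;
	demoAddLink(&root, &a);
	demoAddLink(&root, &b);
	demoRemoveLink(&root, &a);
	assert(root == &b && b.prev == nullptr && b.next == nullptr);
	return 0;
}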
PPCRecompilerIml_removeSegmentPoint(&subrange->end); - memPool_livenessSubrange.releaseObj(subrange); -} - -void _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange) -{ - _unlinkSubrange(subrange); - PPCRecompilerIml_removeSegmentPoint(&subrange->start); - PPCRecompilerIml_removeSegmentPoint(&subrange->end); - memPool_livenessSubrange.releaseObj(subrange); -} - -void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range) -{ - for (auto& subrange : range->list_subranges) - { - _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange); - } - ppcImlGenContext->raInfo.list_ranges.erase(std::find(ppcImlGenContext->raInfo.list_ranges.begin(), ppcImlGenContext->raInfo.list_ranges.end(), range)); - memPool_livenessRange.releaseObj(range); -} - -void PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range) -{ - for (auto& subrange : range->list_subranges) - { - _PPCRecRA_deleteSubrangeNoUnlinkFromRange(ppcImlGenContext, subrange); - } - memPool_livenessRange.releaseObj(range); -} - -void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext) -{ - for(auto& range : ppcImlGenContext->raInfo.list_ranges) - { - PPCRecRA_deleteRangeNoUnlink(ppcImlGenContext, range); - } - ppcImlGenContext->raInfo.list_ranges.clear(); -} - -void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange) -{ - cemu_assert_debug(range != absorbedRange); - cemu_assert_debug(range->virtualRegister == absorbedRange->virtualRegister); - // move all subranges from absorbedRange to range - for (auto& subrange : absorbedRange->list_subranges) - { - range->list_subranges.push_back(subrange); - subrange->range = range; - } - absorbedRange->list_subranges.clear(); - PPCRecRA_deleteRange(ppcImlGenContext, absorbedRange); -} - -void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange) -{ -#ifndef PUBLIC_RELEASE - PPCRecRA_debugValidateSubrange(subrange); - PPCRecRA_debugValidateSubrange(absorbedSubrange); - if (subrange->imlSegment != absorbedSubrange->imlSegment) - assert_dbg(); - if (subrange->end.index > absorbedSubrange->start.index) - assert_dbg(); - if (subrange->subrangeBranchTaken || subrange->subrangeBranchNotTaken) - assert_dbg(); - if (subrange == absorbedSubrange) - assert_dbg(); -#endif - subrange->subrangeBranchTaken = absorbedSubrange->subrangeBranchTaken; - subrange->subrangeBranchNotTaken = absorbedSubrange->subrangeBranchNotTaken; - - // merge usage locations - for (auto& location : absorbedSubrange->list_locations) - { - subrange->list_locations.push_back(location); - } - absorbedSubrange->list_locations.clear(); - - subrange->end.index = absorbedSubrange->end.index; - - PPCRecRA_debugValidateSubrange(subrange); - - PPCRecRA_deleteSubrange(ppcImlGenContext, absorbedSubrange); -} - -// remove all inter-segment connections from the range and split it into local ranges (also removes empty ranges) -void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range) -{ - if (range->list_subranges.size() == 1) - assert_dbg(); - for (auto& subrange : range->list_subranges) - { - if (subrange->list_locations.empty()) - continue; - raLivenessRange_t* newRange = PPCRecRA_createRangeBase(ppcImlGenContext, range->virtualRegister, range->name); - raLivenessSubrange_t* newSubrange = PPCRecRA_createSubrange(ppcImlGenContext, 
newRange, subrange->imlSegment, subrange->list_locations.data()[0].index, subrange->list_locations.data()[subrange->list_locations.size() - 1].index + 1); - // copy locations - for (auto& location : subrange->list_locations) - { - newSubrange->list_locations.push_back(location); - } - } - // remove original range - PPCRecRA_deleteRange(ppcImlGenContext, range); -} - -#ifndef PUBLIC_RELEASE -void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) -{ - // validate subrange - if (subrange->subrangeBranchTaken && subrange->subrangeBranchTaken->imlSegment != subrange->imlSegment->nextSegmentBranchTaken) - assert_dbg(); - if (subrange->subrangeBranchNotTaken && subrange->subrangeBranchNotTaken->imlSegment != subrange->imlSegment->nextSegmentBranchNotTaken) - assert_dbg(); -} -#else -void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange) {} -#endif - -// split subrange at the given index -// After the split there will be two ranges/subranges: -// head -> subrange is shortned to end at splitIndex -// tail -> a new subrange that reaches from splitIndex to the end of the original subrange -// if head has a physical register assigned it will not carry over to tail -// The return value is the tail subrange -// If trimToHole is true, the end of the head subrange and the start of the tail subrange will be moved to fit the locations -// Ranges that begin at RA_INTER_RANGE_START are allowed and can be split -raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole) -{ - // validation -#ifndef PUBLIC_RELEASE - if (subrange->end.index == RA_INTER_RANGE_END || subrange->end.index == RA_INTER_RANGE_START) - assert_dbg(); - if (subrange->start.index >= splitIndex) - assert_dbg(); - if (subrange->end.index <= splitIndex) - assert_dbg(); -#endif - // create tail - raLivenessRange_t* tailRange = PPCRecRA_createRangeBase(ppcImlGenContext, subrange->range->virtualRegister, subrange->range->name); - raLivenessSubrange_t* tailSubrange = PPCRecRA_createSubrange(ppcImlGenContext, tailRange, subrange->imlSegment, splitIndex, subrange->end.index); - // copy locations - for (auto& location : subrange->list_locations) - { - if (location.index >= splitIndex) - tailSubrange->list_locations.push_back(location); - } - // remove tail locations from head - for (sint32 i = 0; i < subrange->list_locations.size(); i++) - { - raLivenessLocation_t* location = subrange->list_locations.data() + i; - if (location->index >= splitIndex) - { - subrange->list_locations.resize(i); - break; - } - } - // adjust start/end - if (trimToHole) - { - if (subrange->list_locations.empty()) - { - subrange->end.index = subrange->start.index+1; - } - else - { - subrange->end.index = subrange->list_locations.back().index + 1; - } - if (tailSubrange->list_locations.empty()) - { - assert_dbg(); // should not happen? 
(In this case we can just avoid generating a tail at all) - } - else - { - tailSubrange->start.index = tailSubrange->list_locations.front().index; - } - } - return tailSubrange; -} - -void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite) -{ - if (subrange->list_locations.empty()) - { - subrange->list_locations.emplace_back(index, isRead, isWrite); - return; - } - raLivenessLocation_t* lastLocation = subrange->list_locations.data() + (subrange->list_locations.size() - 1); - cemu_assert_debug(lastLocation->index <= index); - if (lastLocation->index == index) - { - // update - lastLocation->isRead = lastLocation->isRead || isRead; - lastLocation->isWrite = lastLocation->isWrite || isWrite; - return; - } - // add new - subrange->list_locations.emplace_back(index, isRead, isWrite); -} - -sint32 PPCRecRARange_getReadWriteCost(PPCRecImlSegment_t* imlSegment) -{ - sint32 v = imlSegment->loopDepth + 1; - v *= 5; - return v*v; // 25, 100, 225, 400 -} - -// calculate cost of entire range -// ignores data flow and does not detect avoidable reads/stores -sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range) -{ - sint32 cost = 0; - - // todo - this algorithm isn't accurate. If we have 10 parallel branches with a load each then the actual cost is still only that of one branch (plus minimal extra cost for generating more code). - - // currently we calculate the cost based on the most expensive entry/exit point - - sint32 mostExpensiveRead = 0; - sint32 mostExpensiveWrite = 0; - sint32 readCount = 0; - sint32 writeCount = 0; - - for (auto& subrange : range->list_subranges) - { - if (subrange->start.index != RA_INTER_RANGE_START) - { - //cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment); - mostExpensiveRead = std::max(mostExpensiveRead, PPCRecRARange_getReadWriteCost(subrange->imlSegment)); - readCount++; - } - if (subrange->end.index != RA_INTER_RANGE_END) - { - //cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment); - mostExpensiveWrite = std::max(mostExpensiveWrite, PPCRecRARange_getReadWriteCost(subrange->imlSegment)); - writeCount++; - } - } - cost = mostExpensiveRead + mostExpensiveWrite; - cost = cost + (readCount + writeCount) / 10; - return cost; -} - -// calculate cost of range that it would have after calling PPCRecRA_explodeRange() on it -sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range) -{ - sint32 cost = -PPCRecRARange_estimateCost(range); - for (auto& subrange : range->list_subranges) - { - if (subrange->list_locations.empty()) - continue; - cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // we assume a read and a store - } - return cost; -} - -sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex) -{ - // validation -#ifndef PUBLIC_RELEASE - if (subrange->end.index == RA_INTER_RANGE_END) - assert_dbg(); -#endif - - sint32 cost = 0; - // find split position in location list - if (subrange->list_locations.empty()) - { - assert_dbg(); // should not happen? 
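To make the cost heuristic above concrete, here is a small standalone restatement of the per-segment read/write cost and the whole-range estimate with one worked case; the numbers follow the code above, but this is an illustration, not the original implementation.

#include <cstdio>

// Cost of loading or storing a register in a segment, as above: ((loopDepth + 1) * 5)^2.
static int readWriteCost(int loopDepth)
{
	int v = (loopDepth + 1) * 5;
	return v * v; // 25, 100, 225, 400 for loop depths 0..3
}

int main()
{
	// Example: a range that is read once in a loopDepth-2 segment and
	// written once in a loopDepth-0 segment.
	int mostExpensiveRead = readWriteCost(2);  // 225
	int mostExpensiveWrite = readWriteCost(0); // 25
	int readCount = 1;
	int writeCount = 1;
	int cost = mostExpensiveRead + mostExpensiveWrite + (readCount + writeCount) / 10; // 250
	printf("estimated range cost: %d\n", cost);
	return 0;
}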
- return 0; - } - if (splitIndex <= subrange->list_locations.front().index) - return 0; - if (splitIndex > subrange->list_locations.back().index) - return 0; - - // todo - determine exact cost of split subranges - - cost += PPCRecRARange_getReadWriteCost(subrange->imlSegment) * 2; // currently we assume that the additional region will require a read and a store - - //for (sint32 f = 0; f < subrange->list_locations.size(); f++) - //{ - // raLivenessLocation_t* location = subrange->list_locations.data() + f; - // if (location->index >= splitIndex) - // { - // ... - // return cost; - // } - //} - - return cost; -} diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h deleted file mode 100644 index 01970bbf..00000000 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRanges.h +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -raLivenessRange_t* PPCRecRA_createRangeBase(ppcImlGenContext_t* ppcImlGenContext, uint32 virtualRegister, uint32 name); -raLivenessSubrange_t* PPCRecRA_createSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 endIndex); -void PPCRecRA_deleteSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange); -void PPCRecRA_deleteRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range); -void PPCRecRA_deleteAllRanges(ppcImlGenContext_t* ppcImlGenContext); - -void PPCRecRA_mergeRanges(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range, raLivenessRange_t* absorbedRange); -void PPCRecRA_explodeRange(ppcImlGenContext_t* ppcImlGenContext, raLivenessRange_t* range); - -void PPCRecRA_mergeSubranges(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, raLivenessSubrange_t* absorbedSubrange); - -raLivenessSubrange_t* PPCRecRA_splitLocalSubrange(ppcImlGenContext_t* ppcImlGenContext, raLivenessSubrange_t* subrange, sint32 splitIndex, bool trimToHole = false); - -void PPCRecRA_updateOrAddSubrangeLocation(raLivenessSubrange_t* subrange, sint32 index, bool isRead, bool isWrite); -void PPCRecRA_debugValidateSubrange(raLivenessSubrange_t* subrange); - -// cost estimation -sint32 PPCRecRARange_getReadWriteCost(PPCRecImlSegment_t* imlSegment); -sint32 PPCRecRARange_estimateCost(raLivenessRange_t* range); -sint32 PPCRecRARange_estimateAdditionalCostAfterRangeExplode(raLivenessRange_t* range); -sint32 PPCRecRARange_estimateAdditionalCostAfterSplit(raLivenessSubrange_t* subrange, sint32 splitIndex); - -// special values to mark the index of ranges that reach across the segment border -#define RA_INTER_RANGE_START (-1) -#define RA_INTER_RANGE_END (0x70000000) diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp deleted file mode 100644 index 92fbd9b0..00000000 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator.cpp +++ /dev/null @@ -1,1012 +0,0 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" -#include "PPCRecompilerImlRanges.h" - -void PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlInstruction_t* imlInstruction, sint32 gprRegisterSearched[4], sint32 gprRegisterReplaced[4]); - -bool PPCRecompiler_isSuffixInstruction(PPCRecImlInstruction_t* iml); - -uint32 recRACurrentIterationIndex = 0; - -uint32 PPCRecRA_getNextIterationIndex() -{ - recRACurrentIterationIndex++; - return 
recRACurrentIterationIndex; -} - -bool _detectLoop(PPCRecImlSegment_t* currentSegment, sint32 depth, uint32 iterationIndex, PPCRecImlSegment_t* imlSegmentLoopBase) -{ - if (currentSegment == imlSegmentLoopBase) - return true; - if (currentSegment->raInfo.lastIterationIndex == iterationIndex) - return currentSegment->raInfo.isPartOfProcessedLoop; - if (depth >= 9) - return false; - currentSegment->raInfo.lastIterationIndex = iterationIndex; - currentSegment->raInfo.isPartOfProcessedLoop = false; - - if (currentSegment->nextSegmentIsUncertain) - return false; - if (currentSegment->nextSegmentBranchNotTaken) - { - if (currentSegment->nextSegmentBranchNotTaken->momentaryIndex > currentSegment->momentaryIndex) - { - currentSegment->raInfo.isPartOfProcessedLoop = _detectLoop(currentSegment->nextSegmentBranchNotTaken, depth + 1, iterationIndex, imlSegmentLoopBase); - } - } - if (currentSegment->nextSegmentBranchTaken) - { - if (currentSegment->nextSegmentBranchTaken->momentaryIndex > currentSegment->momentaryIndex) - { - currentSegment->raInfo.isPartOfProcessedLoop = _detectLoop(currentSegment->nextSegmentBranchTaken, depth + 1, iterationIndex, imlSegmentLoopBase); - } - } - if (currentSegment->raInfo.isPartOfProcessedLoop) - currentSegment->loopDepth++; - return currentSegment->raInfo.isPartOfProcessedLoop; -} - -void PPCRecRA_detectLoop(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegmentLoopBase) -{ - uint32 iterationIndex = PPCRecRA_getNextIterationIndex(); - imlSegmentLoopBase->raInfo.lastIterationIndex = iterationIndex; - if (_detectLoop(imlSegmentLoopBase->nextSegmentBranchTaken, 0, iterationIndex, imlSegmentLoopBase)) - { - imlSegmentLoopBase->loopDepth++; - } -} - -void PPCRecRA_identifyLoop(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - if (imlSegment->nextSegmentIsUncertain) - return; - // check if this segment has a branch that links to itself (tight loop) - if (imlSegment->nextSegmentBranchTaken == imlSegment) - { - // segment loops over itself - imlSegment->loopDepth++; - return; - } - // check if this segment has a branch that goes backwards (potential complex loop) - if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->momentaryIndex < imlSegment->momentaryIndex) - { - PPCRecRA_detectLoop(ppcImlGenContext, imlSegment); - } -} - -typedef struct -{ - sint32 name; - sint32 virtualRegister; - sint32 physicalRegister; - bool isDirty; -}raRegisterState_t; - -const sint32 _raInfo_physicalGPRCount = PPC_X64_GPR_USABLE_REGISTERS; - -raRegisterState_t* PPCRecRA_getRegisterState(raRegisterState_t* regState, sint32 virtualRegister) -{ - for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) - { - if (regState[i].virtualRegister == virtualRegister) - { -#ifndef PUBLIC_RELEASE - if (regState[i].physicalRegister < 0) - assert_dbg(); -#endif - return regState + i; - } - } - return nullptr; -} - -raRegisterState_t* PPCRecRA_getFreePhysicalRegister(raRegisterState_t* regState) -{ - for (sint32 i = 0; i < _raInfo_physicalGPRCount; i++) - { - if (regState[i].physicalRegister < 0) - { - regState[i].physicalRegister = i; - return regState + i; - } - } - return nullptr; -} - -typedef struct -{ - uint16 registerIndex; - uint16 registerName; -}raLoadStoreInfo_t; - -void PPCRecRA_insertGPRLoadInstruction(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName) -{ - PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1); - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList + 
(insertIndex + 0); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = registerIndex; - imlInstructionItr->op_r_name.name = registerName; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; -} - -void PPCRecRA_insertGPRLoadInstructions(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, raLoadStoreInfo_t* loadList, sint32 loadCount) -{ - PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, loadCount); - memset(imlSegment->imlList + (insertIndex + 0), 0x00, sizeof(PPCRecImlInstruction_t)*loadCount); - for (sint32 i = 0; i < loadCount; i++) - { - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList + (insertIndex + i); - imlInstructionItr->type = PPCREC_IML_TYPE_R_NAME; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = (uint8)loadList[i].registerIndex; - imlInstructionItr->op_r_name.name = (uint32)loadList[i].registerName; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; - } -} - -void PPCRecRA_insertGPRStoreInstruction(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, sint32 registerIndex, sint32 registerName) -{ - PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, 1); - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList + (insertIndex + 0); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = registerIndex; - imlInstructionItr->op_r_name.name = registerName; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; -} - -void PPCRecRA_insertGPRStoreInstructions(PPCRecImlSegment_t* imlSegment, sint32 insertIndex, raLoadStoreInfo_t* storeList, sint32 storeCount) -{ - PPCRecompiler_pushBackIMLInstructions(imlSegment, insertIndex, storeCount); - memset(imlSegment->imlList + (insertIndex + 0), 0x00, sizeof(PPCRecImlInstruction_t)*storeCount); - for (sint32 i = 0; i < storeCount; i++) - { - PPCRecImlInstruction_t* imlInstructionItr = imlSegment->imlList + (insertIndex + i); - memset(imlInstructionItr, 0x00, sizeof(PPCRecImlInstruction_t)); - imlInstructionItr->type = PPCREC_IML_TYPE_NAME_R; - imlInstructionItr->operation = PPCREC_IML_OP_ASSIGN; - imlInstructionItr->op_r_name.registerIndex = (uint8)storeList[i].registerIndex; - imlInstructionItr->op_r_name.name = (uint32)storeList[i].registerName; - imlInstructionItr->op_r_name.copyWidth = 32; - imlInstructionItr->op_r_name.flags = 0; - } -} - -#define SUBRANGE_LIST_SIZE (128) - -sint32 PPCRecRA_countInstructionsUntilNextUse(raLivenessSubrange_t* subrange, sint32 startIndex) -{ - for (sint32 i = 0; i < subrange->list_locations.size(); i++) - { - if (subrange->list_locations.data()[i].index >= startIndex) - return subrange->list_locations.data()[i].index - startIndex; - } - return INT_MAX; -} - -// count how many instructions there are until physRegister is used by any subrange (returns 0 if register is in use at startIndex, and INT_MAX if not used for the remainder of the segment) -sint32 PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(PPCRecImlSegment_t* imlSegment, sint32 startIndex, sint32 physRegister) -{ - sint32 minDistance = INT_MAX; - // next - raLivenessSubrange_t* subrangeItr = 
imlSegment->raInfo.linkedList_allSubranges; - while(subrangeItr) - { - if (subrangeItr->range->physicalRegister != physRegister) - { - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - continue; - } - if (startIndex >= subrangeItr->start.index && startIndex < subrangeItr->end.index) - return 0; - if (subrangeItr->start.index >= startIndex) - { - minDistance = std::min(minDistance, (subrangeItr->start.index - startIndex)); - } - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } - return minDistance; -} - -typedef struct -{ - raLivenessSubrange_t* liveRangeList[64]; - sint32 liveRangesCount; -}raLiveRangeInfo_t; - -// return a bitmask that contains only registers that are not used by any colliding range -uint32 PPCRecRA_getAllowedRegisterMaskForFullRange(raLivenessRange_t* range) -{ - uint32 physRegisterMask = (1 << PPC_X64_GPR_USABLE_REGISTERS) - 1; - for (auto& subrange : range->list_subranges) - { - PPCRecImlSegment_t* imlSegment = subrange->imlSegment; - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while(subrangeItr) - { - if (subrange == subrangeItr) - { - // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - continue; - } - - if (subrange->start.index < subrangeItr->end.index && subrange->end.index > subrangeItr->start.index || - (subrange->start.index == RA_INTER_RANGE_START && subrange->start.index == subrangeItr->start.index) || - (subrange->end.index == RA_INTER_RANGE_END && subrange->end.index == subrangeItr->end.index) ) - { - if(subrangeItr->range->physicalRegister >= 0) - physRegisterMask &= ~(1<<(subrangeItr->range->physicalRegister)); - } - // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } - } - return physRegisterMask; -} - -bool _livenessRangeStartCompare(raLivenessSubrange_t* lhs, raLivenessSubrange_t* rhs) { return lhs->start.index < rhs->start.index; } - -void _sortSegmentAllSubrangesLinkedList(PPCRecImlSegment_t* imlSegment) -{ - raLivenessSubrange_t* subrangeList[4096+1]; - sint32 count = 0; - // disassemble linked list - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while (subrangeItr) - { - if (count >= 4096) - assert_dbg(); - subrangeList[count] = subrangeItr; - count++; - // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } - if (count == 0) - { - imlSegment->raInfo.linkedList_allSubranges = nullptr; - return; - } - // sort - std::sort(subrangeList, subrangeList + count, _livenessRangeStartCompare); - //for (sint32 i1 = 0; i1 < count; i1++) - //{ - // for (sint32 i2 = i1+1; i2 < count; i2++) - // { - // if (subrangeList[i1]->start.index > subrangeList[i2]->start.index) - // { - // // swap - // raLivenessSubrange_t* temp = subrangeList[i1]; - // subrangeList[i1] = subrangeList[i2]; - // subrangeList[i2] = temp; - // } - // } - //} - // reassemble linked list - subrangeList[count] = nullptr; - imlSegment->raInfo.linkedList_allSubranges = subrangeList[0]; - subrangeList[0]->link_segmentSubrangesGPR.prev = nullptr; - subrangeList[0]->link_segmentSubrangesGPR.next = subrangeList[1]; - for (sint32 i = 1; i < count; i++) - { - subrangeList[i]->link_segmentSubrangesGPR.prev = subrangeList[i - 1]; - subrangeList[i]->link_segmentSubrangesGPR.next = subrangeList[i + 1]; - } - // validate list -#ifndef PUBLIC_RELEASE - sint32 count2 = 0; - subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - sint32 currentStartIndex = RA_INTER_RANGE_START; - while (subrangeItr) - { - count2++; - if (subrangeItr->start.index < 
currentStartIndex) - assert_dbg(); - currentStartIndex = subrangeItr->start.index; - // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } - if (count != count2) - assert_dbg(); -#endif -} - -bool PPCRecRA_assignSegmentRegisters(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - - // sort subranges ascending by start index - - //std::sort(imlSegment->raInfo.list_subranges.begin(), imlSegment->raInfo.list_subranges.end(), _sortSubrangesByStartIndexDepr); - _sortSegmentAllSubrangesLinkedList(imlSegment); - - raLiveRangeInfo_t liveInfo; - liveInfo.liveRangesCount = 0; - //sint32 subrangeIndex = 0; - //for (auto& subrange : imlSegment->raInfo.list_subranges) - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while(subrangeItr) - { - sint32 currentIndex = subrangeItr->start.index; - // validate subrange - PPCRecRA_debugValidateSubrange(subrangeItr); - // expire ranges - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) - { - raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; - if (liverange->end.index <= currentIndex && liverange->end.index != RA_INTER_RANGE_END) - { -#ifndef PUBLIC_RELEASE - if (liverange->subrangeBranchTaken || liverange->subrangeBranchNotTaken) - assert_dbg(); // infinite subranges should not expire -#endif - // remove entry - liveInfo.liveRangesCount--; - liveInfo.liveRangeList[f] = liveInfo.liveRangeList[liveInfo.liveRangesCount]; - f--; - } - } - // check if subrange already has register assigned - if (subrangeItr->range->physicalRegister >= 0) - { - // verify if register is actually available -#ifndef PUBLIC_RELEASE - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) - { - raLivenessSubrange_t* liverangeItr = liveInfo.liveRangeList[f]; - if (liverangeItr->range->physicalRegister == subrangeItr->range->physicalRegister) - { - // this should never happen because we try to preventively avoid register conflicts - assert_dbg(); - } - } -#endif - // add to live ranges - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; - // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - continue; - } - // find free register - uint32 physRegisterMask = (1<<PPC_X64_GPR_USABLE_REGISTERS)-1; - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) - { - raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; - if (liverange->range->physicalRegister < 0) - assert_dbg(); - physRegisterMask &= ~(1<<liverange->range->physicalRegister); - } - // check intersections with other ranges and determine allowed registers - uint32 allowedPhysRegisterMask = 0; - uint32 unusedRegisterMask = physRegisterMask; // mask of registers that are currently not used (does not include range checks) - if (physRegisterMask != 0) - { - allowedPhysRegisterMask = PPCRecRA_getAllowedRegisterMaskForFullRange(subrangeItr->range); - physRegisterMask &= allowedPhysRegisterMask; - } - if (physRegisterMask == 0) - { - struct - { - // estimated costs and chosen candidates for the different spill strategies - // hole cutting into a local range - struct - { - sint32 distance; - raLivenessSubrange_t* largestHoleSubrange; - sint32 cost; // additional cost of choosing this candidate - }localRangeHoleCutting; - // split current range (this is generally only a good choice when the current range is long but rarely used) - struct - { - sint32 cost; - sint32 physRegister; - sint32 distance; // size of hole - }availableRegisterHole; - // explode a inter-segment range (prefer ranges that are not read/written in this segment) - struct - { - raLivenessRange_t* range; - sint32 cost; - sint32 distance; // size of hole - // note: If we explode a range, we still have to check
the size of the hole that becomes available, if too small then we need to add cost of splitting local subrange - }explodeRange; - // todo - add more strategies, make cost estimation smarter (for example, in some cases splitting can have reduced or no cost if read/store can be avoided due to data flow) - }spillStrategies; - // cant assign register - // there might be registers available, we just can't use them due to range conflicts - if (subrangeItr->end.index != RA_INTER_RANGE_END) - { - // range ends in current segment - - // Current algo looks like this: - // 1) Get the size of the largest possible hole that we can cut into any of the live local subranges - // 1.1) Check if the hole is large enough to hold the current subrange - // 2) If yes, cut hole and return false (full retry) - // 3) If no, try to reuse free register (need to determine how large the region is we can use) - // 4) If there is no free register or the range is extremely short go back to step 1+2 but additionally split the current subrange at where the hole ends - - cemu_assert_debug(currentIndex == subrangeItr->start.index); - - sint32 requiredSize = subrangeItr->end.index - subrangeItr->start.index; - // evaluate strategy: Cut hole into local subrange - spillStrategies.localRangeHoleCutting.distance = -1; - spillStrategies.localRangeHoleCutting.largestHoleSubrange = nullptr; - spillStrategies.localRangeHoleCutting.cost = INT_MAX; - if (currentIndex >= 0) - { - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) - { - raLivenessSubrange_t* candidate = liveInfo.liveRangeList[f]; - if (candidate->end.index == RA_INTER_RANGE_END) - continue; - sint32 distance = PPCRecRA_countInstructionsUntilNextUse(candidate, currentIndex); - if (distance < 2) - continue; // not even worth the consideration - // calculate split cost of candidate - sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(candidate, currentIndex + distance); - // calculate additional split cost of currentRange if hole is not large enough - if (distance < requiredSize) - { - cost += PPCRecRARange_estimateAdditionalCostAfterSplit(subrangeItr, currentIndex + distance); - // we also slightly increase cost in relation to the remaining length (in order to make the algorithm prefer larger holes) - cost += (requiredSize - distance) / 10; - } - // compare cost with previous candidates - if (cost < spillStrategies.localRangeHoleCutting.cost) - { - spillStrategies.localRangeHoleCutting.cost = cost; - spillStrategies.localRangeHoleCutting.distance = distance; - spillStrategies.localRangeHoleCutting.largestHoleSubrange = candidate; - } - } - } - // evaluate strategy: Split current range to fit in available holes - spillStrategies.availableRegisterHole.cost = INT_MAX; - spillStrategies.availableRegisterHole.distance = -1; - spillStrategies.availableRegisterHole.physRegister = -1; - if (currentIndex >= 0) - { - if (unusedRegisterMask != 0) - { - for (sint32 t = 0; t < PPC_X64_GPR_USABLE_REGISTERS; t++) - { - if ((unusedRegisterMask&(1 << t)) == 0) - continue; - // get size of potential hole for this register - sint32 distance = PPCRecRA_countInstructionsUntilNextLocalPhysRegisterUse(imlSegment, currentIndex, t); - if (distance < 2) - continue; // not worth consideration - // calculate additional cost due to split - if (distance >= requiredSize) - assert_dbg(); // should not happen or else we would have selected this register - sint32 cost = PPCRecRARange_estimateAdditionalCostAfterSplit(subrangeItr, currentIndex + distance); - // add small additional cost for the 
remaining range (prefer larger holes) - cost += (requiredSize - distance) / 10; - if (cost < spillStrategies.availableRegisterHole.cost) - { - spillStrategies.availableRegisterHole.cost = cost; - spillStrategies.availableRegisterHole.distance = distance; - spillStrategies.availableRegisterHole.physRegister = t; - } - } - } - } - // evaluate strategy: Explode inter-segment ranges - spillStrategies.explodeRange.cost = INT_MAX; - spillStrategies.explodeRange.range = nullptr; - spillStrategies.explodeRange.distance = -1; - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) - { - raLivenessSubrange_t* candidate = liveInfo.liveRangeList[f]; - if (candidate->end.index != RA_INTER_RANGE_END) - continue; - sint32 distance = PPCRecRA_countInstructionsUntilNextUse(liveInfo.liveRangeList[f], currentIndex); - if( distance < 2) - continue; - sint32 cost; - cost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(candidate->range); - // if the hole is not large enough, add cost of splitting current subrange - if (distance < requiredSize) - { - cost += PPCRecRARange_estimateAdditionalCostAfterSplit(subrangeItr, currentIndex + distance); - // add small additional cost for the remaining range (prefer larger holes) - cost += (requiredSize - distance) / 10; - } - // compare with current best candidate for this strategy - if (cost < spillStrategies.explodeRange.cost) - { - spillStrategies.explodeRange.cost = cost; - spillStrategies.explodeRange.distance = distance; - spillStrategies.explodeRange.range = candidate->range; - } - } - // choose strategy - if (spillStrategies.explodeRange.cost != INT_MAX && spillStrategies.explodeRange.cost <= spillStrategies.localRangeHoleCutting.cost && spillStrategies.explodeRange.cost <= spillStrategies.availableRegisterHole.cost) - { - // explode range - PPCRecRA_explodeRange(ppcImlGenContext, spillStrategies.explodeRange.range); - // split current subrange if necessary - if( requiredSize > spillStrategies.explodeRange.distance) - PPCRecRA_splitLocalSubrange(ppcImlGenContext, subrangeItr, currentIndex+spillStrategies.explodeRange.distance, true); - } - else if (spillStrategies.availableRegisterHole.cost != INT_MAX && spillStrategies.availableRegisterHole.cost <= spillStrategies.explodeRange.cost && spillStrategies.availableRegisterHole.cost <= spillStrategies.localRangeHoleCutting.cost) - { - // use available register - PPCRecRA_splitLocalSubrange(ppcImlGenContext, subrangeItr, currentIndex + spillStrategies.availableRegisterHole.distance, true); - } - else if (spillStrategies.localRangeHoleCutting.cost != INT_MAX && spillStrategies.localRangeHoleCutting.cost <= spillStrategies.explodeRange.cost && spillStrategies.localRangeHoleCutting.cost <= spillStrategies.availableRegisterHole.cost) - { - // cut hole - PPCRecRA_splitLocalSubrange(ppcImlGenContext, spillStrategies.localRangeHoleCutting.largestHoleSubrange, currentIndex + spillStrategies.localRangeHoleCutting.distance, true); - // split current subrange if necessary - if (requiredSize > spillStrategies.localRangeHoleCutting.distance) - PPCRecRA_splitLocalSubrange(ppcImlGenContext, subrangeItr, currentIndex + spillStrategies.localRangeHoleCutting.distance, true); - } - else if (subrangeItr->start.index == RA_INTER_RANGE_START) - { - // alternative strategy if we have no other choice: explode current range - PPCRecRA_explodeRange(ppcImlGenContext, subrangeItr->range); - } - else - assert_dbg(); - - return false; - } - else - { - // range exceeds segment border - // simple but bad solution -> explode the entire range (no 
longer allow it to cross segment boundaries) - // better solutions: 1) Depending on the situation, we can explode other ranges to resolve the conflict. Thus we should explode the range with the lowest extra cost - // 2) Or we explode the range only partially - // explode the range with the least cost - spillStrategies.explodeRange.cost = INT_MAX; - spillStrategies.explodeRange.range = nullptr; - spillStrategies.explodeRange.distance = -1; - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) - { - raLivenessSubrange_t* candidate = liveInfo.liveRangeList[f]; - if (candidate->end.index != RA_INTER_RANGE_END) - continue; - // only select candidates that clash with current subrange - if (candidate->range->physicalRegister < 0 && candidate != subrangeItr) - continue; - - sint32 cost; - cost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(candidate->range); - // compare with current best candidate for this strategy - if (cost < spillStrategies.explodeRange.cost) - { - spillStrategies.explodeRange.cost = cost; - spillStrategies.explodeRange.distance = INT_MAX; - spillStrategies.explodeRange.range = candidate->range; - } - } - // add current range as a candidate too - sint32 ownCost; - ownCost = PPCRecRARange_estimateAdditionalCostAfterRangeExplode(subrangeItr->range); - if (ownCost < spillStrategies.explodeRange.cost) - { - spillStrategies.explodeRange.cost = ownCost; - spillStrategies.explodeRange.distance = INT_MAX; - spillStrategies.explodeRange.range = subrangeItr->range; - } - if (spillStrategies.explodeRange.cost == INT_MAX) - assert_dbg(); // should not happen - PPCRecRA_explodeRange(ppcImlGenContext, spillStrategies.explodeRange.range); - } - return false; - } - // assign register to range - sint32 registerIndex = -1; - for (sint32 f = 0; f < PPC_X64_GPR_USABLE_REGISTERS; f++) - { - if ((physRegisterMask&(1 << f)) != 0) - { - registerIndex = f; - break; - } - } - subrangeItr->range->physicalRegister = registerIndex; - // add to live ranges - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; - // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } - return true; -} - -void PPCRecRA_assignRegisters(ppcImlGenContext_t* ppcImlGenContext) -{ - // start with frequently executed segments first - sint32 maxLoopDepth = 0; - for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++) - { - maxLoopDepth = std::max(maxLoopDepth, ppcImlGenContext->segmentList[i]->loopDepth); - } - while (true) - { - bool done = false; - for (sint32 d = maxLoopDepth; d >= 0; d--) - { - for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i]; - if (imlSegment->loopDepth != d) - continue; - done = PPCRecRA_assignSegmentRegisters(ppcImlGenContext, imlSegment); - if (done == false) - break; - } - if (done == false) - break; - } - if (done) - break; - } -} - -typedef struct -{ - raLivenessSubrange_t* subrangeList[SUBRANGE_LIST_SIZE]; - sint32 subrangeCount; - bool hasUndefinedEndings; -}subrangeEndingInfo_t; - -void _findSubrangeWriteEndings(raLivenessSubrange_t* subrange, uint32 iterationIndex, sint32 depth, subrangeEndingInfo_t* info) -{ - if (depth >= 30) - { - info->hasUndefinedEndings = true; - return; - } - if (subrange->lastIterationIndex == iterationIndex) - return; // already processed - subrange->lastIterationIndex = iterationIndex; - if (subrange->hasStoreDelayed) - return; // no need to traverse this subrange - PPCRecImlSegment_t* imlSegment = 
subrange->imlSegment; - if (subrange->end.index != RA_INTER_RANGE_END) - { - // ending segment - if (info->subrangeCount >= SUBRANGE_LIST_SIZE) - { - info->hasUndefinedEndings = true; - return; - } - else - { - info->subrangeList[info->subrangeCount] = subrange; - info->subrangeCount++; - } - return; - } - - // traverse next subranges in flow - if (imlSegment->nextSegmentBranchNotTaken) - { - if (subrange->subrangeBranchNotTaken == nullptr) - { - info->hasUndefinedEndings = true; - } - else - { - _findSubrangeWriteEndings(subrange->subrangeBranchNotTaken, iterationIndex, depth + 1, info); - } - } - if (imlSegment->nextSegmentBranchTaken) - { - if (subrange->subrangeBranchTaken == nullptr) - { - info->hasUndefinedEndings = true; - } - else - { - _findSubrangeWriteEndings(subrange->subrangeBranchTaken, iterationIndex, depth + 1, info); - } - } -} - -void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange) -{ - if (subrange->end.index != RA_INTER_RANGE_END) - return; - // analyze data flow across segments (if this segment has writes) - if (subrange->hasStore) - { - subrangeEndingInfo_t writeEndingInfo; - writeEndingInfo.subrangeCount = 0; - writeEndingInfo.hasUndefinedEndings = false; - _findSubrangeWriteEndings(subrange, PPCRecRA_getNextIterationIndex(), 0, &writeEndingInfo); - if (writeEndingInfo.hasUndefinedEndings == false) - { - // get cost of delaying store into endings - sint32 delayStoreCost = 0; - bool alreadyStoredInAllEndings = true; - for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) - { - raLivenessSubrange_t* subrangeItr = writeEndingInfo.subrangeList[i]; - if( subrangeItr->hasStore ) - continue; // this ending already stores, no extra cost - alreadyStoredInAllEndings = false; - sint32 storeCost = PPCRecRARange_getReadWriteCost(subrangeItr->imlSegment); - delayStoreCost = std::max(storeCost, delayStoreCost); - } - if (alreadyStoredInAllEndings) - { - subrange->hasStore = false; - subrange->hasStoreDelayed = true; - } - else if (delayStoreCost <= PPCRecRARange_getReadWriteCost(subrange->imlSegment)) - { - subrange->hasStore = false; - subrange->hasStoreDelayed = true; - for (sint32 i = 0; i < writeEndingInfo.subrangeCount; i++) - { - raLivenessSubrange_t* subrangeItr = writeEndingInfo.subrangeList[i]; - subrangeItr->hasStore = true; - } - } - } - } -} - -void PPCRecRA_generateSegmentInstructions(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - sint16 virtualReg2PhysReg[PPC_REC_MAX_VIRTUAL_GPR]; - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) - virtualReg2PhysReg[i] = -1; - - raLiveRangeInfo_t liveInfo; - liveInfo.liveRangesCount = 0; - sint32 index = 0; - sint32 suffixInstructionCount = (imlSegment->imlListCount > 0 && PPCRecompiler_isSuffixInstruction(imlSegment->imlList + imlSegment->imlListCount - 1)) ? 
1 : 0; - // load register ranges that are supplied from previous segments - raLivenessSubrange_t* subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - //for (auto& subrange : imlSegment->raInfo.list_subranges) - while(subrangeItr) - { - if (subrangeItr->start.index == RA_INTER_RANGE_START) - { - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; -#ifndef PUBLIC_RELEASE - // load GPR - if (subrangeItr->_noLoad == false) - { - assert_dbg(); - } - // update translation table - if (virtualReg2PhysReg[subrangeItr->range->virtualRegister] != -1) - assert_dbg(); -#endif - virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; - } - // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } - // process instructions - while(index < imlSegment->imlListCount+1) - { - // expire ranges - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) - { - raLivenessSubrange_t* liverange = liveInfo.liveRangeList[f]; - if (liverange->end.index <= index) - { - // update translation table - if (virtualReg2PhysReg[liverange->range->virtualRegister] == -1) - assert_dbg(); - virtualReg2PhysReg[liverange->range->virtualRegister] = -1; - // store GPR - if (liverange->hasStore) - { - PPCRecRA_insertGPRStoreInstruction(imlSegment, std::min(index, imlSegment->imlListCount - suffixInstructionCount), liverange->range->physicalRegister, liverange->range->name); - index++; - } - // remove entry - liveInfo.liveRangesCount--; - liveInfo.liveRangeList[f] = liveInfo.liveRangeList[liveInfo.liveRangesCount]; - f--; - } - } - // load new ranges - subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - while(subrangeItr) - { - if (subrangeItr->start.index == index) - { - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; - // load GPR - if (subrangeItr->_noLoad == false) - { - PPCRecRA_insertGPRLoadInstruction(imlSegment, std::min(index, imlSegment->imlListCount - suffixInstructionCount), subrangeItr->range->physicalRegister, subrangeItr->range->name); - index++; - subrangeItr->start.index--; - } - // update translation table - cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1); - virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; - } - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } - // replace registers - if (index < imlSegment->imlListCount) - { - PPCImlOptimizerUsedRegisters_t gprTracking; - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); - - sint32 inputGpr[4]; - inputGpr[0] = gprTracking.gpr[0]; - inputGpr[1] = gprTracking.gpr[1]; - inputGpr[2] = gprTracking.gpr[2]; - inputGpr[3] = gprTracking.gpr[3]; - sint32 replaceGpr[4]; - for (sint32 f = 0; f < 4; f++) - { - sint32 virtualRegister = gprTracking.gpr[f]; - if (virtualRegister < 0) - { - replaceGpr[f] = -1; - continue; - } - if (virtualRegister >= PPC_REC_MAX_VIRTUAL_GPR) - assert_dbg(); - replaceGpr[f] = virtualReg2PhysReg[virtualRegister]; - cemu_assert_debug(replaceGpr[f] >= 0); - } - PPCRecompiler_replaceGPRRegisterUsageMultiple(ppcImlGenContext, imlSegment->imlList + index, inputGpr, replaceGpr); - } - // next iml instruction - index++; - } - // expire infinite subranges (subranges that cross the segment border) - sint32 storeLoadListLength = 0; - raLoadStoreInfo_t loadStoreList[PPC_REC_MAX_VIRTUAL_GPR]; - for (sint32 f = 0; f < liveInfo.liveRangesCount; f++) - { - raLivenessSubrange_t* liverange = 
liveInfo.liveRangeList[f]; - if (liverange->end.index == RA_INTER_RANGE_END) - { - // update translation table - cemu_assert_debug(virtualReg2PhysReg[liverange->range->virtualRegister] != -1); - virtualReg2PhysReg[liverange->range->virtualRegister] = -1; - // store GPR - if (liverange->hasStore) - { - loadStoreList[storeLoadListLength].registerIndex = liverange->range->physicalRegister; - loadStoreList[storeLoadListLength].registerName = liverange->range->name; - storeLoadListLength++; - } - // remove entry - liveInfo.liveRangesCount--; - liveInfo.liveRangeList[f] = liveInfo.liveRangeList[liveInfo.liveRangesCount]; - f--; - } - else - { - cemu_assert_suspicious(); - } - } - if (storeLoadListLength > 0) - { - PPCRecRA_insertGPRStoreInstructions(imlSegment, imlSegment->imlListCount - suffixInstructionCount, loadStoreList, storeLoadListLength); - } - // load subranges for next segments - subrangeItr = imlSegment->raInfo.linkedList_allSubranges; - storeLoadListLength = 0; - while(subrangeItr) - { - if (subrangeItr->start.index == RA_INTER_RANGE_END) - { - liveInfo.liveRangeList[liveInfo.liveRangesCount] = subrangeItr; - liveInfo.liveRangesCount++; - // load GPR - if (subrangeItr->_noLoad == false) - { - loadStoreList[storeLoadListLength].registerIndex = subrangeItr->range->physicalRegister; - loadStoreList[storeLoadListLength].registerName = subrangeItr->range->name; - storeLoadListLength++; - } - // update translation table - cemu_assert_debug(virtualReg2PhysReg[subrangeItr->range->virtualRegister] == -1); - virtualReg2PhysReg[subrangeItr->range->virtualRegister] = subrangeItr->range->physicalRegister; - } - // next - subrangeItr = subrangeItr->link_segmentSubrangesGPR.next; - } - if (storeLoadListLength > 0) - { - PPCRecRA_insertGPRLoadInstructions(imlSegment, imlSegment->imlListCount - suffixInstructionCount, loadStoreList, storeLoadListLength); - } -} - -void PPCRecRA_generateMoveInstructions(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - PPCRecRA_generateSegmentInstructions(ppcImlGenContext, imlSegment); - } -} - -void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext); -void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext); - -void PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext_t* ppcImlGenContext) -{ - // insert empty segments after every non-taken branch if the linked segment has more than one input - // this gives the register allocator more room to create efficient spill code - sint32 segmentIndex = 0; - while (segmentIndex < ppcImlGenContext->segmentListCount) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[segmentIndex]; - if (imlSegment->nextSegmentIsUncertain) - { - segmentIndex++; - continue; - } - if (imlSegment->nextSegmentBranchTaken == nullptr || imlSegment->nextSegmentBranchNotTaken == nullptr) - { - segmentIndex++; - continue; - } - if (imlSegment->nextSegmentBranchNotTaken->list_prevSegments.size() <= 1) - { - segmentIndex++; - continue; - } - if (imlSegment->nextSegmentBranchNotTaken->isEnterable) - { - segmentIndex++; - continue; - } - PPCRecompilerIml_insertSegments(ppcImlGenContext, segmentIndex + 1, 1); - PPCRecImlSegment_t* imlSegmentP0 = ppcImlGenContext->segmentList[segmentIndex + 0]; - PPCRecImlSegment_t* imlSegmentP1 = 
ppcImlGenContext->segmentList[segmentIndex + 1]; - PPCRecImlSegment_t* nextSegment = imlSegment->nextSegmentBranchNotTaken; - PPCRecompilerIML_removeLink(imlSegmentP0, nextSegment); - PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP1, nextSegment); - PPCRecompilerIml_setLinkBranchNotTaken(imlSegmentP0, imlSegmentP1); - segmentIndex++; - } - // detect loops - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - imlSegment->momentaryIndex = s; - } - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - PPCRecRA_identifyLoop(ppcImlGenContext, imlSegment); - } -} - -void PPCRecompilerImm_allocateRegisters(ppcImlGenContext_t* ppcImlGenContext) -{ - PPCRecompilerImm_prepareForRegisterAllocation(ppcImlGenContext); - - ppcImlGenContext->raInfo.list_ranges = std::vector<raLivenessRange_t*>(); - - // calculate liveness - PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext); - PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext); - - PPCRecRA_assignRegisters(ppcImlGenContext); - - PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext); - PPCRecRA_generateMoveInstructions(ppcImlGenContext); - - PPCRecRA_deleteAllRanges(ppcImlGenContext); -} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp deleted file mode 100644 index e2070703..00000000 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerImlRegisterAllocator2.cpp +++ /dev/null @@ -1,414 +0,0 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" -#include "PPCRecompilerImlRanges.h" -#include - -bool _isRangeDefined(PPCRecImlSegment_t* imlSegment, sint32 vGPR) -{ - return (imlSegment->raDistances.reg[vGPR].usageStart != INT_MAX); -} - -void PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) - { - imlSegment->raDistances.reg[i].usageStart = INT_MAX; - imlSegment->raDistances.reg[i].usageEnd = INT_MIN; - } - // scan instructions for usage range - sint32 index = 0; - PPCImlOptimizerUsedRegisters_t gprTracking; - while (index < imlSegment->imlListCount) - { - // end loop at suffix instruction - if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index)) - break; - // get accessed GPRs - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); - for (sint32 t = 0; t < 4; t++) - { - sint32 virtualRegister = gprTracking.gpr[t]; - if (virtualRegister < 0) - continue; - cemu_assert_debug(virtualRegister < PPC_REC_MAX_VIRTUAL_GPR); - imlSegment->raDistances.reg[virtualRegister].usageStart = std::min(imlSegment->raDistances.reg[virtualRegister].usageStart, index); // index before/at instruction - imlSegment->raDistances.reg[virtualRegister].usageEnd = std::max(imlSegment->raDistances.reg[virtualRegister].usageEnd, index+1); // index after instruction - } - // next instruction - index++; - } -} - -void PPCRecRA_calculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) -{ - // for each register calculate min/max index of usage range within each segment - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecRA_calculateSegmentMinMaxRanges(ppcImlGenContext, ppcImlGenContext->segmentList[s]); - } -} - -raLivenessSubrange_t*
PPCRecRA_convertToMappedRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR, raLivenessRange_t* range) -{ - if (imlSegment->raDistances.isProcessed[vGPR]) - { - // return already existing segment - return imlSegment->raInfo.linkedList_perVirtualGPR[vGPR]; - } - imlSegment->raDistances.isProcessed[vGPR] = true; - if (_isRangeDefined(imlSegment, vGPR) == false) - return nullptr; - // create subrange - cemu_assert_debug(imlSegment->raInfo.linkedList_perVirtualGPR[vGPR] == nullptr); - raLivenessSubrange_t* subrange = PPCRecRA_createSubrange(ppcImlGenContext, range, imlSegment, imlSegment->raDistances.reg[vGPR].usageStart, imlSegment->raDistances.reg[vGPR].usageEnd); - // traverse forward - if (imlSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) - { - if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) - { - subrange->subrangeBranchTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, vGPR, range); - cemu_assert_debug(subrange->subrangeBranchTaken->start.index == RA_INTER_RANGE_START); - } - if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) - { - subrange->subrangeBranchNotTaken = PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, vGPR, range); - cemu_assert_debug(subrange->subrangeBranchNotTaken->start.index == RA_INTER_RANGE_START); - } - } - // traverse backward - if (imlSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_START) - { - for (auto& it : imlSegment->list_prevSegments) - { - if (it->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) - PPCRecRA_convertToMappedRanges(ppcImlGenContext, it, vGPR, range); - } - } - // return subrange - return subrange; -} - -void PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) - { - if( _isRangeDefined(imlSegment, i) == false ) - continue; - if( imlSegment->raDistances.isProcessed[i]) - continue; - raLivenessRange_t* range = PPCRecRA_createRangeBase(ppcImlGenContext, i, ppcImlGenContext->mappedRegister[i]); - PPCRecRA_convertToMappedRanges(ppcImlGenContext, imlSegment, i, range); - } - // create lookup table of ranges - raLivenessSubrange_t* vGPR2Subrange[PPC_REC_MAX_VIRTUAL_GPR]; - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) - { - vGPR2Subrange[i] = imlSegment->raInfo.linkedList_perVirtualGPR[i]; -#ifndef PUBLIC_RELEASE - if (vGPR2Subrange[i] && vGPR2Subrange[i]->link_sameVirtualRegisterGPR.next != nullptr) - assert_dbg(); -#endif - } - // parse instructions and convert to locations - sint32 index = 0; - PPCImlOptimizerUsedRegisters_t gprTracking; - while (index < imlSegment->imlListCount) - { - // end loop at suffix instruction - if (PPCRecompiler_isSuffixInstruction(imlSegment->imlList + index)) - break; - // get accessed GPRs - PPCRecompiler_checkRegisterUsage(NULL, imlSegment->imlList + index, &gprTracking); - // handle accessed GPR - for (sint32 t = 0; t < 4; t++) - { - sint32 virtualRegister = gprTracking.gpr[t]; - if (virtualRegister < 0) - continue; - bool isWrite = (t == 3); - // add location - PPCRecRA_updateOrAddSubrangeLocation(vGPR2Subrange[virtualRegister], index, isWrite == false, isWrite); -#ifndef PUBLIC_RELEASE - if (index < vGPR2Subrange[virtualRegister]->start.index) - assert_dbg(); - if (index+1 > 
vGPR2Subrange[virtualRegister]->end.index) - assert_dbg(); -#endif - } - // next instruction - index++; - } -} - -void PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR) -{ - if (_isRangeDefined(imlSegment, vGPR) == false) - { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_END; - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; - return; - } - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_END; -} - -void PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment, sint32 vGPR) -{ - if (_isRangeDefined(imlSegment, vGPR) == false) - { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; - imlSegment->raDistances.reg[vGPR].usageEnd = RA_INTER_RANGE_START; - } - else - { - imlSegment->raDistances.reg[vGPR].usageStart = RA_INTER_RANGE_START; - } - // propagate backwards - for (auto& it : imlSegment->list_prevSegments) - { - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, it, vGPR); - } -} - -void _PPCRecRA_connectRanges(ppcImlGenContext_t* ppcImlGenContext, sint32 vGPR, PPCRecImlSegment_t** route, sint32 routeDepth) -{ -#ifndef PUBLIC_RELEASE - if (routeDepth < 2) - assert_dbg(); -#endif - // extend starting range to end of segment - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[0], vGPR); - // extend all the connecting segments in both directions - for (sint32 i = 1; i < (routeDepth - 1); i++) - { - PPCRecRA_extendRangeToEndOfSegment(ppcImlGenContext, route[i], vGPR); - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[i], vGPR); - } - // extend the final segment towards the beginning - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, route[routeDepth-1], vGPR); -} - -void _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR, sint32 distanceLeft, PPCRecImlSegment_t** route, sint32 routeDepth) -{ - if (routeDepth >= 64) - { - forceLogDebug_printf("Recompiler RA route maximum depth exceeded for function 0x%08x\n", ppcImlGenContext->functionRef->ppcAddress); - return; - } - route[routeDepth] = currentSegment; - if (currentSegment->raDistances.reg[vGPR].usageStart == INT_MAX) - { - // measure distance to end of segment - distanceLeft -= currentSegment->imlListCount; - if (distanceLeft > 0) - { - if (currentSegment->nextSegmentBranchNotTaken) - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, distanceLeft, route, routeDepth + 1); - if (currentSegment->nextSegmentBranchTaken) - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, distanceLeft, route, routeDepth + 1); - } - return; - } - else - { - // measure distance to range - if (currentSegment->raDistances.reg[vGPR].usageStart == RA_INTER_RANGE_END) - { - if (distanceLeft < currentSegment->imlListCount) - return; // range too far away - } - else if (currentSegment->raDistances.reg[vGPR].usageStart != RA_INTER_RANGE_START && currentSegment->raDistances.reg[vGPR].usageStart > distanceLeft) - return; // out of range - // found close range -> connect ranges - _PPCRecRA_connectRanges(ppcImlGenContext, vGPR, route, routeDepth + 1); - } -} - -void PPCRecRA_checkAndTryExtendRange(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* currentSegment, sint32 vGPR) -{ -#ifndef PUBLIC_RELEASE - if (currentSegment->raDistances.reg[vGPR].usageEnd < 0) - assert_dbg(); -#endif - 
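// Illustrative sketch of the route merging above, using stand-in types: Segment, its two
// fields and the sentinel constants are simplified placeholders for the real IML
// structures, and the backward propagation into predecessor segments performed by
// PPCRecRA_extendRangeToBeginningOfSegment is left out. The point is only how a found
// route becomes one contiguous liveness range: the first segment is extended to its end,
// every inner segment is covered completely, and the last segment is extended back to
// its beginning.
#include <vector>

constexpr int INTER_RANGE_START = -1; // placeholder for RA_INTER_RANGE_START
constexpr int INTER_RANGE_END = -2;   // placeholder for RA_INTER_RANGE_END

struct Segment
{
	int usageStart; // index of the first use inside the segment
	int usageEnd;   // index just past the last use
};

void ConnectAlongRoute(std::vector<Segment*>& route)
{
	// first segment on the route: keep the register live until the segment ends
	route.front()->usageEnd = INTER_RANGE_END;
	// inner segments: the register is live across the whole segment
	for (size_t i = 1; i + 1 < route.size(); i++)
	{
		route[i]->usageStart = INTER_RANGE_START;
		route[i]->usageEnd = INTER_RANGE_END;
	}
	// last segment: live from segment entry up to its first use
	route.back()->usageStart = INTER_RANGE_START;
}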
// count instructions to end of initial segment - if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_START) - assert_dbg(); - sint32 instructionsUntilEndOfSeg; - if (currentSegment->raDistances.reg[vGPR].usageEnd == RA_INTER_RANGE_END) - instructionsUntilEndOfSeg = 0; - else - instructionsUntilEndOfSeg = currentSegment->imlListCount - currentSegment->raDistances.reg[vGPR].usageEnd; - -#ifndef PUBLIC_RELEASE - if (instructionsUntilEndOfSeg < 0) - assert_dbg(); -#endif - sint32 remainingScanDist = 45 - instructionsUntilEndOfSeg; - if (remainingScanDist <= 0) - return; // can't reach end - - // also dont forget: Extending is easier if we allow 'non symetric' branches. E.g. register range one enters one branch - PPCRecImlSegment_t* route[64]; - route[0] = currentSegment; - if (currentSegment->nextSegmentBranchNotTaken) - { - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchNotTaken, vGPR, remainingScanDist, route, 1); - } - if (currentSegment->nextSegmentBranchTaken) - { - _PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, currentSegment->nextSegmentBranchTaken, vGPR, remainingScanDist, route, 1); - } -} - -void PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries - { - if(imlSegment->raDistances.reg[i].usageStart == INT_MAX) - continue; // not used - // check and extend if possible - PPCRecRA_checkAndTryExtendRange(ppcImlGenContext, imlSegment, i); - } -#ifndef PUBLIC_RELEASE - if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable) - assert_dbg(); - if ((imlSegment->nextSegmentBranchNotTaken != nullptr || imlSegment->nextSegmentBranchTaken != nullptr) && imlSegment->nextSegmentIsUncertain) - assert_dbg(); -#endif -} - -void PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext_t* ppcImlGenContext, PPCRecImlSegment_t* imlSegment) -{ - std::vector list_segments; - list_segments.reserve(1000); - sint32 index = 0; - imlSegment->raRangeExtendProcessed = true; - list_segments.push_back(imlSegment); - while (index < list_segments.size()) - { - PPCRecImlSegment_t* currentSegment = list_segments[index]; - PPCRecRA_mergeCloseRangesForSegmentV2(ppcImlGenContext, currentSegment); - // follow flow - if (currentSegment->nextSegmentBranchNotTaken && currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed == false) - { - currentSegment->nextSegmentBranchNotTaken->raRangeExtendProcessed = true; - list_segments.push_back(currentSegment->nextSegmentBranchNotTaken); - } - if (currentSegment->nextSegmentBranchTaken && currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed == false) - { - currentSegment->nextSegmentBranchTaken->raRangeExtendProcessed = true; - list_segments.push_back(currentSegment->nextSegmentBranchTaken); - } - index++; - } -} - -void PPCRecRA_mergeCloseRangesV2(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - if (imlSegment->list_prevSegments.empty()) - { - if (imlSegment->raRangeExtendProcessed) - assert_dbg(); // should not happen - PPCRecRA_followFlowAndExtendRanges(ppcImlGenContext, imlSegment); - } - } -} - -void PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext_t* ppcImlGenContext) -{ - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - 
PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - auto localLoopDepth = imlSegment->loopDepth; - if( localLoopDepth <= 0 ) - continue; // not inside a loop - // look for loop exit - bool hasLoopExit = false; - if (imlSegment->nextSegmentBranchTaken && imlSegment->nextSegmentBranchTaken->loopDepth < localLoopDepth) - { - hasLoopExit = true; - } - if (imlSegment->nextSegmentBranchNotTaken && imlSegment->nextSegmentBranchNotTaken->loopDepth < localLoopDepth) - { - hasLoopExit = true; - } - if(hasLoopExit == false) - continue; - - // extend looping ranges into all exits (this allows the data flow analyzer to move stores out of the loop) - for (sint32 i = 0; i < PPC_REC_MAX_VIRTUAL_GPR; i++) // todo: Use dynamic maximum or list of used vGPRs so we can avoid parsing empty entries - { - if (imlSegment->raDistances.reg[i].usageEnd != RA_INTER_RANGE_END) - continue; // range not set or does not reach end of segment - if(imlSegment->nextSegmentBranchTaken) - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchTaken, i); - if(imlSegment->nextSegmentBranchNotTaken) - PPCRecRA_extendRangeToBeginningOfSegment(ppcImlGenContext, imlSegment->nextSegmentBranchNotTaken, i); - } - } -} - -void PPCRecRA_processFlowAndCalculateLivenessRangesV2(ppcImlGenContext_t* ppcImlGenContext) -{ - // merge close ranges - PPCRecRA_mergeCloseRangesV2(ppcImlGenContext); - // extra pass to move register stores out of loops - PPCRecRA_extendRangesOutOfLoopsV2(ppcImlGenContext); - // calculate liveness ranges - for (sint32 s = 0; s < ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - PPCRecRA_createSegmentLivenessRanges(ppcImlGenContext, imlSegment); - } -} - -void PPCRecRA_analyzeSubrangeDataDependencyV2(raLivenessSubrange_t* subrange) -{ - bool isRead = false; - bool isWritten = false; - bool isOverwritten = false; - for (auto& location : subrange->list_locations) - { - if (location.isRead) - { - isRead = true; - } - if (location.isWrite) - { - if (isRead == false) - isOverwritten = true; - isWritten = true; - } - } - subrange->_noLoad = isOverwritten; - subrange->hasStore = isWritten; - - if (subrange->start.index == RA_INTER_RANGE_START) - subrange->_noLoad = true; -} - -void _analyzeRangeDataFlow(raLivenessSubrange_t* subrange); - -void PPCRecRA_analyzeRangeDataFlowV2(ppcImlGenContext_t* ppcImlGenContext) -{ - // this function is called after _assignRegisters(), which means that all ranges are already final and wont change anymore - // first do a per-subrange pass - for (auto& range : ppcImlGenContext->raInfo.list_ranges) - { - for (auto& subrange : range->list_subranges) - { - PPCRecRA_analyzeSubrangeDataDependencyV2(subrange); - } - } - // then do a second pass where we scan along subrange flow - for (auto& range : ppcImlGenContext->raInfo.list_ranges) - { - for (auto& subrange : range->list_subranges) // todo - traversing this backwards should be faster and yield better results due to the nature of the algorithm - { - _analyzeRangeDataFlow(subrange); - } - } -} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp index fcbe64be..468af5b2 100644 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerIntermediate.cpp @@ -1,173 +1,26 @@ #include "PPCRecompiler.h" #include "PPCRecompilerIml.h" -PPCRecImlSegment_t* 
PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext_t* ppcImlGenContext, uint32 ppcOffset) -{ - for(sint32 s=0; ssegmentListCount; s++) - { - if( ppcImlGenContext->segmentList[s]->isJumpDestination && ppcImlGenContext->segmentList[s]->jumpDestinationPPCAddress == ppcOffset ) - { - return ppcImlGenContext->segmentList[s]; - } - } - debug_printf("PPCRecompiler_getSegmentByPPCJumpAddress(): Unable to find segment (ppcOffset 0x%08x)\n", ppcOffset); - return NULL; -} - -void PPCRecompilerIml_setLinkBranchNotTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst) -{ - // make sure segments aren't already linked - if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst) - return; - // add as next segment for source - if (imlSegmentSrc->nextSegmentBranchNotTaken != NULL) - assert_dbg(); - imlSegmentSrc->nextSegmentBranchNotTaken = imlSegmentDst; - // add as previous segment for destination - imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc); -} - -void PPCRecompilerIml_setLinkBranchTaken(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst) -{ - // make sure segments aren't already linked - if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst) - return; - // add as next segment for source - if (imlSegmentSrc->nextSegmentBranchTaken != NULL) - assert_dbg(); - imlSegmentSrc->nextSegmentBranchTaken = imlSegmentDst; - // add as previous segment for destination - imlSegmentDst->list_prevSegments.push_back(imlSegmentSrc); -} - -void PPCRecompilerIML_removeLink(PPCRecImlSegment_t* imlSegmentSrc, PPCRecImlSegment_t* imlSegmentDst) -{ - if (imlSegmentSrc->nextSegmentBranchNotTaken == imlSegmentDst) - { - imlSegmentSrc->nextSegmentBranchNotTaken = NULL; - } - else if (imlSegmentSrc->nextSegmentBranchTaken == imlSegmentDst) - { - imlSegmentSrc->nextSegmentBranchTaken = NULL; - } - else - assert_dbg(); - - bool matchFound = false; - for (sint32 i = 0; i < imlSegmentDst->list_prevSegments.size(); i++) - { - if (imlSegmentDst->list_prevSegments[i] == imlSegmentSrc) - { - imlSegmentDst->list_prevSegments.erase(imlSegmentDst->list_prevSegments.begin()+i); - matchFound = true; - break; - } - } - if (matchFound == false) - assert_dbg(); -} - -/* - * Replaces all links to segment orig with linkts to segment new - */ -void PPCRecompilerIML_relinkInputSegment(PPCRecImlSegment_t* imlSegmentOrig, PPCRecImlSegment_t* imlSegmentNew) -{ - while (imlSegmentOrig->list_prevSegments.size() != 0) - { - PPCRecImlSegment_t* prevSegment = imlSegmentOrig->list_prevSegments[0]; - if (prevSegment->nextSegmentBranchNotTaken == imlSegmentOrig) - { - PPCRecompilerIML_removeLink(prevSegment, imlSegmentOrig); - PPCRecompilerIml_setLinkBranchNotTaken(prevSegment, imlSegmentNew); - } - else if (prevSegment->nextSegmentBranchTaken == imlSegmentOrig) - { - PPCRecompilerIML_removeLink(prevSegment, imlSegmentOrig); - PPCRecompilerIml_setLinkBranchTaken(prevSegment, imlSegmentNew); - } - else - { - assert_dbg(); - } - } -} - -void PPCRecompilerIML_linkSegments(ppcImlGenContext_t* ppcImlGenContext) -{ - for(sint32 s=0; ssegmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - - bool isLastSegment = (s+1)>=ppcImlGenContext->segmentListCount; - PPCRecImlSegment_t* nextSegment = isLastSegment?NULL:ppcImlGenContext->segmentList[s+1]; - // handle empty segment - if( imlSegment->imlListCount == 0 ) - { - if (isLastSegment == false) - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, ppcImlGenContext->segmentList[s+1]); // continue execution to next 
segment - else - imlSegment->nextSegmentIsUncertain = true; - continue; - } - // check last instruction of segment - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+(imlSegment->imlListCount-1); - if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP || imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) - { - // find destination segment by ppc jump address - PPCRecImlSegment_t* jumpDestSegment = PPCRecompiler_getSegmentByPPCJumpAddress(ppcImlGenContext, imlInstruction->op_conditionalJump.jumpmarkAddress); - if( jumpDestSegment ) - { - if (imlInstruction->op_conditionalJump.condition != PPCREC_JUMP_CONDITION_NONE) - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, nextSegment); - PPCRecompilerIml_setLinkBranchTaken(imlSegment, jumpDestSegment); - } - else - { - imlSegment->nextSegmentIsUncertain = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) - { - // currently we assume that the next segment is unknown for all macros - imlSegment->nextSegmentIsUncertain = true; - } - else - { - // all other instruction types do not branch - //imlSegment->nextSegment[0] = nextSegment; - PPCRecompilerIml_setLinkBranchNotTaken(imlSegment, nextSegment); - //imlSegment->nextSegmentIsUncertain = true; - } - } -} - void PPCRecompilerIML_isolateEnterableSegments(ppcImlGenContext_t* ppcImlGenContext) { - sint32 initialSegmentCount = ppcImlGenContext->segmentListCount; - for (sint32 i = 0; i < ppcImlGenContext->segmentListCount; i++) + size_t initialSegmentCount = ppcImlGenContext->segmentList2.size(); + for (size_t i = 0; i < initialSegmentCount; i++) { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[i]; + IMLSegment* imlSegment = ppcImlGenContext->segmentList2[i]; if (imlSegment->list_prevSegments.empty() == false && imlSegment->isEnterable) { // spawn new segment at end - PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentListCount, 1); - PPCRecImlSegment_t* entrySegment = ppcImlGenContext->segmentList[ppcImlGenContext->segmentListCount-1]; + PPCRecompilerIml_insertSegments(ppcImlGenContext, ppcImlGenContext->segmentList2.size(), 1); + IMLSegment* entrySegment = ppcImlGenContext->segmentList2[ppcImlGenContext->segmentList2.size()-1]; entrySegment->isEnterable = true; entrySegment->enterPPCAddress = imlSegment->enterPPCAddress; // create jump instruction PPCRecompiler_pushBackIMLInstructions(entrySegment, 0, 1); - PPCRecompilerImlGen_generateNewInstruction_jumpSegment(ppcImlGenContext, entrySegment->imlList + 0); - PPCRecompilerIml_setLinkBranchTaken(entrySegment, imlSegment); + entrySegment->imlList.data()[0].make_jump(); + IMLSegment_SetLinkBranchTaken(entrySegment, imlSegment); // remove enterable flag from original segment imlSegment->isEnterable = false; imlSegment->enterPPCAddress = 0; } } -} - -PPCRecImlInstruction_t* PPCRecompilerIML_getLastInstruction(PPCRecImlSegment_t* imlSegment) -{ - if (imlSegment->imlListCount == 0) - return nullptr; - return imlSegment->imlList + (imlSegment->imlListCount - 1); -} +} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp deleted file mode 100644 index 8e8a63d9..00000000 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64.cpp +++ /dev/null @@ -1,2688 +0,0 @@ -#include "Cafe/HW/Espresso/PPCState.h" -#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterInternal.h" -#include "Cafe/HW/Espresso/Interpreter/PPCInterpreterHelper.h" -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" 
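// Sketch of the transformation performed by PPCRecompilerIML_isolateEnterableSegments
// above, on stand-in types (Seg and its fields are simplified placeholders, not the real
// IMLSegment API): every segment that is marked enterable but also has in-function
// predecessors gets a fresh trampoline segment appended at the end of the segment list.
// The trampoline consists of a single unconditional jump to the original segment and
// takes over the enterable flag, so an enterable segment no longer has to double as a
// branch target inside the function.
#include <cstdint>
#include <vector>

struct Seg
{
	std::vector<Seg*> prevSegments;
	Seg* branchTaken = nullptr; // target of the trampoline's jump
	bool isEnterable = false;
	uint32_t enterPPCAddress = 0;
};

void IsolateEnterableSegments(std::vector<Seg*>& segments)
{
	size_t initialCount = segments.size(); // newly appended trampolines are not revisited
	for (size_t i = 0; i < initialCount; i++)
	{
		Seg* seg = segments[i];
		if (seg->prevSegments.empty() || !seg->isEnterable)
			continue;
		Seg* entry = new Seg();
		entry->isEnterable = true;
		entry->enterPPCAddress = seg->enterPPCAddress;
		entry->branchTaken = seg; // stands in for make_jump() + IMLSegment_SetLinkBranchTaken()
		seg->prevSegments.push_back(entry);
		segments.push_back(entry);
		seg->isEnterable = false;
		seg->enterPPCAddress = 0;
	}
}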
-#include "PPCRecompilerX64.h" -#include "Cafe/OS/libs/coreinit/coreinit_Time.h" - -#include "util/MemMapper/MemMapper.h" - -sint32 x64Gen_registerMap[12] = // virtual GPR to x64 register mapping -{ - REG_RAX, REG_RDX, REG_RBX, REG_RBP, REG_RSI, REG_RDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_RCX -}; - -/* -* Remember current instruction output offset for reloc -* The instruction generated after this method has been called will be adjusted -*/ -void PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext_t* x64GenContext, uint8 type, void* extraInfo = nullptr) -{ - if( x64GenContext->relocateOffsetTableCount >= x64GenContext->relocateOffsetTableSize ) - { - x64GenContext->relocateOffsetTableSize = std::max(4, x64GenContext->relocateOffsetTableSize*2); - x64GenContext->relocateOffsetTable = (x64RelocEntry_t*)realloc(x64GenContext->relocateOffsetTable, sizeof(x64RelocEntry_t)*x64GenContext->relocateOffsetTableSize); - } - x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].offset = x64GenContext->codeBufferIndex; - x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].type = type; - x64GenContext->relocateOffsetTable[x64GenContext->relocateOffsetTableCount].extraInfo = extraInfo; - x64GenContext->relocateOffsetTableCount++; -} - -/* -* Overwrites the currently cached (in x64 cf) cr* register -* Should be called before each x64 instruction which overwrites the current status flags (with mappedCRRegister set to PPCREC_CR_TEMPORARY unless explicitly set by PPC instruction) -*/ -void PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 mappedCRRegister, sint32 crState) -{ - x64GenContext->activeCRRegister = mappedCRRegister; - x64GenContext->activeCRState = crState; -} - -/* -* Reset cached cr* register without storing it first -*/ -void PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext) -{ - x64GenContext->activeCRRegister = PPC_REC_INVALID_REGISTER; -} - -void PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext_t* x64GenContext, sint32 jumpInstructionOffset, sint32 destinationOffset) -{ - uint8* instructionData = x64GenContext->codeBuffer + jumpInstructionOffset; - if (instructionData[0] == 0x0F && (instructionData[1] >= 0x80 && instructionData[1] <= 0x8F)) - { - // far conditional jump - *(uint32*)(instructionData + 2) = (destinationOffset - (jumpInstructionOffset + 6)); - } - else if (instructionData[0] >= 0x70 && instructionData[0] <= 0x7F) - { - // short conditional jump - sint32 distance = (sint32)((destinationOffset - (jumpInstructionOffset + 2))); - cemu_assert_debug(distance >= -128 && distance <= 127); - *(uint8*)(instructionData + 1) = (uint8)distance; - } - else if (instructionData[0] == 0xE9) - { - *(uint32*)(instructionData + 1) = (destinationOffset - (jumpInstructionOffset + 5)); - } - else if (instructionData[0] == 0xEB) - { - sint32 distance = (sint32)((destinationOffset - (jumpInstructionOffset + 2))); - cemu_assert_debug(distance >= -128 && distance <= 127); - *(uint8*)(instructionData + 1) = (uint8)distance; - } - else - { - assert_dbg(); - } -} - -void PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - sint32 crRegister = imlInstruction->crRegister; - if( 
(imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // check for sign instead of _BELOW (CF) which is not set by TEST - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Set CR SO if XER SO bit is set - PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, crRegister, PPCREC_CR_STATE_TYPE_LOGICAL); -} - -void* ATTR_MS_ABI PPCRecompiler_virtualHLE(PPCInterpreter_t* hCPU, uint32 hleFuncId) -{ - void* prevRSPTemp = hCPU->rspTemp; - if( hleFuncId == 0xFFD0 ) - { - hCPU->remainingCycles -= 500; // let subtract about 500 cycles for each HLE call - hCPU->gpr[3] = 0; - PPCInterpreter_nextInstruction(hCPU); - return ppcInterpreterCurrentInstance; - } - else - { - auto hleCall = PPCInterpreter_getHLECall(hleFuncId); - cemu_assert(hleCall != nullptr); - hleCall(hCPU); - } - hCPU->rspTemp = prevRSPTemp; - return ppcInterpreterCurrentInstance; -} - -void ATTR_MS_ABI PPCRecompiler_getTBL(PPCInterpreter_t* hCPU, uint32 gprIndex) -{ - uint64 coreTime = coreinit::coreinit_getTimerTick(); - hCPU->gpr[gprIndex] = (uint32)(coreTime&0xFFFFFFFF); -} - -void ATTR_MS_ABI PPCRecompiler_getTBU(PPCInterpreter_t* hCPU, uint32 gprIndex) -{ - uint64 coreTime = coreinit::coreinit_getTimerTick(); - hCPU->gpr[gprIndex] = (uint32)((coreTime>>32)&0xFFFFFFFF); -} - -bool PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->operation == PPCREC_IML_MACRO_BLR || imlInstruction->operation == PPCREC_IML_MACRO_BLRL ) - { - uint32 currentInstructionAddress = imlInstruction->op_macro.param; - // MOV EDX, [SPR_LR] - x64Emit_mov_reg64_mem32(x64GenContext, REG_RDX, REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); - // if BLRL, then update SPR LR - if (imlInstruction->operation == PPCREC_IML_MACRO_BLRL) - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), currentInstructionAddress + 4); - // JMP [offset+RDX*(8/4)+R15] - x64Gen_writeU8(x64GenContext, 0x41); - x64Gen_writeU8(x64GenContext, 0xFF); - x64Gen_writeU8(x64GenContext, 0xA4); - x64Gen_writeU8(x64GenContext, 0x57); - x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); - return true; - } - else if( imlInstruction->operation == PPCREC_IML_MACRO_BCTR || imlInstruction->operation == PPCREC_IML_MACRO_BCTRL ) - { - uint32 currentInstructionAddress = imlInstruction->op_macro.param; - // MOV EDX, [SPR_CTR] - x64Emit_mov_reg64_mem32(x64GenContext, REG_RDX, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); - // if BCTRL, then update SPR LR - if (imlInstruction->operation == PPCREC_IML_MACRO_BCTRL) - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), currentInstructionAddress + 4); - // JMP [offset+RDX*(8/4)+R15] 
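// The hand-written byte sequence below decodes as:
//   0x41      REX.B prefix (extends the SIB base register to R15)
//   0xFF      opcode group 5
//   0xA4      ModRM: mod=10 (disp32), reg=100 (/4 = JMP r/m64), rm=100 (SIB byte follows)
//   0x57      SIB: scale bits 01 (x2), index=010 (RDX), base=111 -> R15 via REX.B
//   disp32    offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)
// i.e. `jmp qword ptr [r15 + rdx*2 + disp32]`. RDX holds the 4-byte-aligned PPC target
// address and each jump table entry is 8 bytes, so scaling the address by 2 (the 8/4 in
// the comment above) is enough to index the table.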
- x64Gen_writeU8(x64GenContext, 0x41); - x64Gen_writeU8(x64GenContext, 0xFF); - x64Gen_writeU8(x64GenContext, 0xA4); - x64Gen_writeU8(x64GenContext, 0x57); - x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); - return true; - } - else if( imlInstruction->operation == PPCREC_IML_MACRO_BL ) - { - // MOV DWORD [SPR_LinkRegister], newLR - uint32 newLR = imlInstruction->op_macro.param + 4; - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), newLR); - // remember new instruction pointer in RDX - uint32 newIP = imlInstruction->op_macro.param2; - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RDX, newIP); - // since RDX is constant we can use JMP [R15+const_offset] if jumpTableOffset+RDX*2 does not exceed the 2GB boundary - uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL; - if (lookupOffset >= 0x80000000ULL) - { - // JMP [offset+RDX*(8/4)+R15] - x64Gen_writeU8(x64GenContext, 0x41); - x64Gen_writeU8(x64GenContext, 0xFF); - x64Gen_writeU8(x64GenContext, 0xA4); - x64Gen_writeU8(x64GenContext, 0x57); - x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); - } - else - { - x64Gen_writeU8(x64GenContext, 0x41); - x64Gen_writeU8(x64GenContext, 0xFF); - x64Gen_writeU8(x64GenContext, 0xA7); - x64Gen_writeU32(x64GenContext, (uint32)lookupOffset); - } - return true; - } - else if( imlInstruction->operation == PPCREC_IML_MACRO_B_FAR ) - { - // remember new instruction pointer in RDX - uint32 newIP = imlInstruction->op_macro.param2; - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RDX, newIP); - // Since RDX is constant we can use JMP [R15+const_offset] if jumpTableOffset+RDX*2 does not exceed the 2GB boundary - uint64 lookupOffset = (uint64)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL; - if (lookupOffset >= 0x80000000ULL) - { - // JMP [offset+RDX*(8/4)+R15] - x64Gen_writeU8(x64GenContext, 0x41); - x64Gen_writeU8(x64GenContext, 0xFF); - x64Gen_writeU8(x64GenContext, 0xA4); - x64Gen_writeU8(x64GenContext, 0x57); - x64Gen_writeU32(x64GenContext, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); - } - else - { - x64Gen_writeU8(x64GenContext, 0x41); - x64Gen_writeU8(x64GenContext, 0xFF); - x64Gen_writeU8(x64GenContext, 0xA7); - x64Gen_writeU32(x64GenContext, (uint32)lookupOffset); - } - return true; - } - else if( imlInstruction->operation == PPCREC_IML_MACRO_LEAVE ) - { - uint32 currentInstructionAddress = imlInstruction->op_macro.param; - // remember PC value in REG_EDX - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RDX, currentInstructionAddress); - - uint32 newIP = 0; // special value for recompiler exit - uint64 lookupOffset = (uint64)&(((PPCRecompilerInstanceData_t*)NULL)->ppcRecompilerDirectJumpTable) + (uint64)newIP * 2ULL; - // JMP [R15+offset] - x64Gen_writeU8(x64GenContext, 0x41); - x64Gen_writeU8(x64GenContext, 0xFF); - x64Gen_writeU8(x64GenContext, 0xA7); - x64Gen_writeU32(x64GenContext, (uint32)lookupOffset); - return true; - } - else if( imlInstruction->operation == PPCREC_IML_MACRO_DEBUGBREAK ) - { - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, imlInstruction->op_macro.param2); - x64Gen_int3(x64GenContext); - return true; - } - else if( imlInstruction->operation == PPCREC_IML_MACRO_COUNT_CYCLES ) - { - uint32 cycleCount = imlInstruction->op_macro.param; - 
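// COUNT_CYCLES lowers to the single `sub dword [hCPU + remainingCycles], cycleCount`
// emitted below; no exhaustion check is generated here. The exit paths (e.g. the HLE
// macro further down) instead test bit 31 of remainingCycles and leave the recompiled
// code through slot 0 of ppcRecompilerDirectJumpTable once the counter goes negative.
// Roughly, in C terms:
//   hCPU->remainingCycles -= cycleCount;
//   if (hCPU->remainingCycles < 0)
//       leaveRecompiledCode(); // hypothetical escape helper, reached via jump table entry 0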
x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), cycleCount); - return true; - } - else if( imlInstruction->operation == PPCREC_IML_MACRO_HLE ) - { - uint32 ppcAddress = imlInstruction->op_macro.param; - uint32 funcId = imlInstruction->op_macro.param2; - //x64Gen_int3(x64GenContext); - // update instruction pointer - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress); - //// save hCPU (RSP) - //x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)&ppcRecompilerX64_hCPUTemp); - //x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_TEMP, 0, REG_RSP); - // set parameters - x64Gen_mov_reg64_reg64(x64GenContext, REG_RCX, REG_RSP); - x64Gen_mov_reg64_imm64(x64GenContext, REG_RDX, funcId); - // restore stackpointer from executionContext/hCPU->rspTemp - x64Emit_mov_reg64_mem64(x64GenContext, REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); - //x64Emit_mov_reg64_mem64(x64GenContext, REG_RSP, REG_R14, 0); - //x64Gen_int3(x64GenContext); - // reserve space on stack for call parameters - x64Gen_sub_reg64_imm32(x64GenContext, REG_RSP, 8*11); // must be uneven number in order to retain stack 0x10 alignment - x64Gen_mov_reg64_imm64(x64GenContext, REG_RBP, 0); - // call HLE function - x64Gen_mov_reg64_imm64(x64GenContext, REG_RAX, (uint64)PPCRecompiler_virtualHLE); - x64Gen_call_reg64(x64GenContext, REG_RAX); - // restore RSP to hCPU (from RAX, result of PPCRecompiler_virtualHLE) - //x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)&ppcRecompilerX64_hCPUTemp); - //x64Emit_mov_reg64_mem64Reg64(x64GenContext, REG_RSP, REG_RESV_TEMP, 0); - x64Gen_mov_reg64_reg64(x64GenContext, REG_RSP, REG_RAX); - // MOV R15, ppcRecompilerInstanceData - x64Gen_mov_reg64_imm64(x64GenContext, REG_R15, (uint64)ppcRecompilerInstanceData); - // MOV R13, memory_base - x64Gen_mov_reg64_imm64(x64GenContext, REG_R13, (uint64)memory_base); - // check if cycles where decreased beyond zero, if yes -> leave recompiler - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_CARRY, 0); - //x64Gen_int3(x64GenContext); - //x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RDX, ppcAddress); - - x64Emit_mov_reg64_mem32(x64GenContext, REG_RDX, REG_RSP, offsetof(PPCInterpreter_t, instructionPointer)); - // set EAX to 0 (we assume that ppcRecompilerDirectJumpTable[0] will be a recompiler escape function) - x64Gen_xor_reg32_reg32(x64GenContext, REG_RAX, REG_RAX); - // ADD RAX, R15 (R15 -> Pointer to ppcRecompilerInstanceData - x64Gen_add_reg64_reg64(x64GenContext, REG_RAX, REG_R15); - //// JMP [recompilerCallTable+EAX/4*8] - //x64Gen_int3(x64GenContext); - x64Gen_jmp_memReg64(x64GenContext, REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); - // check if instruction pointer was changed - // assign new instruction pointer to EAX - x64Emit_mov_reg64_mem32(x64GenContext, REG_RAX, REG_RSP, offsetof(PPCInterpreter_t, instructionPointer)); - // remember instruction pointer in REG_EDX - x64Gen_mov_reg64_reg64(x64GenContext, REG_RDX, REG_RAX); - // EAX *= 2 - x64Gen_add_reg64_reg64(x64GenContext, REG_RAX, REG_RAX); - // ADD RAX, R15 (R15 -> Pointer to ppcRecompilerInstanceData - 
x64Gen_add_reg64_reg64(x64GenContext, REG_RAX, REG_R15); - // JMP [ppcRecompilerDirectJumpTable+RAX/4*8] - x64Gen_jmp_memReg64(x64GenContext, REG_RAX, (uint32)offsetof(PPCRecompilerInstanceData_t, ppcRecompilerDirectJumpTable)); - return true; - } - else if( imlInstruction->operation == PPCREC_IML_MACRO_MFTB ) - { - uint32 ppcAddress = imlInstruction->op_macro.param; - uint32 sprId = imlInstruction->op_macro.param2&0xFFFF; - uint32 gprIndex = (imlInstruction->op_macro.param2>>16)&0x1F; - // update instruction pointer - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, instructionPointer), ppcAddress); - // set parameters - x64Gen_mov_reg64_reg64(x64GenContext, REG_RCX, REG_RSP); - x64Gen_mov_reg64_imm64(x64GenContext, REG_RDX, gprIndex); - // restore stackpointer to original RSP - x64Emit_mov_reg64_mem64(x64GenContext, REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); - // push hCPU on stack - x64Gen_push_reg64(x64GenContext, REG_RCX); - // reserve space on stack for call parameters - x64Gen_sub_reg64_imm32(x64GenContext, REG_RSP, 8*11 + 8); - x64Gen_mov_reg64_imm64(x64GenContext, REG_RBP, 0); - // call HLE function - if( sprId == SPR_TBL ) - x64Gen_mov_reg64_imm64(x64GenContext, REG_RAX, (uint64)PPCRecompiler_getTBL); - else if( sprId == SPR_TBU ) - x64Gen_mov_reg64_imm64(x64GenContext, REG_RAX, (uint64)PPCRecompiler_getTBU); - else - assert_dbg(); - x64Gen_call_reg64(x64GenContext, REG_RAX); - // restore hCPU from stack - x64Gen_add_reg64_imm32(x64GenContext, REG_RSP, 8 * 11 + 8); - x64Gen_pop_reg64(x64GenContext, REG_RSP); - // MOV R15, ppcRecompilerInstanceData - x64Gen_mov_reg64_imm64(x64GenContext, REG_R15, (uint64)ppcRecompilerInstanceData); - // MOV R13, memory_base - x64Gen_mov_reg64_imm64(x64GenContext, REG_R13, (uint64)memory_base); - return true; - } - else - { - debug_printf("Unknown recompiler macro operation %d\n", imlInstruction->operation); - assert_dbg(); - } - return false; -} - -/* -* Load from memory -*/ -bool PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed) -{ - sint32 realRegisterData = tempToRealRegister(imlInstruction->op_storeLoad.registerData); - sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); - sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; - if( indexed ) - realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); - if( false )//imlInstruction->op_storeLoad.flags & PPCREC_IML_OP_FLAG_FASTMEMACCESS ) - { - // load u8/u16/u32 via direct memory access + optional sign extend - assert_dbg(); // todo - } - else - { - if( indexed && realRegisterMem == realRegisterMem2 ) - { - return false; - } - if( indexed && realRegisterData == realRegisterMem2 ) - { - // for indexed memory access realRegisterData must not be the same register as the second memory register, - // this can easily be fixed by swapping the logic of realRegisterMem and realRegisterMem2 - sint32 temp = realRegisterMem; - realRegisterMem = realRegisterMem2; - realRegisterMem2 = temp; - } - - bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend; - bool switchEndian = imlInstruction->op_storeLoad.flags2.swapEndian; - if( imlInstruction->op_storeLoad.copyWidth == 32 ) - { - //if( indexed ) - // PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if (indexed) - { - 
x64Gen_lea_reg64Low32_reg64Low32PlusReg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem, realRegisterMem2); - } - if( hasMOVBESupport && switchEndian ) - { - if (indexed) - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); - //if (indexed && realRegisterMem != realRegisterData) - // x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - } - } - else - { - if (indexed) - { - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); - //if (realRegisterMem != realRegisterData) - // x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - if (switchEndian) - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - } - else - { - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if (switchEndian) - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - } - } - } - else if( imlInstruction->op_storeLoad.copyWidth == 16 ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // todo: We can avoid this if MOVBE is available - if (indexed) - { - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - if( hasMOVBESupport && switchEndian ) - { - x64Gen_movBEZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else - { - x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - if( switchEndian ) - x64Gen_rol_reg64Low16_imm8(x64GenContext, realRegisterData, 8); - } - if( signExtend ) - x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData); - else - x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, realRegisterData, realRegisterData); - } - else if( imlInstruction->op_storeLoad.copyWidth == 8 ) - { - if( indexed ) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // todo: Optimize by using only MOVZX/MOVSX - if( indexed ) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - // todo: Use sign extend move from memory instead of separate sign-extend? 
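// Note on the indexed forms in this function: rather than computing the effective
// address into a scratch register, most paths fold the index into the base register in
// place, do the access relative to R13 (the PPC memory base) and then undo the addition:
//   add  mem, mem2                  ; EA = base + index
//   mov  data, [r13 + mem + imm]    ; the actual access
//   sub  mem, mem2                  ; restore the base register
// The subtraction must be skipped when the load has just overwritten the base register
// itself, which is what the `indexed && realRegisterMem != realRegisterData` guards
// below are for.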
- if( signExtend ) - x64Gen_movSignExtend_reg64Low32_mem8Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - else - x64Emit_movZX_reg32_mem8(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_LOAD_LWARX_MARKER ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->op_storeLoad.immS32 != 0 ) - assert_dbg(); // not supported - if( indexed ) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), realRegisterMem); // remember EA for reservation - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - if( switchEndian ) - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), realRegisterData); // remember value for reservation - // LWARX instruction costs extra cycles (this speeds up busy loops) - x64Gen_sub_mem32reg64_imm32(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 20); - } - else if( imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_LSWI_3 ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( switchEndian == false ) - assert_dbg(); - if( indexed ) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); // can be replaced with LEA temp, [memReg1+memReg2] (this way we can avoid the SUB instruction after the move) - if( hasMOVBESupport ) - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else - { - x64Emit_mov_reg32_mem32(x64GenContext, realRegisterData, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32); - if( indexed && realRegisterMem != realRegisterData ) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_bswap_reg64Lower32bit(x64GenContext, realRegisterData); - } - x64Gen_and_reg64Low32_imm32(x64GenContext, realRegisterData, 0xFFFFFF00); - } - else - return false; - return true; - } - return false; -} - -/* -* Write to memory -*/ -bool PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed) -{ - sint32 realRegisterData = tempToRealRegister(imlInstruction->op_storeLoad.registerData); - sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); - sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; - if (indexed) - realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); - - if (false)//imlInstruction->op_storeLoad.flags & PPCREC_IML_OP_FLAG_FASTMEMACCESS ) - { - // 
load u8/u16/u32 via direct memory access + optional sign extend - assert_dbg(); // todo - } - else - { - if (indexed && realRegisterMem == realRegisterMem2) - { - return false; - } - if (indexed && realRegisterData == realRegisterMem2) - { - // for indexed memory access realRegisterData must not be the same register as the second memory register, - // this can easily be fixed by swapping the logic of realRegisterMem and realRegisterMem2 - sint32 temp = realRegisterMem; - realRegisterMem = realRegisterMem2; - realRegisterMem2 = temp; - } - - bool signExtend = imlInstruction->op_storeLoad.flags2.signExtend; - bool swapEndian = imlInstruction->op_storeLoad.flags2.swapEndian; - if (imlInstruction->op_storeLoad.copyWidth == 32) - { - if (indexed) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - uint32 valueRegister; - if ((swapEndian == false || hasMOVBESupport) && realRegisterMem != realRegisterData) - { - valueRegister = realRegisterData; - } - else - { - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - valueRegister = REG_RESV_TEMP; - } - if (hasMOVBESupport == false && swapEndian) - x64Gen_bswap_reg64Lower32bit(x64GenContext, valueRegister); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - if (hasMOVBESupport && swapEndian) - x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); - else - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, valueRegister); - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else if (imlInstruction->op_storeLoad.copyWidth == 16) - { - if (indexed || swapEndian) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - if (swapEndian) - x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - // todo: Optimize this, e.g. 
by using MOVBE - } - else if (imlInstruction->op_storeLoad.copyWidth == 8) - { - if (indexed) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if (indexed && realRegisterMem == realRegisterData) - { - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - realRegisterData = REG_RESV_TEMP; - } - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32, realRegisterData); - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STWCX_MARKER) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if (imlInstruction->op_storeLoad.immS32 != 0) - assert_dbg(); // todo - // reset cr0 LT, GT and EQ - sint32 crRegister = 0; - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ), 0); - // calculate effective address - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - if (swapEndian) - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - // realRegisterMem now holds EA - x64Gen_cmp_reg64Low32_mem32reg64(x64GenContext, realRegisterMem, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemAddr)); - sint32 jumpInstructionOffsetJumpToEnd = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - // EA matches reservation - // backup EAX (since it's an explicit operand of CMPXCHG and will be overwritten) - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); - // backup REG_RESV_MEMBASE - x64Emit_mov_mem64_reg64(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2]), REG_RESV_MEMBASE); - // add mem register to REG_RESV_MEMBASE - x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem); - // load reserved value in EAX - x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RESV_HCPU, offsetof(PPCInterpreter_t, reservedMemValue)); - // bswap EAX - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_EAX); - - //x64Gen_lock_cmpxchg_mem32Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, realRegisterMem, 0, REG_RESV_TEMP); - x64Gen_lock_cmpxchg_mem32Reg64_reg64(x64GenContext, REG_RESV_MEMBASE, 0, REG_RESV_TEMP); - - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); - - // reset reservation - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemAddr), 0); - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, reservedMemValue), 0); - - // restore EAX - x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); - // restore REG_RESV_MEMBASE - 
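// Recap of the lwarx/stwcx. emulation split across the load and store handlers: the
// LWARX marker load records the effective address and the loaded value in
// hCPU->reservedMemAddr / reservedMemValue; this STWCX block compares the store EA
// against reservedMemAddr and, on a match, re-checks the remembered value with a
// LOCK CMPXCHG so the store only succeeds if memory still holds what lwarx saw.
// cr0.EQ is set from the compare-exchange result, the reservation is cleared, and
// XER.SO is copied into cr0.SO just below.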
x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_MEMBASE, REG_RESV_HCPU, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[2])); - - // copy XER SO to CR0 SO - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), 31); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RESV_HCPU, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_SO)); - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffsetJumpToEnd, x64GenContext->codeBufferIndex); - } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_2) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16); // store upper 2 bytes .. - x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // .. as big-endian - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - - x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else if (imlInstruction->op_storeLoad.copyWidth == PPC_REC_STORE_STSWI_3) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, realRegisterData); - if (indexed) - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 2, REG_RESV_TEMP); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 1, REG_RESV_TEMP); - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 8); - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32 + 0, REG_RESV_TEMP); - - if (indexed) - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else - return false; - return true; - } - return false; -} - -/* - * Copy byte/word/dword from memory to memory - */ -void PPCRecompilerX64Gen_imlInstruction_mem2mem(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - sint32 realSrcMemReg = tempToRealRegister(imlInstruction->op_mem2mem.src.registerMem); - sint32 realSrcMemImm = imlInstruction->op_mem2mem.src.immS32; - sint32 realDstMemReg = tempToRealRegister(imlInstruction->op_mem2mem.dst.registerMem); - sint32 realDstMemImm = imlInstruction->op_mem2mem.dst.immS32; - // PPCRecompilerX64Gen_crConditionFlags_forget() is not needed here, since MOVs don't affect eflags - if (imlInstruction->op_mem2mem.copyWidth == 32) - { - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_R13, realSrcMemReg, realSrcMemImm); - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realDstMemReg, realDstMemImm, REG_RESV_TEMP); - } - else - { - assert_dbg(); - } -} - -bool PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* 
x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - { - // registerResult = registerA - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - if (imlInstruction->crMode == PPCREC_CR_MODE_LOGICAL) - { - // since MOV doesn't set eflags we need another test instruction - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - else - { - assert_dbg(); - } - } - else - { - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - } - } - else if (imlInstruction->operation == PPCREC_IML_OP_ENDIAN_SWAP) - { - // registerResult = endianSwap32(registerA) - if (imlInstruction->op_r_r.registerA != imlInstruction->op_r_r.registerResult) - assert_dbg(); - x64Gen_bswap_reg64Lower32bit(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD ) - { - // registerResult += registerA - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S8_TO_S32 ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode == PPCREC_CR_MODE_ARITHMETIC ) - { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - else - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation\n"); - assert_dbg(); - } - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_OR || imlInstruction->operation == PPCREC_IML_OP_AND || imlInstruction->operation == PPCREC_IML_OP_XOR ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->operation == PPCREC_IML_OP_OR ) - { - // registerResult |= registerA - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_AND ) - { - // registerResult &= registerA - x64Gen_and_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - } - else - { - // registerResult ^= registerA - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, 
tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - } - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_NOT ) - { - // copy register content if different registers - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) - { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - } - // NOT destination register - x64Gen_not_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); - // update cr bits - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // NOT instruction does not update flags, so we have to generate an additional TEST instruction - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_CNTLZW ) - { - // count leading zeros - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // LZCNT instruction (part of SSE4, CPUID.80000001H:ECX.ABM[Bit 5]) - if( hasLZCNTSupport ) - { - x64Gen_lzcnt_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - } - else - { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerA), tempToRealRegister(imlInstruction->op_r_r.registerA)); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_bsr_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - x64Gen_neg_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); - x64Gen_add_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 32-1); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); - x64Gen_mov_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 32); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - // registerA CMP registerB (arithmetic compare) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) - { - return false; // a NO-OP instruction - } - if( imlInstruction->crRegister >= 8 ) - { - return false; - } - // update state of cr register - if( 
imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) - PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC); - else - PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC); - // create compare instruction - x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - // set cr bits - sint32 crRegister = imlInstruction->crRegister; - if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Also set summary overflow if xer bit is set - } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Also set summary overflow if xer bit is set - } - else - assert_dbg(); - } - else if( imlInstruction->operation == PPCREC_IML_OP_NEG ) - { - // copy register content if different registers - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) - { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - } - // NEG destination register - x64Gen_neg_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult)); - // update cr bits - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // copy operand to result if different registers - if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) - { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 
tempToRealRegister(imlInstruction->op_r_r.registerA)); - } - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // add carry bit - x64Gen_adc_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), 0); - // update xer carry - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // set cr bits - sint32 crRegister = imlInstruction->crRegister; - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // check for sign instead of _BELOW (CF) which is not set by AND/OR - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - // todo: Use different version of PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction) - // todo: Also set summary overflow if xer bit is set - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_ME ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // copy operand to result if different registers - if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) - { - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerA)); - } - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // add carry bit - x64Gen_adc_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), (uint32)-1); - // update xer carry - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // set cr bits - sint32 crRegister = imlInstruction->crRegister; - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY ) - { - // registerResult = ~registerOperand1 + carry - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r.registerA); - // copy operand to result register - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - // execute NOT on result - x64Gen_not_reg64Low32(x64GenContext, rRegResult); - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // add carry - x64Gen_adc_reg64Low32_imm32(x64GenContext, rRegResult, 0); - // update carry - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - // update cr if requested - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode == 
PPCREC_CR_MODE_LOGICAL ) - { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - else - { - assert_dbg(); - } - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN_S16_TO_S32 ) - { - // registerResult = (uint32)(sint32)(sint16)registerA - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), reg32ToReg16(tempToRealRegister(imlInstruction->op_r_r.registerA))); - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode == PPCREC_CR_MODE_ARITHMETIC ) - { - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r.registerResult), tempToRealRegister(imlInstruction->op_r_r.registerResult)); - // set cr bits - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - else - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation\n"); - assert_dbg(); - } - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_DCBZ ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->op_r_r.registerResult != imlInstruction->op_r_r.registerA ) - { - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r.registerA)); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r.registerResult)); - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); - x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); - for(sint32 f=0; f<0x20; f+=8) - x64Gen_mov_mem64Reg64_imm32(x64GenContext, REG_RESV_TEMP, f, 0); - } - else - { - // calculate effective address - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r.registerA)); - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, ~0x1F); - x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE); - for(sint32 f=0; f<0x20; f+=8) - x64Gen_mov_mem64Reg64_imm32(x64GenContext, REG_RESV_TEMP, f, 0); - } - } - else - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); - return false; - } - return true; -} - -bool PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) - { - // registerResult = immS32 - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_mov_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); - } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD ) - { - // registerResult += immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - x64Gen_add_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); - } - else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) - { - 
// registerResult -= immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if (imlInstruction->crRegister == PPCREC_CR_REG_TEMP) - { - // do nothing -> SUB is for BDNZ instruction - } - else if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - // update cr register - assert_dbg(); - } - x64Gen_sub_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); - } - else if( imlInstruction->operation == PPCREC_IML_OP_AND ) - { - // registerResult &= immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_and_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - // set cr bits - sint32 crRegister = imlInstruction->crRegister; - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - // todo: Set CR SO if XER SO bit is set - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_OR ) - { - // registerResult |= immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_or_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); - } - else if( imlInstruction->operation == PPCREC_IML_OP_XOR ) - { - // registerResult ^= immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_xor_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint32)imlInstruction->op_r_immS32.immS32); - } - else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) - { - // registerResult <<<= immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - if( (imlInstruction->op_r_immS32.immS32&0x80) ) - assert_dbg(); // should not happen - x64Gen_rol_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), (uint8)imlInstruction->op_r_immS32.immS32); - } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - // registerResult CMP immS32 (arithmetic compare) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): No-Op CMP found\n"); - return true; // a NO-OP instruction - } - if( imlInstruction->crRegister >= 8 ) - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported CMP with crRegister = 8\n"); - return false; - } - // update state of cr register - if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) - PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC); - else - PPCRecompilerX64Gen_crConditionFlags_set(PPCRecFunction, 
ppcImlGenContext, x64GenContext, imlInstruction->crRegister, PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC); - // create compare instruction - x64Gen_cmp_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_immS32.registerIndex), imlInstruction->op_r_immS32.immS32); - // set cr bits - uint32 crRegister = imlInstruction->crRegister; - if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_SIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_LESS, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_COMPARE_UNSIGNED ) - { - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_LT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_GT))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - if( (imlInstruction->crIgnoreMask&(1<<(crRegister*4+PPCREC_CR_BIT_EQ))) == 0 ) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_ESP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - } - else - assert_dbg(); - // todo: Also set summary overflow if xer bit is set? 
- } - else if( imlInstruction->operation == PPCREC_IML_OP_MFCR ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - uint32 destRegister = tempToRealRegister(imlInstruction->op_r_immS32.registerIndex); - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); - for(sint32 f=0; f<32; f++) - { - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr)+f, 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, destRegister, destRegister); - } - } - else if (imlInstruction->operation == PPCREC_IML_OP_MTCRF) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - uint32 srcRegister = tempToRealRegister(imlInstruction->op_r_immS32.registerIndex); - uint32 crBitMask = ppc_MTCRFMaskToCRBitMask((uint32)imlInstruction->op_r_immS32.immS32); - for (sint32 f = 0; f < 32; f++) - { - if(((crBitMask >> f) & 1) == 0) - continue; - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f), 0); - x64Gen_test_reg64Low32_imm32(x64GenContext, srcRegister, 0x80000000>>f); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, REG_ESP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8) * (f)); - } - } - else - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation); - return false; - } - return true; -} - -bool PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - if (imlInstruction->operation == PPCREC_IML_OP_ASSIGN) - { - // registerResult = immS32 (conditional) - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - assert_dbg(); - } - - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)imlInstruction->op_conditional_r_s32.immS32); - - uint8 crBitIndex = imlInstruction->op_conditional_r_s32.crRegisterIndex * 4 + imlInstruction->op_conditional_r_s32.crBitIndex; - if (imlInstruction->op_conditional_r_s32.crRegisterIndex == x64GenContext->activeCRRegister) - { - if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC) - { - if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); - return true; - } - else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); - return true; - } - else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? 
X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); - return true; - } - } - else if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC) - { - if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); - return true; - } - else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); - return true; - } - else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); - return true; - } - } - else if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_LOGICAL) - { - if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_LT) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); - return true; - } - else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_EQ) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); - return true; - } - else if (imlInstruction->op_conditional_r_s32.crBitIndex == CR_BIT_GT) - { - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, imlInstruction->op_conditional_r_s32.bitMustBeSet ? 
X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); - return true; - } - } - } - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); - if (imlInstruction->op_conditional_r_s32.bitMustBeSet) - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_CARRY, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); - else - x64Gen_cmovcc_reg64Low32_reg64Low32(x64GenContext, X86_CONDITION_NOT_CARRY, tempToRealRegister(imlInstruction->op_conditional_r_s32.registerIndex), REG_RESV_TEMP); - return true; - } - return false; -} - -bool PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - if( imlInstruction->operation == PPCREC_IML_OP_ADD || imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY || imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY ) - { - // registerResult = registerOperand1 + registerOperand2 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - - bool addCarry = imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY; - if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) - { - // be careful not to overwrite the operand before we use it - if( rRegResult == rRegOperand1 ) - { - if( addCarry ) - { - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - else - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - else - { - if( addCarry ) - { - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - } - else - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - } - } - else - { - // copy operand1 to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); - // add operand2 - if( addCarry ) - { - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - else - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - // update carry - if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY || imlInstruction->operation == PPCREC_IML_OP_ADD_CARRY_UPDATE_CARRY ) - { - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - } - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - sint32 crRegister = imlInstruction->crRegister; - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - return true; - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_SUB ) - { - // registerResult = 
registerOperand1 - registerOperand2 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - if( rRegOperand1 == rRegOperand2 ) - { - // result = operand1 - operand1 -> 0 - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - } - else if( rRegResult == rRegOperand1 ) - { - // result = result - operand2 - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - else if ( rRegResult == rRegOperand2 ) - { - // result = operand1 - result - // NEG result - x64Gen_neg_reg64Low32(x64GenContext, rRegResult); - // ADD result, operand1 - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - } - else - { - // copy operand1 to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); - // sub operand2 - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - sint32 crRegister = imlInstruction->crRegister; - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - return true; - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_SUB_CARRY_UPDATE_CARRY ) - { - // registerResult = registerOperand1 - registerOperand2 + carry - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - if( rRegOperand1 == rRegOperand2 ) - { - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_cmc(x64GenContext); - // result = operand1 - operand1 -> 0 - x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - } - else if( rRegResult == rRegOperand1 ) - { - // copy inverted xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_cmc(x64GenContext); - // result = result - operand2 - x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - else if ( rRegResult == rRegOperand2 ) - { - // result = operand1 - result - // NOT result - x64Gen_not_reg64Low32(x64GenContext, rRegResult); - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // ADC result, operand1 - x64Gen_adc_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - } - else - { - // copy operand1 to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); - // copy xer_ca to eflags carry - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - x64Gen_cmc(x64GenContext); - // sub operand2 - x64Gen_sbb_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - // update carry flag (todo: is this actually correct in all cases?) 
- x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - // update cr0 if requested - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - assert_dbg(); - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) - { - // registerResult = registerOperand1 * registerOperand2 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - if( (rRegResult == rRegOperand1) || (rRegResult == rRegOperand2) ) - { - // be careful not to overwrite the operand before we use it - if( rRegResult == rRegOperand1 ) - x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - else - x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - } - else - { - // copy operand1 to destination register before doing multiplication - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand1); - // add operand2 - x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand2); - } - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - // since IMUL instruction leaves relevant flags undefined, we have to use another TEST instruction to get the correct results - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_SUBFC ) - { - // registerResult = registerOperand2(rB) - registerOperand1(rA) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // updates carry flag - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - return false; - } - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperandA = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperandB = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - // update carry flag - // carry flag is detected this way: - //if ((~a+b) < a) { - // return true; - //} - //if ((~a+b+1) < 1) { - // return true; - //} - // set carry to zero - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // ((~a+b)<~a) == true -> ca = 1 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA); - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandB); - x64Gen_not_reg64Low32(x64GenContext, rRegOperandA); - x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandA); - x64Gen_not_reg64Low32(x64GenContext, rRegOperandA); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); - // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, 
offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); - // OR ((~a+b+1)<1) == true -> ca = 1 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperandA); - // todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1 - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperandB); - x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); - // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); - // do subtraction - if( rRegOperandB == rRegOperandA ) - { - // result = operandA - operandA -> 0 - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - } - else if( rRegResult == rRegOperandB ) - { - // result = result - operandA - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperandA); - } - else if ( rRegResult == rRegOperandA ) - { - // result = operandB - result - // NEG result - x64Gen_neg_reg64Low32(x64GenContext, rRegResult); - // ADD result, operandB - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperandB); - } - else - { - // copy operand1 to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperandB); - // sub operand2 - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperandA); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_SLW || imlInstruction->operation == PPCREC_IML_OP_SRW ) - { - // registerResult = registerOperand1(rA) >> registerOperand2(rB) (up to 63 bits) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - - if (hasBMI2Support && imlInstruction->operation == PPCREC_IML_OP_SRW) - { - // use BMI2 SHRX if available - x64Gen_shrx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); - } - else if (hasBMI2Support && imlInstruction->operation == PPCREC_IML_OP_SLW) - { - // use BMI2 SHLX if available - x64Gen_shlx_reg64_reg64_reg64(x64GenContext, rRegResult, rRegOperand1, rRegOperand2); - x64Gen_and_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); // trim result to 32bit - } - else - { - // lazy and slow way to do shift by register without relying on ECX/CL or BMI2 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1); - for (sint32 b = 0; b < 6; b++) - { - x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1 << b)); - sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set - if (b == 5) - { - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - } - else - { - if (imlInstruction->operation == PPCREC_IML_OP_SLW) - x64Gen_shl_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b)); - else - 
x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1 << b)); - } - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex); - } - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); - } - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_LEFT_ROTATE ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - // todo: Use BMI2 rotate if available - // check if CL/ECX/RCX is available - if( rRegResult != REG_RCX && rRegOperand1 != REG_RCX && rRegOperand2 != REG_RCX ) - { - // swap operand 2 with RCX - x64Gen_xchg_reg64_reg64(x64GenContext, REG_RCX, rRegOperand2); - // move operand 1 to temp register - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1); - // rotate - x64Gen_rol_reg64Low32_cl(x64GenContext, REG_RESV_TEMP); - // undo swap operand 2 with RCX - x64Gen_xchg_reg64_reg64(x64GenContext, REG_RCX, rRegOperand2); - // copy to result register - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); - } - else - { - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1); - // lazy and slow way to do shift by register without relying on ECX/CL - for(sint32 b=0; b<5; b++) - { - x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<<b)); - sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set - x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b)); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex); - } - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); - } - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_SRAW ) - { - // registerResult = (sint32)registerOperand1(rA) >> (sint32)registerOperand2(rB) (up to 63 bits) - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - // save cr - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - return false; - } - // todo: Use BMI instructions if available?
- // MOV registerResult, registerOperand (if different) - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand1); - // reset carry - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // we use the same shift by register approach as in SLW/SRW, but we have to differentiate by signed/unsigned shift since it influences how the carry flag is set - x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 0x80000000); - sint32 jumpInstructionJumpToSignedShift = x64GenContext->codeBufferIndex; - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - //sint32 jumpInstructionJumpToEnd = x64GenContext->codeBufferIndex; - //x64Gen_jmpc(x64GenContext, X86_CONDITION_EQUAL, 0); - // unsigned shift (MSB of input register is not set) - for(sint32 b=0; b<6; b++) - { - x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<<b)); - sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set - if( b == 5 ) - { - x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b)/2); - x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b)/2); - } - else - { - x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b)); - } - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex); - } - sint32 jumpInstructionJumpToEnd = x64GenContext->codeBufferIndex; - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NONE, 0); - // signed shift - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToSignedShift, x64GenContext->codeBufferIndex); - for(sint32 b=0; b<6; b++) - { - // check if we need to shift by (1<<b) - x64Gen_test_reg64Low32_imm32(x64GenContext, rRegOperand2, (1<<b)); - sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if bit not set - // set ca if any non-zero bit is shifted out - x64Gen_test_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (1<<(1<<b))-1); - sint32 jumpInstructionJumpToAfterCa = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 0); // jump if no bit is set - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToAfterCa, x64GenContext->codeBufferIndex); - // arithmetic shift - if( b == 5 ) - { - // copy sign bit into all bits - x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b)/2); - x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b)/2); - } - else - { - x64Gen_sar_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (1<<b)); - } - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex); - } - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionJumpToEnd, x64GenContext->codeBufferIndex); - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_RESV_TEMP); - // update CR if requested - // todo - } - else if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED || imlInstruction->operation == PPCREC_IML_OP_DIVIDE_UNSIGNED ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), REG_EDX); - // mov operand 2 to temp register - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); - // mov operand1 to EAX - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_EAX, rRegOperand1); - // sign or zero extend EAX to EDX:EAX based on division sign mode - if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED ) - x64Gen_cdq(x64GenContext); - else - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, REG_EDX, REG_EDX); - // make sure we avoid division by zero -
x64Gen_test_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_EQUAL, 3); - // divide - if( imlInstruction->operation == PPCREC_IML_OP_DIVIDE_SIGNED ) - x64Gen_idiv_reg64Low32(x64GenContext, REG_RESV_TEMP); - else - x64Gen_div_reg64Low32(x64GenContext, REG_RESV_TEMP); - // result of division is now stored in EAX, move it to result register - if( rRegResult != REG_EAX ) - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_EAX); - // restore EAX / EDX - if( rRegResult != REG_RAX ) - x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); - if( rRegResult != REG_RDX ) - x64Emit_mov_reg64_mem32(x64GenContext, REG_EDX, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); - // set cr bits if requested - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_ARITHMETIC ) - { - assert_dbg(); - } - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED || imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_UNSIGNED ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0]), REG_EAX); - x64Emit_mov_mem32_reg32(x64GenContext, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1]), REG_EDX); - // mov operand 2 to temp register - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); - // mov operand1 to EAX - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_EAX, rRegOperand1); - if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED ) - { - // zero extend EAX to EDX:EAX - x64Gen_xor_reg64Low32_reg64Low32(x64GenContext, REG_EDX, REG_EDX); - } - else - { - // sign extend EAX to EDX:EAX - x64Gen_cdq(x64GenContext); - } - // multiply - if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_HIGH_SIGNED ) - x64Gen_imul_reg64Low32(x64GenContext, REG_RESV_TEMP); - else - x64Gen_mul_reg64Low32(x64GenContext, REG_RESV_TEMP); - // result of multiplication is now stored in EDX:EAX, move it to result register - if( rRegResult != REG_EDX ) - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, REG_EDX); - // restore EAX / EDX - if( rRegResult != REG_RAX ) - x64Emit_mov_reg64_mem32(x64GenContext, REG_EAX, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[0])); - if( rRegResult != REG_RDX ) - x64Emit_mov_reg64_mem32(x64GenContext, REG_EDX, REG_RSP, (uint32)offsetof(PPCInterpreter_t, temporaryGPR[1])); - // set cr bits if requested - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegResult); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_ORC ) - { - // registerResult = registerOperand1 | ~registerOperand2 - 
PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_r.registerResult); - sint32 rRegOperand1 = tempToRealRegister(imlInstruction->op_r_r_r.registerA); - sint32 rRegOperand2 = tempToRealRegister(imlInstruction->op_r_r_r.registerB); - - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand2); - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - if( rRegResult != rRegOperand1 ) - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, rRegResult, rRegOperand1); - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, rRegResult, REG_RESV_TEMP); - - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - return true; - } - } - else - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r_r(): Unsupported operation 0x%x\n", imlInstruction->operation); - return false; - } - return true; -} - -bool PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - if( imlInstruction->operation == PPCREC_IML_OP_ADD ) - { - // registerResult = registerOperand + immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); - sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); - uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; - if( rRegResult != rRegOperand ) - { - // copy value to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); - } - x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immU32); - } - else if( imlInstruction->operation == PPCREC_IML_OP_ADD_UPDATE_CARRY ) - { - // registerResult = registerOperand + immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); - sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); - uint32 immU32 = (uint32)imlInstruction->op_r_r_s32.immS32; - if( rRegResult != rRegOperand ) - { - // copy value to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); - } - x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immU32); - // update carry flag - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_CARRY, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - // set cr bits if enabled - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - if( imlInstruction->crMode != PPCREC_CR_MODE_LOGICAL ) - { - assert_dbg(); - } - sint32 crRegister = imlInstruction->crRegister; - //x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); - //x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - //x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, 
cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_SUBFC ) - { - // registerResult = immS32 - registerOperand - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); - sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); - sint32 immS32 = (sint32)imlInstruction->op_r_r_s32.immS32; - if( rRegResult != rRegOperand ) - { - // copy value to destination register before doing addition - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); - } - // set carry to zero - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // ((~a+b)<~a) == true -> ca = 1 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand); - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)immS32); - x64Gen_not_reg64Low32(x64GenContext, rRegOperand); - x64Gen_cmp_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, rRegOperand); - x64Gen_not_reg64Low32(x64GenContext, rRegOperand); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); - // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); - // OR ((~a+b+1)<1) == true -> ca = 1 - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, rRegOperand); - // todo: Optimize by reusing result in REG_RESV_TEMP from above and only add 1 - x64Gen_not_reg64Low32(x64GenContext, REG_RESV_TEMP); - x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (uint32)immS32); - x64Gen_add_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE_EQUAL, 0); - // reset carry flag + jump destination afterwards - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 1); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); - // do actual computation of value, note: a - b is equivalent to a + ~b + 1 - x64Gen_not_reg64Low32(x64GenContext, rRegResult); - x64Gen_add_reg64Low32_imm32(x64GenContext, rRegResult, (uint32)immS32 + 1); - } - else if( imlInstruction->operation == PPCREC_IML_OP_RLWIMI ) - { - // registerResult = ((registerResult<<<SH)&mask) | (registerResult&~mask) - uint32 vImm = (uint32)imlInstruction->op_r_r_s32.immS32; - uint32 mb = (vImm>>0)&0xFF; - uint32 me = (vImm>>8)&0xFF; - uint32 sh = (vImm>>16)&0xFF; - uint32 mask = ppc_mask(mb, me); - // save cr - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // copy rS to temporary register - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, tempToRealRegister(imlInstruction->op_r_r_s32.registerA)); - // rotate destination register - if( sh ) - x64Gen_rol_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (uint8)sh&0x1F); - // AND destination register with inverted mask - x64Gen_and_reg64Low32_imm32(x64GenContext,
tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), ~mask); - // AND temporary rS register with mask - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, mask); - // OR result with temporary - x64Gen_or_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), REG_RESV_TEMP); - } - else if( imlInstruction->operation == PPCREC_IML_OP_MULTIPLY_SIGNED ) - { - // registerResult = registerOperand * immS32 - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 rRegResult = tempToRealRegister(imlInstruction->op_r_r_s32.registerResult); - sint32 rRegOperand = tempToRealRegister(imlInstruction->op_r_r_s32.registerA); - sint32 immS32 = (uint32)imlInstruction->op_r_r_s32.immS32; - x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (sint64)immS32); // todo: Optimize - if( rRegResult != rRegOperand ) - x64Gen_mov_reg64_reg64(x64GenContext, rRegResult, rRegOperand); - x64Gen_imul_reg64Low32_reg64Low32(x64GenContext, rRegResult, REG_RESV_TEMP); - } - else if( imlInstruction->operation == PPCREC_IML_OP_SRAW ) - { - // registerResult = registerOperand>>SH and set xer ca flag - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - uint32 sh = (uint32)imlInstruction->op_r_r_s32.immS32; - // MOV registerResult, registerOperand (if different) - if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerA)); - // todo: Detect if we don't need to update carry - // generic case - // TEST registerResult, (1<<(SH+1))-1 - uint32 caTestMask = 0; - if (sh >= 31) - caTestMask = 0x7FFFFFFF; - else - caTestMask = (1 << (sh)) - 1; - x64Gen_test_reg64Low32_imm32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), caTestMask); - // SETNE/NZ [ESP+XER_CA] - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_NOT_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, xer_ca)); - // SAR registerResult, SH - x64Gen_sar_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), sh); - // JNS (if sign not set) - sint32 jumpInstructionOffset = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGN, 0); // todo: Can use 2-byte form of jump instruction here - // MOV BYTE [ESP+xer_ca], 0 - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, xer_ca), 0); - // jump destination - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset, x64GenContext->codeBufferIndex); - // CR update - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - sint32 crRegister = imlInstruction->crRegister; - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerResult)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGN, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_LT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_SIGNED_GREATER, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_GT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*(crRegister * 4 + PPCREC_CR_BIT_EQ)); - } - } - else if( imlInstruction->operation == 
PPCREC_IML_OP_LEFT_SHIFT || - imlInstruction->operation == PPCREC_IML_OP_RIGHT_SHIFT ) - { - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // MOV registerResult, registerOperand (if different) - if( imlInstruction->op_r_r_s32.registerA != imlInstruction->op_r_r_s32.registerResult ) - x64Gen_mov_reg64_reg64(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerA)); - // Shift - if( imlInstruction->operation == PPCREC_IML_OP_LEFT_SHIFT ) - x64Gen_shl_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), imlInstruction->op_r_r_s32.immS32); - else - x64Gen_shr_reg64Low32_imm8(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), imlInstruction->op_r_r_s32.immS32); - // CR update - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - // since SHL/SHR only modifies the OF flag we need another TEST reg,reg here - x64Gen_test_reg64Low32_reg64Low32(x64GenContext, tempToRealRegister(imlInstruction->op_r_r_s32.registerResult), tempToRealRegister(imlInstruction->op_r_r_s32.registerResult)); - PPCRecompilerX64Gen_updateCRLogical(PPCRecFunction, ppcImlGenContext, x64GenContext, imlInstruction); - } - } - else - { - debug_printf("PPCRecompilerX64Gen_imlInstruction_r_r_s32(): Unsupported operation 0x%x\n", imlInstruction->operation); - return false; - } - return true; -} - -bool PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlSegment_t* imlSegment, PPCRecImlInstruction_t* imlInstruction) -{ - if( imlInstruction->op_conditionalJump.condition == PPCREC_JUMP_CONDITION_NONE ) - { - // jump always - if (imlInstruction->op_conditionalJump.jumpAccordingToSegment) - { - // jump to segment - if (imlSegment->nextSegmentBranchTaken == nullptr) - assert_dbg(); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_SEGMENT, imlSegment->nextSegmentBranchTaken); - x64Gen_jmp_imm32(x64GenContext, 0); - } - else - { - // deprecated (jump to jumpmark) - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmp_imm32(x64GenContext, 0); - } - } - else - { - if (imlInstruction->op_conditionalJump.jumpAccordingToSegment) - assert_dbg(); - // generate jump update marker - if( imlInstruction->op_conditionalJump.crRegisterIndex == PPCREC_CR_TEMPORARY || imlInstruction->op_conditionalJump.crRegisterIndex >= 8 ) - { - // temporary cr is used, which means we use the currently active eflags - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - sint32 condition = imlInstruction->op_conditionalJump.condition; - if( condition == PPCREC_JUMP_CONDITION_E ) - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - else if( condition == PPCREC_JUMP_CONDITION_NE ) - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_EQUAL, 0); - else - assert_dbg(); - } - else - { - uint8 crBitIndex = imlInstruction->op_conditionalJump.crRegisterIndex*4 + imlInstruction->op_conditionalJump.crBitIndex; - if (imlInstruction->op_conditionalJump.crRegisterIndex == x64GenContext->activeCRRegister ) - { - if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_UNSIGNED_ARITHMETIC) - { - if 
(imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_CARRY : X86_CONDITION_NOT_CARRY, 0); - return true; - } - else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); - return true; - } - else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_UNSIGNED_ABOVE : X86_CONDITION_UNSIGNED_BELOW_EQUAL, 0); - return true; - } - } - else if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_SIGNED_ARITHMETIC) - { - if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_LESS : X86_CONDITION_SIGNED_GREATER_EQUAL, 0); - return true; - } - else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); - return true; - } - else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); - return true; - } - } - else if (x64GenContext->activeCRState == PPCREC_CR_STATE_TYPE_LOGICAL) - { - if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_LT) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGN : X86_CONDITION_NOT_SIGN, 0); - return true; - } - else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_EQ) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? 
X86_CONDITION_EQUAL : X86_CONDITION_NOT_EQUAL, 0); - return true; - } - else if (imlInstruction->op_conditionalJump.crBitIndex == CR_BIT_GT) - { - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, imlInstruction->op_conditionalJump.bitMustBeSet ? X86_CONDITION_SIGNED_GREATER : X86_CONDITION_SIGNED_LESS_EQUAL, 0); - return true; - } - } - } - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + crBitIndex * sizeof(uint8), 0); - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - if( imlInstruction->op_conditionalJump.bitMustBeSet ) - { - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_CARRY, 0); - } - else - { - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_CARRY, 0); - } - } - } - return true; -} - -bool PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - // some tests (all performed on a i7-4790K) - // 1) DEC [mem] + JNS has significantly worse performance than BT + JNC (probably due to additional memory write) - // 2) CMP [mem], 0 + JG has about equal (or slightly worse) performance than BT + JNC - - // BT - x64Gen_bt_mem8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, remainingCycles), 31); // check if negative - PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X64_RELOC_LINK_TO_PPC, (void*)(size_t)imlInstruction->op_conditionalJump.jumpmarkAddress); - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_NOT_CARRY, 0); - return true; -} - -/* -* PPC condition register operation -*/ -bool PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); // while these instruction do not directly affect eflags, they change the CR bit - if (imlInstruction->operation == PPCREC_IML_OP_CR_CLEAR) - { - // clear cr bit - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD, 0); - return true; - } - else if (imlInstruction->operation == PPCREC_IML_OP_CR_SET) - { - // set cr bit - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD, 1); - return true; - } - else if(imlInstruction->operation == PPCREC_IML_OP_CR_OR || imlInstruction->operation == PPCREC_IML_OP_CR_ORC || - imlInstruction->operation == PPCREC_IML_OP_CR_AND || imlInstruction->operation == PPCREC_IML_OP_CR_ANDC ) - { - x64Emit_movZX_reg64_mem8(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crB); - if (imlInstruction->operation == PPCREC_IML_OP_CR_ORC || imlInstruction->operation == PPCREC_IML_OP_CR_ANDC) - { - return false; // untested - x64Gen_int3(x64GenContext); - x64Gen_xor_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 1); // complement - } - if(imlInstruction->operation == PPCREC_IML_OP_CR_OR || imlInstruction->operation == PPCREC_IML_OP_CR_ORC) - x64Gen_or_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, 
REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crA); - else - x64Gen_and_reg64Low8_mem8Reg64(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crA); - - x64Gen_mov_mem8Reg64_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, cr) + sizeof(uint8)*imlInstruction->op_cr.crD); - - return true; - } - else - { - assert_dbg(); - } - return false; -} - - -void PPCRecompilerX64Gen_imlInstruction_ppcEnter(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - imlInstruction->op_ppcEnter.x64Offset = x64GenContext->codeBufferIndex; - // generate code - if( ppcImlGenContext->hasFPUInstruction ) - { - // old FPU unavailable code - //PPCRecompilerX86_crConditionFlags_saveBeforeOverwrite(PPCRecFunction, ppcImlGenContext, x64GenContext); - //// skip if FP bit in MSR is set - //// #define MSR_FP (1<<13) - //x64Gen_bt_mem8(x64GenContext, REG_ESP, offsetof(PPCInterpreter_t, msr), 13); - //uint32 jmpCodeOffset = x64GenContext->codeBufferIndex; - //x64Gen_jmpc(x64GenContext, X86_CONDITION_CARRY, 0); - //x64Gen_mov_reg32_imm32(x64GenContext, REG_EAX, imlInstruction->op_ppcEnter.ppcAddress&0x7FFFFFFF); - //PPCRecompilerX64Gen_rememberRelocatableOffset(x64GenContext, X86_RELOC_MAKE_RELATIVE); - //x64Gen_jmp_imm32(x64GenContext, (uint32)PPCRecompiler_recompilerCallEscapeAndCallFPUUnavailable); - //// patch jump - //*(uint32*)(x64GenContext->codeBuffer+jmpCodeOffset+2) = x64GenContext->codeBufferIndex-jmpCodeOffset-6; - } -} - -void PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - uint32 name = imlInstruction->op_r_name.name; - if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) - { - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0)); - } - else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) - { - sint32 sprIndex = (name - PPCREC_NAME_SPR0); - if (sprIndex == SPR_LR) - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr.LR)); - else if (sprIndex == SPR_CTR) - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr.CTR)); - else if (sprIndex == SPR_XER) - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr.XER)); - else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) - { - sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, memOffset); - } - else - assert_dbg(); - //x64Emit_mov_reg64_mem32(x64GenContext, tempToRealRegister(imlInstruction->op_r_name.registerIndex), REG_RSP, offsetof(PPCInterpreter_t, spr)+sizeof(uint32)*(name-PPCREC_NAME_SPR0)); - } - else - assert_dbg(); -} - -void PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - uint32 name = 
imlInstruction->op_r_name.name; - if( name >= PPCREC_NAME_R0 && name < PPCREC_NAME_R0+32 ) - { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, gpr)+sizeof(uint32)*(name-PPCREC_NAME_R0), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); - } - else if( name >= PPCREC_NAME_SPR0 && name < PPCREC_NAME_SPR0+999 ) - { - uint32 sprIndex = (name - PPCREC_NAME_SPR0); - if (sprIndex == SPR_LR) - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.LR), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); - else if (sprIndex == SPR_CTR) - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.CTR), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); - else if (sprIndex == SPR_XER) - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, spr.XER), tempToRealRegister(imlInstruction->op_r_name.registerIndex)); - else if (sprIndex >= SPR_UGQR0 && sprIndex <= SPR_UGQR7) - { - sint32 memOffset = offsetof(PPCInterpreter_t, spr.UGQR) + sizeof(PPCInterpreter_t::spr.UGQR[0]) * (sprIndex - SPR_UGQR0); - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, memOffset, tempToRealRegister(imlInstruction->op_r_name.registerIndex)); - } - else - assert_dbg(); - } - else - assert_dbg(); -} - -uint8* codeMemoryBlock = nullptr; -sint32 codeMemoryBlockIndex = 0; -sint32 codeMemoryBlockSize = 0; - -std::mutex mtx_allocExecutableMemory; - -uint8* PPCRecompilerX86_allocateExecutableMemory(sint32 size) -{ - std::lock_guard<std::mutex> lck(mtx_allocExecutableMemory); - if( codeMemoryBlockIndex+size > codeMemoryBlockSize ) - { - // allocate new block - codeMemoryBlockSize = std::max(1024*1024*4, size+1024); // 4MB (or more if the function is larger than 4MB) - codeMemoryBlockIndex = 0; - codeMemoryBlock = (uint8*)MemMapper::AllocateMemory(nullptr, codeMemoryBlockSize, MemMapper::PAGE_PERMISSION::P_RWX); - } - uint8* codeMem = codeMemoryBlock + codeMemoryBlockIndex; - codeMemoryBlockIndex += size; - // pad to 4 byte alignment - while (codeMemoryBlockIndex & 3) - { - codeMemoryBlock[codeMemoryBlockIndex] = 0x90; - codeMemoryBlockIndex++; - } - return codeMem; -} - -void PPCRecompiler_dumpIML(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext); - -bool PPCRecompiler_generateX64Code(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext) -{ - x64GenContext_t x64GenContext = {0}; - x64GenContext.codeBufferSize = 1024; - x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); - x64GenContext.codeBufferIndex = 0; - x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; - - // generate iml instruction code - bool codeGenerationFailed = false; - for(sint32 s=0; s<ppcImlGenContext->segmentListCount; s++) - { - PPCRecImlSegment_t* imlSegment = ppcImlGenContext->segmentList[s]; - ppcImlGenContext->segmentList[s]->x64Offset = x64GenContext.codeBufferIndex; - for(sint32 i=0; i<imlSegment->imlListCount; i++) - { - PPCRecImlInstruction_t* imlInstruction = imlSegment->imlList+i; - - if( imlInstruction->type == PPCREC_IML_TYPE_R_NAME ) - { - PPCRecompilerX64Gen_imlInstruction_r_name(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_NAME_R ) - { - PPCRecompilerX64Gen_imlInstruction_name_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R ) - { - if( PPCRecompilerX64Gen_imlInstruction_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) - 
{ - codeGenerationFailed = true; - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_R_S32) - { - if (PPCRecompilerX64Gen_imlInstruction_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) - { - codeGenerationFailed = true; - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_CONDITIONAL_R_S32) - { - if (PPCRecompilerX64Gen_imlInstruction_conditional_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_S32 ) - { - if( PPCRecompilerX64Gen_imlInstruction_r_r_s32(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_R_R_R ) - { - if( PPCRecompilerX64Gen_imlInstruction_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP ) - { - if( PPCRecompilerX64Gen_imlInstruction_conditionalJump(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlSegment, imlInstruction) == false ) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_CJUMP_CYCLE_CHECK ) - { - PPCRecompilerX64Gen_imlInstruction_conditionalJumpCycleCheck(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_MACRO ) - { - if( PPCRecompilerX64Gen_imlInstruction_macro(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD ) - { - if( PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false ) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_LOAD_INDEXED ) - { - if( PPCRecompilerX64Gen_imlInstruction_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false ) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_STORE ) - { - if( PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false ) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_STORE_INDEXED ) - { - if( PPCRecompilerX64Gen_imlInstruction_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false ) - { - codeGenerationFailed = true; - } - } - else if (imlInstruction->type == PPCREC_IML_TYPE_MEM2MEM) - { - PPCRecompilerX64Gen_imlInstruction_mem2mem(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_CR ) - { - if( PPCRecompilerX64Gen_imlInstruction_cr(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction) == false ) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_JUMPMARK ) - { - // no op - } - else if( imlInstruction->type == PPCREC_IML_TYPE_NO_OP ) - { - // no op - } - else if( imlInstruction->type == PPCREC_IML_TYPE_PPC_ENTER ) - { - PPCRecompilerX64Gen_imlInstruction_ppcEnter(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_NAME ) - { - PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction, ppcImlGenContext, &x64GenContext, 
imlInstruction); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_NAME_R ) - { - PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD ) - { - if( PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false ) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_LOAD_INDEXED ) - { - if( PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false ) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE ) - { - if( PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, false) == false ) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_STORE_INDEXED ) - { - if( PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction, true) == false ) - { - codeGenerationFailed = true; - } - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R ) - { - PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R ) - { - PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R_R_R_R ) - { - PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else if( imlInstruction->type == PPCREC_IML_TYPE_FPR_R ) - { - PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction, ppcImlGenContext, &x64GenContext, imlInstruction); - } - else - { - debug_printf("PPCRecompiler_generateX64Code(): Unsupported iml type 0x%x\n", imlInstruction->type); - assert_dbg(); - } - } - } - // handle failed code generation - if( codeGenerationFailed ) - { - free(x64GenContext.codeBuffer); - if (x64GenContext.relocateOffsetTable) - free(x64GenContext.relocateOffsetTable); - return false; - } - // allocate executable memory - uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex); - size_t baseAddress = (size_t)executableMemory; - // fix relocs - for(sint32 i=0; isegmentListCount; s++) - { - if (ppcImlGenContext->segmentList[s]->isJumpDestination && ppcImlGenContext->segmentList[s]->jumpDestinationPPCAddress == ppcOffset) - { - x64Offset = ppcImlGenContext->segmentList[s]->x64Offset; - break; - } - } - if (x64Offset == 0xFFFFFFFF) - { - debug_printf("Recompiler could not resolve jump (function at 0x%08x)\n", PPCRecFunction->ppcAddress); - // todo: Cleanup - return false; - } - } - else - { - PPCRecImlSegment_t* destSegment = (PPCRecImlSegment_t*)x64GenContext.relocateOffsetTable[i].extraInfo; - x64Offset = destSegment->x64Offset; - } - uint32 relocBase = x64GenContext.relocateOffsetTable[i].offset; - uint8* relocInstruction = x64GenContext.codeBuffer+relocBase; - if( relocInstruction[0] == 0x0F && (relocInstruction[1] >= 0x80 && relocInstruction[1] <= 0x8F) ) - { - // Jcc relativeImm32 - sint32 distanceNearJump = (sint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 2)); - if (distanceNearJump >= -128 && distanceNearJump < 127) // disabled - { - // convert to near Jcc - 
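// [Illustrative note, not from the original source] The far-to-near conversion below relies on the
// x86 encoding relationship between the two Jcc forms: the 6-byte near form is 0F 8x rel32 and the
// 2-byte short form is 7x rel8, with the same condition nibble x in both, so "secondOpcodeByte - 0x80 + 0x70"
// maps one onto the other (e.g. JE: 0F 84 -> 74, JNE: 0F 85 -> 75). rel8 is measured from the end of
// the 2-byte instruction, which is why the distance is computed against relocBase + 2, and the four
// bytes freed by the shorter encoding are overwritten with a canonical 4-byte NOP (0F 1F 40 00) so
// the instruction stream stays valid.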
*(uint8*)(relocInstruction + 0) = (uint8)(relocInstruction[1]-0x80 + 0x70); - // patch offset - *(uint8*)(relocInstruction + 1) = (uint8)distanceNearJump; - // replace unused 4 bytes with NOP instruction - relocInstruction[2] = 0x0F; - relocInstruction[3] = 0x1F; - relocInstruction[4] = 0x40; - relocInstruction[5] = 0x00; - } - else - { - // patch offset - *(uint32*)(relocInstruction + 2) = (uint32)((baseAddress + x64Offset) - (baseAddress + relocBase + 6)); - } - } - else if( relocInstruction[0] == 0xE9 ) - { - // JMP relativeImm32 - *(uint32*)(relocInstruction+1) = (uint32)((baseAddress+x64Offset)-(baseAddress+relocBase+5)); - } - else - assert_dbg(); - } - else - { - assert_dbg(); - } - } - - // copy code to executable memory - memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex); - free(x64GenContext.codeBuffer); - x64GenContext.codeBuffer = nullptr; - if (x64GenContext.relocateOffsetTable) - free(x64GenContext.relocateOffsetTable); - // set code - PPCRecFunction->x86Code = executableMemory; - PPCRecFunction->x86Size = x64GenContext.codeBufferIndex; - return true; -} - -void PPCRecompilerX64Gen_generateEnterRecompilerCode() -{ - x64GenContext_t x64GenContext = {0}; - x64GenContext.codeBufferSize = 1024; - x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); - x64GenContext.codeBufferIndex = 0; - x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; - - // start of recompiler entry function - x64Gen_push_reg64(&x64GenContext, REG_RAX); - x64Gen_push_reg64(&x64GenContext, REG_RCX); - x64Gen_push_reg64(&x64GenContext, REG_RDX); - x64Gen_push_reg64(&x64GenContext, REG_RBX); - x64Gen_push_reg64(&x64GenContext, REG_RBP); - x64Gen_push_reg64(&x64GenContext, REG_RDI); - x64Gen_push_reg64(&x64GenContext, REG_RSI); - x64Gen_push_reg64(&x64GenContext, REG_R8); - x64Gen_push_reg64(&x64GenContext, REG_R9); - x64Gen_push_reg64(&x64GenContext, REG_R10); - x64Gen_push_reg64(&x64GenContext, REG_R11); - x64Gen_push_reg64(&x64GenContext, REG_R12); - x64Gen_push_reg64(&x64GenContext, REG_R13); - x64Gen_push_reg64(&x64GenContext, REG_R14); - x64Gen_push_reg64(&x64GenContext, REG_R15); - - // 000000007775EF04 | E8 00 00 00 00 call +0x00 - x64Gen_writeU8(&x64GenContext, 0xE8); - x64Gen_writeU8(&x64GenContext, 0x00); - x64Gen_writeU8(&x64GenContext, 0x00); - x64Gen_writeU8(&x64GenContext, 0x00); - x64Gen_writeU8(&x64GenContext, 0x00); - //000000007775EF09 | 48 83 04 24 05 add qword ptr ss:[rsp],5 - x64Gen_writeU8(&x64GenContext, 0x48); - x64Gen_writeU8(&x64GenContext, 0x83); - x64Gen_writeU8(&x64GenContext, 0x04); - x64Gen_writeU8(&x64GenContext, 0x24); - uint32 jmpPatchOffset = x64GenContext.codeBufferIndex; - x64Gen_writeU8(&x64GenContext, 0); // skip the distance until after the JMP - x64Emit_mov_mem64_reg64(&x64GenContext, REG_RDX, offsetof(PPCInterpreter_t, rspTemp), REG_RSP); - - - // MOV RSP, RDX (ppc interpreter instance) - x64Gen_mov_reg64_reg64(&x64GenContext, REG_RSP, REG_RDX); - // MOV R15, ppcRecompilerInstanceData - x64Gen_mov_reg64_imm64(&x64GenContext, REG_R15, (uint64)ppcRecompilerInstanceData); - // MOV R13, memory_base - x64Gen_mov_reg64_imm64(&x64GenContext, REG_R13, (uint64)memory_base); - - //JMP recFunc - x64Gen_jmp_reg64(&x64GenContext, REG_RCX); // call argument 1 - - x64GenContext.codeBuffer[jmpPatchOffset] = (x64GenContext.codeBufferIndex-(jmpPatchOffset-4)); - - //recompilerExit1: - x64Gen_pop_reg64(&x64GenContext, REG_R15); - x64Gen_pop_reg64(&x64GenContext, REG_R14); - x64Gen_pop_reg64(&x64GenContext, REG_R13); - 
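// [Illustrative note, not from the original source] Context for this POP sequence: the CALL +0 /
// ADD QWORD PTR [RSP], imm8 pair emitted above pushes the address of the ADD instruction itself and
// then advances it by the patched imm8, leaving a synthetic return address on the host stack that
// points at the first POP of this epilogue. When the recompiled code later runs the leave-recompiler
// stub (restore RSP from rspTemp, then RET), execution resumes here, the saved host registers are
// restored and control returns to the C++ caller of PPCRecompiler_enterRecompilerCode.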
x64Gen_pop_reg64(&x64GenContext, REG_R12); - x64Gen_pop_reg64(&x64GenContext, REG_R11); - x64Gen_pop_reg64(&x64GenContext, REG_R10); - x64Gen_pop_reg64(&x64GenContext, REG_R9); - x64Gen_pop_reg64(&x64GenContext, REG_R8); - x64Gen_pop_reg64(&x64GenContext, REG_RSI); - x64Gen_pop_reg64(&x64GenContext, REG_RDI); - x64Gen_pop_reg64(&x64GenContext, REG_RBP); - x64Gen_pop_reg64(&x64GenContext, REG_RBX); - x64Gen_pop_reg64(&x64GenContext, REG_RDX); - x64Gen_pop_reg64(&x64GenContext, REG_RCX); - x64Gen_pop_reg64(&x64GenContext, REG_RAX); - // RET - x64Gen_ret(&x64GenContext); - - uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex); - // copy code to executable memory - memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex); - free(x64GenContext.codeBuffer); - PPCRecompiler_enterRecompilerCode = (void ATTR_MS_ABI (*)(uint64,uint64))executableMemory; -} - - -void* PPCRecompilerX64Gen_generateLeaveRecompilerCode() -{ - x64GenContext_t x64GenContext = {0}; - x64GenContext.codeBufferSize = 128; - x64GenContext.codeBuffer = (uint8*)malloc(x64GenContext.codeBufferSize); - x64GenContext.codeBufferIndex = 0; - x64GenContext.activeCRRegister = PPC_REC_INVALID_REGISTER; - - // update instruction pointer - // LR is in EDX - x64Emit_mov_mem32_reg32(&x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, instructionPointer), REG_EDX); - - // MOV RSP, [ppcRecompilerX64_rspTemp] - x64Emit_mov_reg64_mem64(&x64GenContext, REG_RSP, REG_RESV_HCPU, offsetof(PPCInterpreter_t, rspTemp)); - - // RET - x64Gen_ret(&x64GenContext); - - uint8* executableMemory = PPCRecompilerX86_allocateExecutableMemory(x64GenContext.codeBufferIndex); - // copy code to executable memory - memcpy(executableMemory, x64GenContext.codeBuffer, x64GenContext.codeBufferIndex); - free(x64GenContext.codeBuffer); - return executableMemory; -} - -void PPCRecompilerX64Gen_generateRecompilerInterfaceFunctions() -{ - PPCRecompilerX64Gen_generateEnterRecompilerCode(); - PPCRecompiler_leaveRecompilerCode_unvisited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode(); - PPCRecompiler_leaveRecompilerCode_visited = (void ATTR_MS_ABI (*)())PPCRecompilerX64Gen_generateLeaveRecompilerCode(); - cemu_assert_debug(PPCRecompiler_leaveRecompilerCode_unvisited != PPCRecompiler_leaveRecompilerCode_visited); -} \ No newline at end of file diff --git a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp b/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp deleted file mode 100644 index fbb95b2f..00000000 --- a/src/Cafe/HW/Espresso/Recompiler/PPCRecompilerX64FPU.cpp +++ /dev/null @@ -1,1244 +0,0 @@ -#include "PPCRecompiler.h" -#include "PPCRecompilerIml.h" -#include "PPCRecompilerX64.h" -#include "asm/x64util.h" - -void PPCRecompilerX64Gen_imlInstruction_fpr_r_name(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - uint32 name = imlInstruction->op_r_name.name; - if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) - { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); - } - else if( name >= PPCREC_NAME_TEMPORARY_FPR0 || name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) - { - x64Gen_movupd_xmmReg_memReg128(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, 
temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); - } - else - { - cemu_assert_debug(false); - } -} - -void PPCRecompilerX64Gen_imlInstruction_fpr_name_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - uint32 name = imlInstruction->op_r_name.name; - if( name >= PPCREC_NAME_FPR0 && name < (PPCREC_NAME_FPR0+32) ) - { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, fpr)+sizeof(FPR_t)*(name-PPCREC_NAME_FPR0)); - } - else if( name >= PPCREC_NAME_TEMPORARY_FPR0 && name < (PPCREC_NAME_TEMPORARY_FPR0+8) ) - { - x64Gen_movupd_memReg128_xmmReg(x64GenContext, tempToRealFPRRegister(imlInstruction->op_r_name.registerIndex), REG_ESP, offsetof(PPCInterpreter_t, temporaryFPR)+sizeof(FPR_t)*(name-PPCREC_NAME_TEMPORARY_FPR0)); - } - else - { - cemu_assert_debug(false); - } -} - -void PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, sint32 registerXMM, bool isLoad, bool scalePS1, sint32 registerGQR) -{ - // load GQR - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, registerGQR); - // extract scale field and multiply by 16 to get array offset - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, (isLoad?16:0)+8-4); - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, (0x3F<<4)); - // multiply xmm by scale - x64Gen_add_reg64_reg64(x64GenContext, REG_RESV_TEMP, REG_RESV_RECDATA); - if (isLoad) - { - if(scalePS1) - x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_ld_scale_ps0_ps1)); - else - x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_ld_scale_ps0_1)); - } - else - { - if (scalePS1) - x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_st_scale_ps0_ps1)); - else - x64Gen_mulpd_xmmReg_memReg128(x64GenContext, registerXMM, REG_RESV_TEMP, offsetof(PPCRecompilerInstanceData_t, _psq_st_scale_ps0_1)); - } -} - -// generate code for PSQ load for a particular type -// if scaleGQR is -1 then a scale of 1.0 is assumed (no scale) -void PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, sint32 registerGQR = -1) -{ - if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1) - { - if (indexed) - { - assert_dbg(); - } - // optimized code for ps float load - x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_R13, memReg, memImmS32); - x64Gen_bswap_reg64(x64GenContext, REG_RESV_TEMP); - x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD - x64Gen_movq_xmmReg_reg64(x64GenContext, registerXMM, REG_RESV_TEMP); - x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, registerXMM, registerXMM); - // note: floats are not scaled - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0) - { - if (indexed) - { - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memRegEx); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, memReg); - if (hasMOVBESupport) - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32); - } - else - { - x64Emit_mov_reg32_mem32(x64GenContext, 
REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, memImmS32); - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - } - } - else - { - if (hasMOVBESupport) - { - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32); - } - else - { - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, memReg, memImmS32); - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - } - } - if (hasAVXSupport) - { - x64Gen_movd_xmmReg_reg64Low32(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); - } - else - { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR), REG_RESV_TEMP); - x64Gen_movddup_xmmReg_memReg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - } - x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP); - // load constant 1.0 into lower half and upper half of temp register - x64Gen_movddup_xmmReg_memReg64(x64GenContext, registerXMM, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble1_1)); - // overwrite lower half with single from memory - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, registerXMM, REG_RESV_FPR_TEMP); - // note: floats are not scaled - } - else - { - sint32 readSize; - bool isSigned = false; - if (mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1) - { - readSize = 16; - isSigned = true; - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1) - { - readSize = 16; - isSigned = false; - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1) - { - readSize = 8; - isSigned = true; - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1) - { - readSize = 8; - isSigned = false; - } - else - assert_dbg(); - - bool loadPS1 = (mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1); - for (sint32 wordIndex = 0; wordIndex < 2; wordIndex++) - { - if (indexed) - { - assert_dbg(); - } - // read from memory - if (wordIndex == 1 && loadPS1 == false) - { - // store constant 1 - x64Gen_mov_mem32Reg64_imm32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * 1, 1); - } - else - { - uint32 memOffset = memImmS32 + wordIndex * (readSize / 8); - if (readSize == 16) - { - // half word - x64Gen_movZeroExtend_reg64Low16_mem16Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_R13, memReg, memOffset); - x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); // endian swap - if (isSigned) - x64Gen_movSignExtend_reg64Low32_reg64Low16(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - else - x64Gen_movZeroExtend_reg64Low32_reg64Low16(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - } - else if (readSize == 8) - { - // byte - x64Emit_mov_reg64b_mem8(x64GenContext, REG_RESV_TEMP, REG_R13, memReg, memOffset); - if (isSigned) - x64Gen_movSignExtend_reg64Low32_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - else - x64Gen_movZeroExtend_reg64Low32_reg64Low8(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - } - // store - x64Emit_mov_mem32_reg32(x64GenContext, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR) + sizeof(uint32) * wordIndex, REG_RESV_TEMP); - } - } - // convert the two integers to doubles - x64Gen_cvtpi2pd_xmmReg_mem64Reg64(x64GenContext, 
registerXMM, REG_RESV_HCPU, offsetof(PPCInterpreter_t, temporaryGPR)); - // scale - if (registerGQR >= 0) - PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext, x64GenContext, registerXMM, true, loadPS1, registerGQR); - } -} - -void PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, sint32 registerGQR) -{ - bool loadPS1 = (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1); - // load GQR - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, registerGQR); - // extract load type field - x64Gen_shr_reg64Low32_imm8(x64GenContext, REG_RESV_TEMP, 16); - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); - // jump cases - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 4); // type 4 -> u8 - sint32 jumpOffset_caseU8 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 5); // type 5 -> u16 - sint32 jumpOffset_caseU16 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 6); // type 4 -> s8 - sint32 jumpOffset_caseS8 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // type 5 -> s16 - sint32 jumpOffset_caseS16 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - // default case -> float - - // generate cases - uint32 jumpOffset_endOfFloat; - uint32 jumpOffset_endOfU8; - uint32 jumpOffset_endOfU16; - uint32 jumpOffset_endOfS8; - - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfFloat = x64GenContext->codeBufferIndex; - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_U16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU8 = x64GenContext->codeBufferIndex; - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_S16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU16 = x64GenContext->codeBufferIndex; - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? 
PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_U8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfS8 = x64GenContext->codeBufferIndex; - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, loadPS1 ? PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1 : PPCREC_FPR_LD_MODE_PSQ_S8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->codeBufferIndex); -} - -// load from memory -bool PPCRecompilerX64Gen_imlInstruction_fpr_load(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed) -{ - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 realRegisterXMM = tempToRealFPRRegister(imlInstruction->op_storeLoad.registerData); - sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); - sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; - if( indexed ) - realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); - uint8 mode = imlInstruction->op_storeLoad.mode; - - if( mode == PPCREC_FPR_LD_MODE_SINGLE_INTO_PS0_PS1 ) - { - // load byte swapped single into temporary FPR - if( indexed ) - { - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); - if( hasMOVBESupport ) - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); - else - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32); - } - else - { - if( hasMOVBESupport ) - x64Gen_movBEZeroExtend_reg64_mem32Reg64PlusReg64(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); - else - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_RESV_MEMBASE, realRegisterMem, imlInstruction->op_storeLoad.immS32); - } - if( hasMOVBESupport == false ) - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - if( hasAVXSupport ) - { - x64Gen_movd_xmmReg_reg64Low32(x64GenContext, realRegisterXMM, REG_RESV_TEMP); - } - else - { - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR), REG_RESV_TEMP); - x64Gen_movddup_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - } - - if (imlInstruction->op_storeLoad.flags2.notExpanded) - { - // leave value as single - } - else - { - x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, realRegisterXMM); - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, realRegisterXMM, realRegisterXMM); - } - } - else if( mode == PPCREC_FPR_LD_MODE_DOUBLE_INTO_PS0 ) - { - if( hasAVXSupport ) - { - if( indexed ) - { - // calculate offset - 
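// [Illustrative sketch, not from the original source] What the indexed, AVX-capable double-load path
// starting here computes, expressed as standalone C++. The names memory_base, gprA, gprB and imm are
// hypothetical stand-ins for REG_R13 (memory base), the two PPC GPR operands and op_storeLoad.immS32;
// guest memory is big-endian, hence the 64-bit byte swap before the raw bits are copied into the
// destination register (the MOVQ/MOVSD steps below).
#include <cstdint>
#include <cstring>

static uint64_t bswap64(uint64_t v)
{
	uint64_t r = 0;
	for (int i = 0; i < 8; i++)
		r = (r << 8) | ((v >> (i * 8)) & 0xFF); // reverse the byte order
	return r;
}

static double loadGuestDoubleIndexed(const uint8_t* memory_base, uint32_t gprA, uint32_t gprB, int32_t imm)
{
	uint32_t index = gprA + gprB;             // 32-bit add, mirrors the ADD on the register low halves
	uint64_t raw;
	std::memcpy(&raw, memory_base + index + imm, sizeof(raw));
	raw = bswap64(raw);                       // guest big-endian -> host little-endian
	double value;
	std::memcpy(&value, &raw, sizeof(value)); // bit-copy, mirrors MOVQ into the XMM register
	return value;
}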
x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); - // load value - x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0); - x64Gen_bswap_reg64(x64GenContext, REG_RESV_TEMP); - x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP); - } - else - { - x64Emit_mov_reg64_mem64(x64GenContext, REG_RESV_TEMP, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0); - x64Gen_bswap_reg64(x64GenContext, REG_RESV_TEMP); - x64Gen_movq_xmmReg_reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_TEMP); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, realRegisterXMM, REG_RESV_FPR_TEMP); - } - } - else - { - if( indexed ) - { - // calculate offset - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); - // load double low part to temporaryFPR - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+0); - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP); - // calculate offset again - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, realRegisterMem2); - // load double high part to temporaryFPR - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_R13, REG_RESV_TEMP, imlInstruction->op_storeLoad.immS32+4); - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP); - // load double from temporaryFPR - x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - } - else - { - // load double low part to temporaryFPR - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0); - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4, REG_RESV_TEMP); - // load double high part to temporaryFPR - x64Emit_mov_reg32_mem32(x64GenContext, REG_RESV_TEMP, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+4); - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - x64Emit_mov_mem32_reg64(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0, REG_RESV_TEMP); - // load double from temporaryFPR - x64Gen_movlpd_xmmReg_memReg64(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - } - } - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_FLOAT_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U16_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_LD_MODE_PSQ_U8_PS0_PS1 ) - { - PPCRecompilerX64Gen_imlInstr_psq_load(ppcImlGenContext, x64GenContext, mode, 
realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed); - } - else if (mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0_PS1 || - mode == PPCREC_FPR_LD_MODE_PSQ_GENERIC_PS0) - { - PPCRecompilerX64Gen_imlInstr_psq_load_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, tempToRealRegister(imlInstruction->op_storeLoad.registerGQR)); - } - else - { - return false; - } - return true; -} - -void PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, sint32 registerGQR = -1) -{ - bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1); - bool isFloat = mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0 || mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1; - if (registerGQR >= 0) - { - // move to temporary xmm and update registerXMM - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - registerXMM = REG_RESV_FPR_TEMP; - // apply scale - if(isFloat == false) - PPCRecompilerX64Gen_imlInstr_gqr_generateScaleCode(ppcImlGenContext, x64GenContext, registerXMM, false, storePS1, registerGQR); - } - if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0) - { - x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - if (hasAVXSupport) - { - x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - } - else - { - x64Gen_movsd_memReg64_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - } - if (hasMOVBESupport == false) - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - if (indexed) - { - cemu_assert_debug(memReg != memRegEx); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx); - } - if (hasMOVBESupport) - x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, memReg, memImmS32, REG_RESV_TEMP); - else - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, memReg, memImmS32, REG_RESV_TEMP); - if (indexed) - { - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, memReg, memRegEx); - } - return; - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1) - { - if (indexed) - assert_dbg(); // todo - x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - x64Gen_movq_reg64_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - x64Gen_rol_reg64_imm8(x64GenContext, REG_RESV_TEMP, 32); // swap upper and lower DWORD - x64Gen_bswap_reg64(x64GenContext, REG_RESV_TEMP); - x64Gen_mov_mem64Reg64PlusReg64_reg64(x64GenContext, REG_RESV_TEMP, REG_R13, memReg, memImmS32); - return; - } - // store as integer - // get limit from mode - sint32 clampMin, clampMax; - sint32 bitWriteSize; - if (mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 ) - { - clampMin = -128; - clampMax = 127; - bitWriteSize = 8; - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 ) - { - clampMin = 0; - clampMax = 255; - bitWriteSize = 8; - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0 || - mode == 
PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 ) - { - clampMin = 0; - clampMax = 0xFFFF; - bitWriteSize = 16; - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 ) - { - clampMin = -32768; - clampMax = 32767; - bitWriteSize = 16; - } - else - { - cemu_assert(false); - } - for (sint32 valueIndex = 0; valueIndex < (storePS1?2:1); valueIndex++) - { - // todo - multiply by GQR scale - if (valueIndex == 0) - { - // convert low half (PS0) to integer - x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, REG_RESV_TEMP, registerXMM); - } - else - { - // load top half (PS1) into bottom half of temporary register - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, registerXMM); - // convert low half to integer - x64Gen_cvttsd2si_reg64Low_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - } - // max(i, -clampMin) - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_GREATER_EQUAL, 0); - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMin); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); - // min(i, clampMax) - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_SIGNED_LESS_EQUAL, 0); - x64Gen_mov_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, clampMax); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); - // endian swap - if( bitWriteSize == 16) - x64Gen_rol_reg64Low16_imm8(x64GenContext, REG_RESV_TEMP, 8); - // write to memory - if (indexed) - assert_dbg(); // unsupported - sint32 memOffset = memImmS32 + valueIndex * (bitWriteSize/8); - if (bitWriteSize == 8) - x64Gen_movTruncate_mem8Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memOffset, REG_RESV_TEMP); - else if (bitWriteSize == 16) - x64Gen_movTruncate_mem16Reg64PlusReg64_reg64(x64GenContext, REG_RESV_MEMBASE, memReg, memOffset, REG_RESV_TEMP); - } -} - -void PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, uint8 mode, sint32 registerXMM, sint32 memReg, sint32 memRegEx, sint32 memImmS32, bool indexed, sint32 registerGQR) -{ - bool storePS1 = (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1); - // load GQR - x64Gen_mov_reg64_reg64(x64GenContext, REG_RESV_TEMP, registerGQR); - // extract store type field - x64Gen_and_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); - // jump cases - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 4); // type 4 -> u8 - sint32 jumpOffset_caseU8 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 5); // type 5 -> u16 - sint32 jumpOffset_caseU16 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 6); // type 4 -> s8 - sint32 jumpOffset_caseS8 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - x64Gen_cmp_reg64Low32_imm32(x64GenContext, REG_RESV_TEMP, 7); // type 5 -> s16 - sint32 jumpOffset_caseS16 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_far(x64GenContext, X86_CONDITION_EQUAL, 0); - // default case -> float - - // 
generate cases - uint32 jumpOffset_endOfFloat; - uint32 jumpOffset_endOfU8; - uint32 jumpOffset_endOfU16; - uint32 jumpOffset_endOfS8; - - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfFloat = x64GenContext->codeBufferIndex; - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU16, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_U16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU8 = x64GenContext->codeBufferIndex; - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS16, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_S16_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfU16 = x64GenContext->codeBufferIndex; - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseU8, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_U8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - jumpOffset_endOfS8 = x64GenContext->codeBufferIndex; - x64Gen_jmp_imm32(x64GenContext, 0); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_caseS8, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, storePS1 ? 
PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 : PPCREC_FPR_ST_MODE_PSQ_S8_PS0, registerXMM, memReg, memRegEx, memImmS32, indexed, registerGQR); - - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfFloat, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU8, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfU16, x64GenContext->codeBufferIndex); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpOffset_endOfS8, x64GenContext->codeBufferIndex); -} - -// store to memory -bool PPCRecompilerX64Gen_imlInstruction_fpr_store(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction, bool indexed) -{ - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - sint32 realRegisterXMM = tempToRealFPRRegister(imlInstruction->op_storeLoad.registerData); - sint32 realRegisterMem = tempToRealRegister(imlInstruction->op_storeLoad.registerMem); - sint32 realRegisterMem2 = PPC_REC_INVALID_REGISTER; - if( indexed ) - realRegisterMem2 = tempToRealRegister(imlInstruction->op_storeLoad.registerMem2); - uint8 mode = imlInstruction->op_storeLoad.mode; - if( mode == PPCREC_FPR_ST_MODE_SINGLE_FROM_PS0 ) - { - if (imlInstruction->op_storeLoad.flags2.notExpanded) - { - // value is already in single format - if (hasAVXSupport) - { - x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM); - } - else - { - x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - } - } - else - { - x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, realRegisterXMM); - if (hasAVXSupport) - { - x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, REG_RESV_FPR_TEMP); - } - else - { - x64Gen_movsd_memReg64_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - } - } - if( hasMOVBESupport == false ) - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - if( indexed ) - { - if( realRegisterMem == realRegisterMem2 ) - assert_dbg(); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - if( hasMOVBESupport ) - x64Gen_movBETruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); - else - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); - if( indexed ) - { - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - } - else if( mode == PPCREC_FPR_ST_MODE_DOUBLE_FROM_PS0 ) - { - if( indexed ) - { - if( realRegisterMem == realRegisterMem2 ) - assert_dbg(); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - // store double low part - x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+0); - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, 
REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+4, REG_RESV_TEMP); - // store double high part - x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)+4); - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32+0, REG_RESV_TEMP); - if( indexed ) - { - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - } - else if( mode == PPCREC_FPR_ST_MODE_UI32_FROM_PS0 ) - { - if( hasAVXSupport ) - { - x64Gen_movd_reg64Low32_xmmReg(x64GenContext, REG_RESV_TEMP, realRegisterXMM); - } - else - { - x64Gen_movsd_memReg64_xmmReg(x64GenContext, realRegisterXMM, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - x64Emit_mov_reg64_mem32(x64GenContext, REG_RESV_TEMP, REG_RSP, offsetof(PPCInterpreter_t, temporaryFPR)); - } - x64Gen_bswap_reg64Lower32bit(x64GenContext, REG_RESV_TEMP); - if( indexed ) - { - if( realRegisterMem == realRegisterMem2 ) - assert_dbg(); - x64Gen_add_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); - x64Gen_sub_reg64Low32_reg64Low32(x64GenContext, realRegisterMem, realRegisterMem2); - } - else - { - x64Gen_movTruncate_mem32Reg64PlusReg64_reg64(x64GenContext, REG_R13, realRegisterMem, imlInstruction->op_storeLoad.immS32, REG_RESV_TEMP); - } - } - else if(mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_FLOAT_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S8_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_U8_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_S16_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0 || - mode == PPCREC_FPR_ST_MODE_PSQ_U16_PS0_PS1 ) - { - cemu_assert_debug(imlInstruction->op_storeLoad.flags2.notExpanded == false); - PPCRecompilerX64Gen_imlInstr_psq_store(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed); - } - else if (mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0_PS1 || - mode == PPCREC_FPR_ST_MODE_PSQ_GENERIC_PS0) - { - PPCRecompilerX64Gen_imlInstr_psq_store_generic(ppcImlGenContext, x64GenContext, mode, realRegisterXMM, realRegisterMem, realRegisterMem2, imlInstruction->op_storeLoad.immS32, indexed, tempToRealRegister(imlInstruction->op_storeLoad.registerGQR)); - } - else - { - if( indexed ) - assert_dbg(); // todo - debug_printf("PPCRecompilerX64Gen_imlInstruction_fpr_store(): Unsupported mode %d\n", mode); - return false; - } - return true; -} - -void _swapPS0PS1(x64GenContext_t* x64GenContext, sint32 xmmReg) -{ - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, xmmReg, xmmReg, 1); -} - -// FPR op FPR -void PPCRecompilerX64Gen_imlInstruction_fpr_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM_AND_TOP ) - { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, 
imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM_AND_TOP ) - { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - // VPUNPCKHQDQ - if (imlInstruction->op_fpr_r_r.registerResult == imlInstruction->op_fpr_r_r.registerOperand) - { - // unpack top to bottom and top - x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - //else if ( hasAVXSupport ) - //{ - // // unpack top to bottom and top with non-destructive destination - // // update: On Ivy Bridge this causes weird stalls? - // x64Gen_avx_VUNPCKHPD_xmm_xmm_xmm(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, imlInstruction->op_fpr_r_r.registerOperand); - //} - else - { - // move top to bottom - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - // duplicate bottom - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerResult); - } - - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_BOTTOM ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_TO_TOP ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_BOTTOM_AND_TOP_SWAPPED ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_TOP ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, 2); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_COPY_TOP_TO_BOTTOM ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // use unpckhpd here? 
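Throughout this block a paired-single register is held as one XMM value with PS0 in the low 64-bit lane and PS1 in the high lane, so the copy operations are plain SSE lane shuffles. A minimal standalone sketch of those lane semantics using intrinsics (the sample values and the `print2` helper are only for illustration):

```cpp
#include <immintrin.h> // SSE2 shuffles plus SSE3 movddup
#include <cstdio>

// One paired-single register is one XMM value: low 64-bit lane = PS0,
// high lane = PS1. The copy operations above are plain lane shuffles.
static void print2(const char* name, __m128d v)
{
    double out[2];
    _mm_storeu_pd(out, v);
    std::printf("%-26s PS0=%4.1f PS1=%4.1f\n", name, out[0], out[1]);
}

int main()
{
    __m128d d = _mm_set_pd(20.0 /*PS1*/, 10.0 /*PS0*/); // result register
    __m128d s = _mm_set_pd( 2.0 /*PS1*/,  1.0 /*PS0*/); // operand register

    print2("BOTTOM_TO_BOTTOM_AND_TOP", _mm_movedup_pd(s));       // movddup  -> {1, 1}
    print2("BOTTOM_TO_BOTTOM",         _mm_move_sd(d, s));       // movsd    -> {1, 20}
    print2("BOTTOM_TO_TOP",            _mm_unpacklo_pd(d, s));   // unpcklpd -> {10, 1}
    print2("TOP_TO_TOP",               _mm_shuffle_pd(d, s, 2)); // shufpd 2 -> {10, 2}
    print2("BOTTOM_AND_TOP_SWAPPED",   _mm_shuffle_pd(s, s, 1)); // shufpd 1 -> {2, 1}
    print2("TOP_TO_BOTTOM_AND_TOP",    _mm_unpackhi_pd(s, s));   // unpckhpd -> {2, 2}
    return 0;
}
```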
- x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand, 3); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM ) - { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_PAIR ) - { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - x64Gen_mulpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_BOTTOM ) - { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - x64Gen_divsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_DIVIDE_PAIR) - { - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - assert_dbg(); - } - x64Gen_divpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM ) - { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_PAIR ) - { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR ) - { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) - { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - else if( imlInstruction->operation == PPCREC_IML_OP_ASSIGN ) - { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FCTIWZ ) - { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - x64Gen_cvttsd2si_xmmReg_xmmReg(x64GenContext, REG_RESV_TEMP, imlInstruction->op_fpr_r_r.registerOperand); - x64Gen_mov_reg64Low32_reg64Low32(x64GenContext, REG_RESV_TEMP, REG_RESV_TEMP); - // move to FPR register - x64Gen_movq_xmmReg_reg64(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_TEMP); - } - else if(imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM || - imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_TOP || - imlInstruction->operation == 
PPCREC_IML_OP_FPR_FCMPO_BOTTOM ) - { - if( imlInstruction->crRegister == PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_BOTTOM) - x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_FCMPU_TOP) - { - // temporarily switch top/bottom of both operands and compare - if (imlInstruction->op_fpr_r_r.registerResult == imlInstruction->op_fpr_r_r.registerOperand) - { - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - } - else - { - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerOperand); - x64Gen_ucomisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerResult); - _swapPS0PS1(x64GenContext, imlInstruction->op_fpr_r_r.registerOperand); - } - } - else - x64Gen_comisd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - // todo: handle FPSCR updates - // update cr - sint32 crRegister = imlInstruction->crRegister; - // if the parity bit is set (NaN) we need to manually set CR LT, GT and EQ to 0 (comisd/ucomisd sets the respective flags to 1 in case of NaN) - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_PARITY, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_SO)); // unordered - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_PARITY, 0); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT)); // same as X64_CONDITION_CARRY - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_UNSIGNED_ABOVE, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT)); - x64Gen_setcc_mem8(x64GenContext, X86_CONDITION_EQUAL, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ)); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_LT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_GT), 0); - x64Gen_mov_mem8Reg64_imm8(x64GenContext, REG_RSP, offsetof(PPCInterpreter_t, cr)+sizeof(uint8)*(crRegister*4+PPCREC_CR_BIT_EQ), 0); - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_FRES_TO_BOTTOM_AND_TOP ) - { - if( imlInstruction->crRegister != PPC_REC_INVALID_REGISTER ) - { - assert_dbg(); - } - // move register to XMM15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); - - // call assembly routine to 
calculate accurate FRES result in XMM15 - x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres); - x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); - - // copy result to bottom and top half of result register - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_FPR_TEMP); - } - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_BOTTOM_RECIPROCAL_SQRT) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // move register to XMM15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); - - // call assembly routine to calculate accurate FRSQRTE result in XMM15 - x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte); - x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); - - // copy result to bottom of result register - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_FPR_TEMP); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_PAIR ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // copy register - if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - // toggle sign bits - x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskPair)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_PAIR ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // copy register - if( imlInstruction->op_fpr_r_r.registerResult != imlInstruction->op_fpr_r_r.registerOperand ) - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, imlInstruction->op_fpr_r_r.registerOperand); - } - // set sign bit to 0 - x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskPair)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR || imlInstruction->operation == PPCREC_IML_OP_FPR_FRSQRTE_PAIR) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // calculate bottom half of result - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand); - if(imlInstruction->operation == PPCREC_IML_OP_FPR_FRES_PAIR) - x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_fres); - else - x64Gen_mov_reg64_imm64(x64GenContext, REG_RESV_TEMP, (uint64)recompiler_frsqrte); - x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15 - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_FPR_TEMP); - - // calculate top half of result - // todo - this top to bottom copy can be optimized? 
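The FRES_PAIR and FRSQRTE_PAIR lowering reuses the scalar assembly helper once per half: compute the estimate for PS0, shuffle PS1 down, compute it again, then merge the halves with unpcklpd. A rough C++ sketch of that per-lane structure, with placeholder math standing in for the console-accurate estimates (`PairedSingle`, `estimate_fres`, `estimate_frsqrte`, and `fresPair` are illustrative names, not part of the code base):

```cpp
#include <cmath>
#include <cstdio>

// Placeholders for the accurate estimates that recompiler_fres /
// recompiler_frsqrte produce in assembly; the real routines reproduce the
// console's approximation, not a plain IEEE division.
static double estimate_fres(double x)    { return 1.0 / x; }
static double estimate_frsqrte(double x) { return 1.0 / std::sqrt(x); }

struct PairedSingle { double ps0; double ps1; };

// Shape of the pair lowering: run the scalar helper on the bottom half,
// then on the top half, and merge the two results back into one register.
static PairedSingle fresPair(const PairedSingle& in)
{
    PairedSingle out;
    out.ps0 = estimate_fres(in.ps0); // first helper call (bottom)
    out.ps1 = estimate_fres(in.ps1); // second helper call (top, shuffled down first)
    return out;                      // merged, as unpcklpd does above
}

static PairedSingle frsqrtePair(const PairedSingle& in)
{
    return { estimate_frsqrte(in.ps0), estimate_frsqrte(in.ps1) };
}

int main()
{
    PairedSingle a = fresPair({ 2.0, 4.0 });
    PairedSingle b = frsqrtePair({ 4.0, 16.0 });
    std::printf("fres:    %f %f\n", a.ps0, a.ps1); // ~0.5, ~0.25
    std::printf("frsqrte: %f %f\n", b.ps0, b.ps1); // ~0.5, ~0.25
    return 0;
}
```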
- x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r.registerOperand, 3); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); // swap top and bottom - - x64Gen_call_reg64(x64GenContext, REG_RESV_TEMP); // calculate fres result in xmm15 - - x64Gen_unpcklpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r.registerResult, REG_RESV_FPR_TEMP); // copy bottom to top - } - else - { - assert_dbg(); - } -} - -/* - * FPR = op (fprA, fprB) - */ -void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - - if (imlInstruction->operation == PPCREC_IML_OP_FPR_MULTIPLY_BOTTOM) - { - if (imlInstruction->crRegister != PPC_REC_INVALID_REGISTER) - { - assert_dbg(); - } - if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA) - { - x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); - } - else if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandB) - { - x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA); - } - else - { - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA); - x64Gen_mulsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); - } - } - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_ADD_BOTTOM) - { - // registerResult(fp0) = registerOperandA(fp0) + registerOperandB(fp0) - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // todo: Use AVX 3-operand VADDSD if available - if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA) - { - x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); - } - else if (imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandB) - { - x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA); - } - else - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA); - x64Gen_addsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); - } - } - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_PAIR) - { - // registerResult = registerOperandA - registerOperandB - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA ) - { - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); - } - else if (hasAVXSupport) - { - x64Gen_avx_VSUBPD_xmm_xmm_xmm(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA, imlInstruction->op_fpr_r_r_r.registerOperandB); - } - else if( 
imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandB ) - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.registerOperandA); - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.registerOperandB); - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, REG_RESV_FPR_TEMP); - } - else - { - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA); - x64Gen_subpd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); - } - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUB_BOTTOM ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandA ) - { - x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); - } - else if( imlInstruction->op_fpr_r_r_r.registerResult == imlInstruction->op_fpr_r_r_r.registerOperandB ) - { - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.registerOperandA); - x64Gen_subsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r.registerOperandB); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, REG_RESV_FPR_TEMP); - } - else - { - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandA); - x64Gen_subsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r.registerOperandB); - } - } - else - assert_dbg(); -} - -/* - * FPR = op (fprA, fprB, fprC) - */ -void PPCRecompilerX64Gen_imlInstruction_fpr_r_r_r_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM0 ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - - // todo: Investigate if there are other optimizations possible if the operand registers overlap - // generic case - // 1) move frA bottom to frTemp bottom and top - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandA); - // 2) add frB (both halfs, lower half is overwritten in the next step) - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandB); - // 3) Interleave top of frTemp and frC - x64Gen_unpckhpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandC); - // todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, REG_RESV_FPR_TEMP); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SUM1 ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // todo: Investigate if there are other optimizations possible if the operand registers overlap - // 1) move frA bottom to frTemp 
bottom and top - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandA); - // 2) add frB (both halfs, lower half is overwritten in the next step) - x64Gen_addpd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandB); - // 3) Copy bottom from frC - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandC); - //// 4) Swap bottom and top half - //x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_FPR_TEMP, 1); - // todo: We can optimize the REG_RESV_FPR_TEMP -> resultReg copy operation away when the result register does not overlap with any of the operand registers - x64Gen_movaps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, REG_RESV_FPR_TEMP); - - //float s0 = (float)hCPU->fpr[frC].fp0; - //float s1 = (float)(hCPU->fpr[frA].fp0 + hCPU->fpr[frB].fp1); - //hCPU->fpr[frD].fp0 = s0; - //hCPU->fpr[frD].fp1 = s1; - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_BOTTOM ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); - // select C - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); - sint32 jumpInstructionOffset2 = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - // select B - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1, x64GenContext->codeBufferIndex); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2, x64GenContext->codeBufferIndex); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_SELECT_PAIR ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // select bottom - x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerOperandA, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1_bottom = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); - // select C bottom - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); - sint32 jumpInstructionOffset2_bottom = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - // select B bottom - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_bottom, x64GenContext->codeBufferIndex); - x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_bottom, x64GenContext->codeBufferIndex); - // select top - x64Gen_movhlps_xmmReg_xmmReg(x64GenContext, REG_RESV_FPR_TEMP, imlInstruction->op_fpr_r_r_r_r.registerOperandA); // copy top to bottom (todo: May cause stall?) 
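The SELECT_BOTTOM and SELECT_PAIR paths emit a comisd against the constant 0.0 followed by a conditional jump, which amounts to the fsel-style pick sketched below, applied once per lane in the pair variant (the function name is illustrative):

```cpp
#include <cstdio>

// Selection rule behind the compare-and-branch sequences: take C when A is
// greater than or equal to zero, otherwise take B. A NaN in A falls through
// to B, matching the unordered/below result of comisd in the emitted code.
static double selectLane(double a, double b, double c)
{
    return (a >= 0.0) ? c : b;
}

int main()
{
    std::printf("%f\n", selectLane( 1.5, 10.0, 20.0)); // 20.0 -> C
    std::printf("%f\n", selectLane(-1.5, 10.0, 20.0)); // 10.0 -> B
    return 0;
}
```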
- x64Gen_comisd_xmmReg_mem64Reg64(x64GenContext, REG_RESV_FPR_TEMP, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_constDouble0_0)); - sint32 jumpInstructionOffset1_top = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_UNSIGNED_BELOW, 0); - // select C top - //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandC, 2); - sint32 jumpInstructionOffset2_top = x64GenContext->codeBufferIndex; - x64Gen_jmpc_near(x64GenContext, X86_CONDITION_NONE, 0); - // select B top - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset1_top, x64GenContext->codeBufferIndex); - //x64Gen_movsd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB); - x64Gen_shufpd_xmmReg_xmmReg_imm8(x64GenContext, imlInstruction->op_fpr_r_r_r_r.registerResult, imlInstruction->op_fpr_r_r_r_r.registerOperandB, 2); - // end - PPCRecompilerX64Gen_redirectRelativeJump(x64GenContext, jumpInstructionOffset2_top, x64GenContext->codeBufferIndex); - } - else - assert_dbg(); -} - -/* - * Single FPR operation - */ -void PPCRecompilerX64Gen_imlInstruction_fpr_r(PPCRecFunction_t* PPCRecFunction, ppcImlGenContext_t* ppcImlGenContext, x64GenContext_t* x64GenContext, PPCRecImlInstruction_t* imlInstruction) -{ - PPCRecompilerX64Gen_crConditionFlags_forget(PPCRecFunction, ppcImlGenContext, x64GenContext); - if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATE_BOTTOM ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // toggle sign bit - x64Gen_xorps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ABS_BOTTOM ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // mask out sign bit - x64Gen_andps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_andAbsMaskBottom)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_NEGATIVE_ABS_BOTTOM ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // set sign bit - x64Gen_orps_xmmReg_mem128Reg64(x64GenContext, imlInstruction->op_fpr_r.registerResult, REG_RESV_RECDATA, offsetof(PPCRecompilerInstanceData_t, _x64XMM_xorNegateMaskBottom)); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_BOTTOM ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // convert to 32bit single - x64Gen_cvtsd2ss_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); - // convert back to 64bit double - x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); - } - else if( imlInstruction->operation == PPCREC_IML_OP_FPR_ROUND_TO_SINGLE_PRECISION_PAIR ) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // convert to 32bit singles - x64Gen_cvtpd2ps_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); - // convert 
back to 64bit doubles - x64Gen_cvtps2pd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); - } - else if (imlInstruction->operation == PPCREC_IML_OP_FPR_EXPAND_BOTTOM32_TO_BOTTOM64_AND_TOP64) - { - cemu_assert_debug(imlInstruction->crRegister == PPC_REC_INVALID_REGISTER); - // convert bottom to 64bit double - x64Gen_cvtss2sd_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); - // copy to top half - x64Gen_movddup_xmmReg_xmmReg(x64GenContext, imlInstruction->op_fpr_r.registerResult, imlInstruction->op_fpr_r.registerResult); - } - else - { - cemu_assert_unimplemented(); - } -} diff --git a/src/Cafe/HW/Latte/Core/FetchShader.cpp b/src/Cafe/HW/Latte/Core/FetchShader.cpp index 1c0a72a0..6c9893f9 100644 --- a/src/Cafe/HW/Latte/Core/FetchShader.cpp +++ b/src/Cafe/HW/Latte/Core/FetchShader.cpp @@ -228,13 +228,13 @@ void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* pars else if (srcSelX == LatteClauseInstruction_VTX::SRC_SEL::SEL_Y) { // use alu divisor 1 - attribGroup->attrib[groupAttribIndex].aluDivisor = (sint32)contextRegister[mmVGT_INSTANCE_STEP_RATE_0 + 0]; + attribGroup->attrib[groupAttribIndex].aluDivisor = (sint32)contextRegister[Latte::REGADDR::VGT_INSTANCE_STEP_RATE_0]; cemu_assert_debug(attribGroup->attrib[groupAttribIndex].aluDivisor > 0); } else if (srcSelX == LatteClauseInstruction_VTX::SRC_SEL::SEL_Z) { // use alu divisor 2 - attribGroup->attrib[groupAttribIndex].aluDivisor = (sint32)contextRegister[mmVGT_INSTANCE_STEP_RATE_0 + 1]; + attribGroup->attrib[groupAttribIndex].aluDivisor = (sint32)contextRegister[Latte::REGADDR::VGT_INSTANCE_STEP_RATE_1]; cemu_assert_debug(attribGroup->attrib[groupAttribIndex].aluDivisor > 0); } } @@ -505,7 +505,7 @@ LatteFetchShader* LatteFetchShader::FindByGPUState() lookupInfo->programSize = _getFSProgramSize(); lookupInfo->lastFrameAccessed = LatteGPUState.frameCounter; g_fetchShaderLookupCache.store(fsPhysAddr24, lookupInfo); -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT cemu_assert_debug(g_fetchShaderLookupCache.lookup(fsPhysAddr24) == lookupInfo); #endif } @@ -516,16 +516,16 @@ FSpinlock s_spinlockFetchShaderCache; LatteFetchShader* LatteFetchShader::RegisterInCache(CacheHash fsHash) { - s_spinlockFetchShaderCache.acquire(); + s_spinlockFetchShaderCache.lock(); auto itr = s_fetchShaderByHash.find(fsHash); if (itr != s_fetchShaderByHash.end()) { LatteFetchShader* fs = itr->second; - s_spinlockFetchShaderCache.release(); + s_spinlockFetchShaderCache.unlock(); return fs; } s_fetchShaderByHash.emplace(fsHash, this); - s_spinlockFetchShaderCache.release(); + s_spinlockFetchShaderCache.unlock(); return nullptr; } @@ -533,11 +533,11 @@ void LatteFetchShader::UnregisterInCache() { if (!m_isRegistered) return; - s_spinlockFetchShaderCache.acquire(); + s_spinlockFetchShaderCache.lock(); auto itr = s_fetchShaderByHash.find(m_cacheHash); cemu_assert(itr == s_fetchShaderByHash.end()); s_fetchShaderByHash.erase(itr); - s_spinlockFetchShaderCache.release(); + s_spinlockFetchShaderCache.unlock(); } std::unordered_map LatteFetchShader::s_fetchShaderByHash; diff --git a/src/Cafe/HW/Latte/Core/Latte.h b/src/Cafe/HW/Latte/Core/Latte.h index ddff15ea..2636467b 100644 --- a/src/Cafe/HW/Latte/Core/Latte.h +++ b/src/Cafe/HW/Latte/Core/Latte.h @@ -25,6 +25,8 @@ struct LatteGPUState_t // context control uint32 contextControl0; uint32 contextControl1; + // optional features + bool allowFramebufferSizeOptimization{false}; // 
allow using scissor box as size hint to determine non-padded rendertarget size // draw context struct { @@ -45,11 +47,10 @@ struct LatteGPUState_t gx2GPUSharedArea_t* sharedArea; // quick reference to shared area MPTR sharedAreaAddr; // other - // todo: Currently we have the command buffer logic implemented as a FIFO ringbuffer. On real HW it's handled as a series of command buffers that are pushed individually. - std::atomic lastSubmittedCommandBufferTimestamp; uint32 gx2InitCalled; // incremented every time GX2Init() is called // OpenGL control uint32 glVendor; // GLVENDOR_* + bool isDRCPrimary = false; // temporary (replace with proper solution later) bool tvBufferUsesSRGB; bool drcBufferUsesSRGB; @@ -64,16 +65,14 @@ struct LatteGPUState_t { bool isEnabled; MPTR physPtr; - volatile uint32 flipRequestCount; - volatile uint32 flipExecuteCount; + std::atomic flipRequestCount; + std::atomic flipExecuteCount; }screen[2]; }osScreen; }; extern LatteGPUState_t LatteGPUState; -extern uint8* gxRingBufferReadPtr; // currently active read pointer (gx2 ring buffer or display list) - // texture #include "Cafe/HW/Latte/Core/LatteTexture.h" @@ -83,6 +82,10 @@ extern uint8* gxRingBufferReadPtr; // currently active read pointer (gx2 ring bu void LatteTextureLoader_estimateAccessedDataRange(LatteTexture* texture, sint32 sliceIndex, sint32 mipIndex, uint32& addrStart, uint32& addrEnd); // render target + +#define RENDER_TARGET_TV (1 << 0) +#define RENDER_TARGET_DRC (1 << 2) + void LatteRenderTarget_updateScissorBox(); void LatteRenderTarget_trackUpdates(); @@ -93,7 +96,7 @@ void LatteRenderTarget_copyToBackbuffer(LatteTextureView* textureView, bool isPa void LatteRenderTarget_GetCurrentVirtualViewportSize(sint32* viewportWidth, sint32* viewportHeight); void LatteRenderTarget_itHLESwapScanBuffer(); -void LatteRenderTarget_itHLEClearColorDepthStencil(uint32 clearMask, MPTR colorBufferMPTR, MPTR colorBufferFormat, Latte::E_HWTILEMODE colorBufferTilemode, uint32 colorBufferWidth, uint32 colorBufferHeight, uint32 colorBufferPitch, uint32 colorBufferViewFirstSlice, uint32 colorBufferViewNumSlice, MPTR depthBufferMPTR, MPTR depthBufferFormat, Latte::E_HWTILEMODE depthBufferTileMode, sint32 depthBufferWidth, sint32 depthBufferHeight, sint32 depthBufferPitch, sint32 depthBufferViewFirstSlice, sint32 depthBufferViewNumSlice, float r, float g, float b, float a, float clearDepth, uint32 clearStencil); +void LatteRenderTarget_itHLEClearColorDepthStencil(uint32 clearMask, MPTR colorBufferMPTR, Latte::E_GX2SURFFMT colorBufferFormat, Latte::E_HWTILEMODE colorBufferTilemode, uint32 colorBufferWidth, uint32 colorBufferHeight, uint32 colorBufferPitch, uint32 colorBufferViewFirstSlice, uint32 colorBufferViewNumSlice, MPTR depthBufferMPTR, Latte::E_GX2SURFFMT depthBufferFormat, Latte::E_HWTILEMODE depthBufferTileMode, sint32 depthBufferWidth, sint32 depthBufferHeight, sint32 depthBufferPitch, sint32 depthBufferViewFirstSlice, sint32 depthBufferViewNumSlice, float r, float g, float b, float a, float clearDepth, uint32 clearStencil); void LatteRenderTarget_itHLECopyColorBufferToScanBuffer(MPTR colorBufferPtr, uint32 colorBufferWidth, uint32 colorBufferHeight, uint32 colorBufferSliceIndex, uint32 colorBufferFormat, uint32 colorBufferPitch, Latte::E_HWTILEMODE colorBufferTilemode, uint32 colorBufferSwizzle, uint32 renderTarget); void LatteRenderTarget_unloadAll(); @@ -110,7 +113,7 @@ void LatteTC_RegisterTexture(LatteTexture* tex); void LatteTC_UnregisterTexture(LatteTexture* tex); uint32 
LatteTexture_CalculateTextureDataHash(LatteTexture* hostTexture); -void LatteTexture_ReloadData(LatteTexture* hostTexture, uint32 textureUnit); +void LatteTexture_ReloadData(LatteTexture* hostTexture); bool LatteTC_HasTextureChanged(LatteTexture* hostTexture, bool force = false); void LatteTC_ResetTextureChangeTracker(LatteTexture* hostTexture, bool force = false); @@ -162,7 +165,7 @@ void LatteBufferCache_LoadRemappedUniforms(struct LatteDecompilerShader* shader, void LatteRenderTarget_updateViewport(); -#define LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE (1024) // maximum size for uniform blocks (in vec4s). On Nvidia hardware 4096 is the maximum (64K / 16 = 4096) all other vendors have much higher limits +#define LATTE_GLSL_DYNAMIC_UNIFORM_BLOCK_SIZE (4096) // maximum size for uniform blocks (in vec4s). On Nvidia hardware 4096 is the maximum (64K / 16 = 4096) all other vendors have much higher limits //static uint32 glTempError; //#define catchOpenGLError() glFinish(); if( (glTempError = glGetError()) != 0 ) { printf("OpenGL error 0x%x: %s : %d timestamp %08x\n", glTempError, __FILE__, __LINE__, GetTickCount()); __debugbreak(); } @@ -172,4 +175,5 @@ void LatteRenderTarget_updateViewport(); // Latte emulation control void Latte_Start(); void Latte_Stop(); -bool Latte_IsActive(); \ No newline at end of file +bool Latte_GetStopSignal(); // returns true if stop was requested or if in stopped state +void LatteThread_Exit(); \ No newline at end of file diff --git a/src/Cafe/HW/Latte/Core/LatteAsyncCommands.cpp b/src/Cafe/HW/Latte/Core/LatteAsyncCommands.cpp index a4ffa1c2..4b114ddf 100644 --- a/src/Cafe/HW/Latte/Core/LatteAsyncCommands.cpp +++ b/src/Cafe/HW/Latte/Core/LatteAsyncCommands.cpp @@ -96,7 +96,7 @@ void LatteAsyncCommands_waitUntilAllProcessed() void LatteAsyncCommands_checkAndExecute() { // quick check if queue is empty (requires no lock) - if (!Latte_IsActive()) + if (Latte_GetStopSignal()) LatteThread_Exit(); if (LatteAsyncCommandQueue.empty()) return; @@ -120,7 +120,7 @@ void LatteAsyncCommands_checkAndExecute() } else { - forceLogDebug_printf("Texture not found for readback"); + cemuLog_logDebug(LogType::Force, "Texture not found for readback"); } } else if (asyncCommand.type == ASYNC_CMD_DELETE_SHADER) @@ -135,4 +135,4 @@ void LatteAsyncCommands_checkAndExecute() LatteAsyncCommandQueue.pop(); } swl_gpuAsyncCommands.UnlockWrite(); -} \ No newline at end of file +} diff --git a/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp b/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp index b74f67b9..716312a3 100644 --- a/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp +++ b/src/Cafe/HW/Latte/Core/LatteBufferCache.cpp @@ -29,7 +29,7 @@ class IntervalTree2 struct InternalRange { - InternalRange() {}; + InternalRange() = default; InternalRange(TRangeData _rangeBegin, TRangeData _rangeEnd) : rangeBegin(_rangeBegin), rangeEnd(_rangeEnd) { cemu_assert_debug(_rangeBegin < _rangeEnd); }; TRangeData rangeBegin; @@ -87,7 +87,7 @@ public: return; // do nothing if added range is already covered rangeBegin = (std::min)(rangeBegin, (*itr).first.rangeBegin); // DEBUG - make sure this is the start point of the merge process (the first entry that starts below minValue) -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT if (itr != m_map.cbegin()) { // check previous result @@ -257,6 +257,11 @@ public: } } + bool empty() const + { + return m_map.empty(); + } + const std::map& getAll() const { return m_map; }; }; @@ -304,7 +309,7 @@ public: { if ((rangeBegin & 0xF)) { - forceLogDebug_printf("writeStreamout(): RangeBegin not 
aligned to 16. Begin %08x End %08x", rangeBegin, rangeEnd); + cemuLog_logDebugOnce(LogType::Force, "writeStreamout(): RangeBegin not aligned to 16. Begin {:08x} End {:08x}", rangeBegin, rangeEnd); rangeBegin = (rangeBegin + 0xF) & ~0xF; rangeEnd = std::max(rangeBegin, rangeEnd); } @@ -313,7 +318,7 @@ public: // todo - add support for 4 byte granularity for streamout writes and cache // used by Affordable Space Adventures and YWW Level 1-8 // also used by CoD Ghosts (8 byte granularity) - //forceLogDebug_printf("Streamout write size is not aligned to 16 bytes"); + //cemuLog_logDebug(LogType::Force, "Streamout write size is not aligned to 16 bytes"); rangeEnd &= ~0xF; } //cemu_assert_debug((rangeEnd & 0xF) == 0); @@ -455,48 +460,6 @@ public: } if(m_invalidationRangeEnd <= m_invalidationRangeBegin) m_hasInvalidation = false; - - //if (resRangeBegin <= m_invalidationRangeBegin) - //{ - // // shrink/replace invalidation range from the bottom - // uint32 uploadBegin = m_invalidationRangeBegin;//std::max(m_invalidationRangeBegin, resRangeBegin); - // uint32 uploadEnd = std::min(resRangeEnd, m_invalidationRangeEnd); - // cemu_assert_debug(uploadEnd >= uploadBegin); - // if (uploadBegin != uploadEnd) - // checkAndSyncModifications(uploadBegin, uploadEnd, true); - // m_invalidationRangeBegin = uploadEnd; - // cemu_assert_debug(m_invalidationRangeBegin <= m_invalidationRangeEnd); - // if (m_invalidationRangeBegin >= m_invalidationRangeEnd) - // m_hasInvalidation = false; - //} - //else if (resRangeEnd >= m_invalidationRangeEnd) - //{ - // // shrink/replace invalidation range from the top - // uint32 uploadBegin = std::max(m_invalidationRangeBegin, resRangeBegin); - // uint32 uploadEnd = m_invalidationRangeEnd;// std::min(resRangeEnd, m_invalidationRangeEnd); - // cemu_assert_debug(uploadEnd >= uploadBegin); - // if (uploadBegin != uploadEnd) - // checkAndSyncModifications(uploadBegin, uploadEnd, true); - // m_invalidationRangeEnd = uploadBegin; - // cemu_assert_debug(m_invalidationRangeBegin <= m_invalidationRangeEnd); - // if (m_invalidationRangeBegin >= m_invalidationRangeEnd) - // m_hasInvalidation = false; - //} - //else - //{ - // // since we cant cut holes into the range upload it in it's entirety - // cemu_assert_debug(m_invalidationRangeEnd <= m_rangeEnd); - // cemu_assert_debug(m_invalidationRangeBegin >= m_rangeBegin); - // cemu_assert_debug(m_invalidationRangeBegin < m_invalidationRangeEnd); - // checkAndSyncModifications(m_invalidationRangeBegin, m_invalidationRangeEnd, true); - // m_hasInvalidation = false; - //} - - - - // todo - dont re-upload the whole range immediately - // under ideal circumstances we would only upload the data range requested for the current draw call - // but this is a hot path so we can't check } } @@ -734,20 +697,34 @@ private: static uint64 hashPage(uint8* mem) { - // note - this algorithm is/was also baked into pageWriteStreamoutSignatures() - uint64 h = 0; - uint64* memU64 = (uint64*)mem; - for (uint32 i = 0; i < CACHE_PAGE_SIZE / 8; i++) - { - //h = _rotr64(h, 7); - //h ^= *memU64; - //memU64++; + static const uint64 k0 = 0x55F23EAD; + static const uint64 k1 = 0x185FDC6D; + static const uint64 k2 = 0xF7431F49; + static const uint64 k3 = 0xA4C7AE9D; - h = std::rotr(h, 7); - h += (*memU64 + (uint64)i); - memU64++; + cemu_assert_debug((CACHE_PAGE_SIZE % 32) == 0); + const uint64* ptr = (const uint64*)mem; + const uint64* end = ptr + (CACHE_PAGE_SIZE / sizeof(uint64)); + + uint64 h0 = 0; + uint64 h1 = 0; + uint64 h2 = 0; + uint64 h3 = 0; + while (ptr < end) + { 
+ h0 = std::rotr(h0, 7); + h1 = std::rotr(h1, 7); + h2 = std::rotr(h2, 7); + h3 = std::rotr(h3, 7); + + h0 += ptr[0] * k0; + h1 += ptr[1] * k1; + h2 += ptr[2] * k2; + h3 += ptr[3] * k3; + ptr += 4; } - return h; + + return h0 + h1 + h2 + h3; } // flag page as having streamout data, also write streamout signatures to page memory @@ -813,6 +790,21 @@ private: static std::vector g_deallocateQueue; public: + static void UnloadAll() + { + size_t i = 0; + while (i < s_allCacheNodes.size()) + { + BufferCacheNode* node = s_allCacheNodes[i]; + node->ReleaseCacheMemoryImmediately(); + LatteBufferCache_removeSingleNodeFromTree(node); + delete node; + } + for(auto& it : s_allCacheNodes) + delete it; + s_allCacheNodes.clear(); + g_deallocateQueue.clear(); + } static void ProcessDeallocations() { @@ -871,11 +863,11 @@ public: // retry allocation if (!newRange->allocateCacheMemory()) { - forceLog_printf("Out-of-memory in GPU buffer (trying to allocate: %dKB) Cleaning up cache...", (rangeEnd - rangeBegin + 1023) / 1024); + cemuLog_log(LogType::Force, "Out-of-memory in GPU buffer (trying to allocate: {}KB) Cleaning up cache...", (rangeEnd - rangeBegin + 1023) / 1024); CleanupCacheAggressive(rangeBegin, rangeEnd); if (!newRange->allocateCacheMemory()) { - forceLog_printf("Failed to free enough memory in GPU buffer"); + cemuLog_log(LogType::Force, "Failed to free enough memory in GPU buffer"); cemu_assert(false); } } @@ -907,7 +899,7 @@ public: // todo - add support for splitting BufferCacheNode memory allocations, then we dont need to do a separate allocation if (!newRange->allocateCacheMemory()) { - forceLog_printf("Out-of-memory in GPU buffer during split operation"); + cemuLog_log(LogType::Force, "Out-of-memory in GPU buffer during split operation"); cemu_assert(false); } newRange->syncFromNode(nodeObject); @@ -917,7 +909,6 @@ public: }; std::vector BufferCacheNode::g_deallocateQueue; - IntervalTree2 g_gpuBufferCache; void LatteBufferCache_removeSingleNodeFromTree(BufferCacheNode* node) @@ -995,18 +986,83 @@ void LatteBufferCache_processDeallocations() void LatteBufferCache_init(size_t bufferSize) { + cemu_assert_debug(g_gpuBufferCache.empty()); g_gpuBufferHeap.reset(new VHeap(nullptr, (uint32)bufferSize)); g_renderer->bufferCache_init((uint32)bufferSize); } +void LatteBufferCache_UnloadAll() +{ + BufferCacheNode::UnloadAll(); +} + void LatteBufferCache_getStats(uint32& heapSize, uint32& allocationSize, uint32& allocNum) { g_gpuBufferHeap->getStats(heapSize, allocationSize, allocNum); } FSpinlock g_spinlockDCFlushQueue; -std::unordered_set* g_DCFlushQueue = new std::unordered_set(); // queued pages -std::unordered_set* g_DCFlushQueueAlternate = new std::unordered_set(); + +class SparseBitset +{ + static inline constexpr size_t TABLE_MASK = 0xFF; + +public: + bool Empty() const + { + return m_numNonEmptyVectors == 0; + } + + void Set(uint32 index) + { + auto& v = m_bits[index & TABLE_MASK]; + if (std::find(v.cbegin(), v.cend(), index) != v.end()) + return; + if (v.empty()) + { + m_nonEmptyVectors[m_numNonEmptyVectors] = &v; + m_numNonEmptyVectors++; + } + v.emplace_back(index); + } + + template + void ForAllAndClear(TFunc callbackFunc) + { + auto vCurrent = m_nonEmptyVectors + 0; + auto vEnd = m_nonEmptyVectors + m_numNonEmptyVectors; + while (vCurrent < vEnd) + { + std::vector* vec = *vCurrent; + vCurrent++; + for (const auto& it : *vec) + callbackFunc(it); + vec->clear(); + } + m_numNonEmptyVectors = 0; + } + + void Clear() + { + auto vCurrent = m_nonEmptyVectors + 0; + auto vEnd = m_nonEmptyVectors + 
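The replacement hashPage() above breaks the old serial rotate-and-add chain into four accumulators over interleaved 64-bit words and folds them at the end, which lets the chains overlap on an out-of-order core. A standalone restatement of that structure (the name `hashPageSketch`, the sample page size, and `main()` are illustrative):

```cpp
#include <bit>      // std::rotr (C++20)
#include <cstdint>
#include <cstdio>

// Four accumulators walk interleaved 64-bit words so the rotate/add chains
// can overlap, then the lanes are folded at the end. Structure and constants
// mirror hashPage() above.
static uint64_t hashPageSketch(const uint8_t* mem, size_t pageSize /* multiple of 32 */)
{
    constexpr uint64_t k0 = 0x55F23EAD, k1 = 0x185FDC6D, k2 = 0xF7431F49, k3 = 0xA4C7AE9D;
    const uint64_t* ptr = reinterpret_cast<const uint64_t*>(mem);
    const uint64_t* end = ptr + pageSize / sizeof(uint64_t);
    uint64_t h0 = 0, h1 = 0, h2 = 0, h3 = 0;
    while (ptr < end)
    {
        h0 = std::rotr(h0, 7) + ptr[0] * k0;
        h1 = std::rotr(h1, 7) + ptr[1] * k1;
        h2 = std::rotr(h2, 7) + ptr[2] * k2;
        h3 = std::rotr(h3, 7) + ptr[3] * k3;
        ptr += 4;
    }
    return h0 + h1 + h2 + h3;
}

int main()
{
    alignas(8) uint8_t page[256] = {};
    page[0] = 1;
    std::printf("%016llx\n", static_cast<unsigned long long>(hashPageSketch(page, sizeof(page))));
    return 0;
}
```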
m_numNonEmptyVectors; + while (vCurrent < vEnd) + { + std::vector* vec = *vCurrent; + vCurrent++; + vec->clear(); + } + m_numNonEmptyVectors = 0; + } + +private: + std::vector m_bits[TABLE_MASK + 1]; + std::vector* m_nonEmptyVectors[TABLE_MASK + 1]; + size_t m_numNonEmptyVectors{ 0 }; +}; + +SparseBitset* s_DCFlushQueue = new SparseBitset(); +SparseBitset* s_DCFlushQueueAlternate = new SparseBitset(); void LatteBufferCache_notifyDCFlush(MPTR address, uint32 size) { @@ -1015,22 +1071,20 @@ void LatteBufferCache_notifyDCFlush(MPTR address, uint32 size) uint32 firstPage = address / CACHE_PAGE_SIZE; uint32 lastPage = (address + size - 1) / CACHE_PAGE_SIZE; - g_spinlockDCFlushQueue.acquire(); + g_spinlockDCFlushQueue.lock(); for (uint32 i = firstPage; i <= lastPage; i++) - g_DCFlushQueue->emplace(i); - g_spinlockDCFlushQueue.release(); + s_DCFlushQueue->Set(i); + g_spinlockDCFlushQueue.unlock(); } void LatteBufferCache_processDCFlushQueue() { - if (g_DCFlushQueue->empty()) // accessing this outside of the lock is technically undefined/unsafe behavior but on all known implementations this is fine and we can avoid the spinlock + if (s_DCFlushQueue->Empty()) // quick check to avoid locking if there is no work to do return; - g_spinlockDCFlushQueue.acquire(); - std::swap(g_DCFlushQueue, g_DCFlushQueueAlternate); - g_spinlockDCFlushQueue.release(); - for (auto& itr : *g_DCFlushQueueAlternate) - LatteBufferCache_invalidatePage(itr * CACHE_PAGE_SIZE); - g_DCFlushQueueAlternate->clear(); + g_spinlockDCFlushQueue.lock(); + std::swap(s_DCFlushQueue, s_DCFlushQueueAlternate); + g_spinlockDCFlushQueue.unlock(); + s_DCFlushQueueAlternate->ForAllAndClear([](uint32 index) {LatteBufferCache_invalidatePage(index * CACHE_PAGE_SIZE); }); } void LatteBufferCache_notifyDrawDone() diff --git a/src/Cafe/HW/Latte/Core/LatteBufferCache.h b/src/Cafe/HW/Latte/Core/LatteBufferCache.h index da285192..62ae3f1f 100644 --- a/src/Cafe/HW/Latte/Core/LatteBufferCache.h +++ b/src/Cafe/HW/Latte/Core/LatteBufferCache.h @@ -1,6 +1,7 @@ #pragma once void LatteBufferCache_init(size_t bufferSize); +void LatteBufferCache_UnloadAll(); uint32 LatteBufferCache_retrieveDataInCache(MPTR physAddress, uint32 size); void LatteBufferCache_copyStreamoutDataToCache(MPTR physAddress, uint32 size, uint32 streamoutBufferOffset); diff --git a/src/Cafe/HW/Latte/Core/LatteBufferData.cpp b/src/Cafe/HW/Latte/Core/LatteBufferData.cpp index d864750a..85d4cdf7 100644 --- a/src/Cafe/HW/Latte/Core/LatteBufferData.cpp +++ b/src/Cafe/HW/Latte/Core/LatteBufferData.cpp @@ -9,6 +9,7 @@ #include "Cafe/GameProfile/GameProfile.h" #include "Cafe/HW/Latte/Core/LatteBufferCache.h" +#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h" template void rectGenerate4thVertex(uint32be* output, uint32be* input0, uint32be* input1, uint32be* input2) @@ -131,22 +132,18 @@ void LatteBufferCache_syncGPUUniformBuffers(LatteDecompilerShader* shader, const { if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK) { - // use full uniform buffers - for (sint32 t = 0; t < shader->uniformBufferListCount; t++) + for(const auto& buf : shader->list_quickBufferList) { - sint32 i = shader->uniformBufferList[t]; + sint32 i = buf.index; MPTR physicalAddr = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 0]; uint32 uniformSize = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 1] + 1; - - if (physicalAddr == MPTR_NULL) + if (physicalAddr == MPTR_NULL) [[unlikely]] { - // no data g_renderer->buffer_bindUniformBuffer(shaderType, i, 0, 0); continue; } - + 
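The DC flush queue above is double buffered: producers mark dirty pages in the active SparseBitset under a spinlock, and the consumer swaps the two sets while holding the lock only briefly, then invalidates pages from the detached set. A reduced sketch of that hand-off with a std::mutex and std::vector standing in for the spinlock and SparseBitset (all names here are illustrative):

```cpp
#include <cstdint>
#include <cstdio>
#include <mutex>
#include <utility>
#include <vector>

// Producer: record dirty pages under a short lock.
// Consumer: swap the active container out under the lock, then run the
// invalidations on the detached snapshot without blocking new notifications.
static std::mutex s_queueLock;
static std::vector<uint32_t> s_activeQueue;
static std::vector<uint32_t> s_drainQueue;

void NotifyFlushedPage(uint32_t pageIndex)
{
    std::lock_guard<std::mutex> lock(s_queueLock);
    s_activeQueue.push_back(pageIndex);
}

template<typename TFunc>
void ProcessFlushedPages(TFunc invalidatePage)
{
    {
        std::lock_guard<std::mutex> lock(s_queueLock);
        if (s_activeQueue.empty()) // the real code pre-checks via SparseBitset::Empty() before locking
            return;
        std::swap(s_activeQueue, s_drainQueue);
    }
    for (uint32_t pageIndex : s_drainQueue) // processed outside the lock
        invalidatePage(pageIndex);
    s_drainQueue.clear();
}

int main()
{
    NotifyFlushedPage(3);
    NotifyFlushedPage(7);
    ProcessFlushedPages([](uint32_t page) { std::printf("invalidate page %u\n", page); });
    return 0;
}
```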
uniformSize = std::min(uniformSize, buf.size); uint32 bindOffset = LatteBufferCache_retrieveDataInCache(physicalAddr, uniformSize); - g_renderer->buffer_bindUniformBuffer(shaderType, i, bindOffset, uniformSize); } } @@ -198,6 +195,19 @@ bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance if (fixedBufferSize == 0 || bufferStride == 0) fixedBufferSize += 128; + +#if BOOST_OS_MACOS + if(bufferStride % 4 != 0) + { + if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance()) + { + auto fixedBuffer = vkRenderer->buffer_genStrideWorkaroundVertexBuffer(bufferAddress, fixedBufferSize, bufferStride); + vkRenderer->buffer_bindVertexStrideWorkaroundBuffer(fixedBuffer.first, fixedBuffer.second, bufferIndex, fixedBufferSize); + continue; + } + } +#endif + uint32 bindOffset = LatteBufferCache_retrieveDataInCache(bufferAddress, fixedBufferSize); g_renderer->buffer_bindVertexBuffer(bufferIndex, bindOffset, fixedBufferSize); } diff --git a/src/Cafe/HW/Latte/Core/LatteCachedFBO.h b/src/Cafe/HW/Latte/Core/LatteCachedFBO.h index 47fa9b42..5f3aaed4 100644 --- a/src/Cafe/HW/Latte/Core/LatteCachedFBO.h +++ b/src/Cafe/HW/Latte/Core/LatteCachedFBO.h @@ -42,7 +42,7 @@ private: if(colorBuffer[i].texture == nullptr) continue; sint32 effectiveWidth, effectiveHeight; - LatteTexture_getEffectiveSize(colorBuffer[i].texture->baseTexture, &effectiveWidth, &effectiveHeight, nullptr, colorBuffer[i].texture->firstMip); + colorBuffer[i].texture->baseTexture->GetEffectiveSize(effectiveWidth, effectiveHeight, colorBuffer[i].texture->firstMip); if (rtEffectiveSize.x == 0 && rtEffectiveSize.y == 0) { rtEffectiveSize.x = effectiveWidth; @@ -50,12 +50,12 @@ private: } if (effectiveWidth < rtEffectiveSize.x) { - forceLogDebug_printf("Framebuffer has color texture with smaller effective width (%d -> %d)", rtEffectiveSize.x, effectiveWidth); + cemuLog_logDebug(LogType::Force, "Framebuffer has color texture with smaller effective width ({} -> {})", rtEffectiveSize.x, effectiveWidth); rtEffectiveSize.x = effectiveWidth; } if (effectiveHeight < rtEffectiveSize.y) { - forceLogDebug_printf("Framebuffer has color texture with smaller effective height (%d -> %d)", rtEffectiveSize.y, effectiveHeight); + cemuLog_logDebug(LogType::Force, "Framebuffer has color texture with smaller effective height ({} -> {})", rtEffectiveSize.y, effectiveHeight); rtEffectiveSize.y = effectiveHeight; } numViews++; @@ -64,7 +64,7 @@ private: if (depthBuffer.texture) { sint32 effectiveWidth, effectiveHeight; - LatteTexture_getEffectiveSize(depthBuffer.texture->baseTexture, &effectiveWidth, &effectiveHeight, nullptr, depthBuffer.texture->firstMip); + depthBuffer.texture->baseTexture->GetEffectiveSize(effectiveWidth, effectiveHeight, depthBuffer.texture->firstMip); if (rtEffectiveSize.x == 0 && rtEffectiveSize.y == 0) { rtEffectiveSize.x = effectiveWidth; @@ -72,12 +72,12 @@ private: } if (effectiveWidth < rtEffectiveSize.x) { - forceLogDebug_printf("Framebuffer has depth texture with smaller effective width (%d -> %d)", rtEffectiveSize.x, effectiveWidth); + cemuLog_logDebug(LogType::Force, "Framebuffer has depth texture with smaller effective width ({} -> {})", rtEffectiveSize.x, effectiveWidth); rtEffectiveSize.x = effectiveWidth; } if (effectiveHeight < rtEffectiveSize.y) { - forceLogDebug_printf("Framebuffer has depth texture with smaller effective height (%d -> %d)", rtEffectiveSize.y, effectiveHeight); + cemuLog_logDebug(LogType::Force, "Framebuffer has depth texture with smaller effective height ({} -> {})", rtEffectiveSize.y, 
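The macOS-only branch above routes vertex buffers whose stride is not a multiple of 4 through a workaround path in the Vulkan renderer. The general shape of such a workaround is to repack each vertex record at a rounded-up stride; the sketch below is an assumption about that approach, and `buffer_genStrideWorkaroundVertexBuffer`'s actual caching and binding behavior lives in VulkanRenderer:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Copy every vertex record from a tightly packed buffer (stride not a
// multiple of 4) into a new buffer whose stride is rounded up, so the host
// API only ever sees an aligned stride. Illustrative sketch only.
static std::vector<uint8_t> repackToAlignedStride(const uint8_t* src, size_t srcSize, size_t stride)
{
    const size_t alignedStride = (stride + 3) & ~static_cast<size_t>(3);
    const size_t vertexCount = srcSize / stride;
    std::vector<uint8_t> dst(vertexCount * alignedStride, 0);
    for (size_t i = 0; i < vertexCount; i++)
        std::memcpy(dst.data() + i * alignedStride, src + i * stride, stride);
    return dst;
}

int main()
{
    const uint8_t vertices[18] = {}; // three 6-byte vertices
    auto aligned = repackToAlignedStride(vertices, sizeof(vertices), 6);
    std::printf("repacked to %zu bytes (stride 8)\n", aligned.size()); // 24
    return 0;
}
```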
effectiveHeight); rtEffectiveSize.y = effectiveHeight; } numViews++; @@ -145,4 +145,4 @@ public: private: static LatteCachedFBO* CreateCachedFBO(uint64 key); -}; \ No newline at end of file +}; diff --git a/src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp b/src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp index b95b84d3..4385cf49 100644 --- a/src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp +++ b/src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp @@ -13,17 +13,21 @@ #include "Cafe/HW/Latte/Core/LattePM4.h" #include "Cafe/OS/libs/coreinit/coreinit_Time.h" +#include "Cafe/OS/libs/TCL/TCL.h" // TCL currently handles the GPU command ringbuffer #include "Cafe/CafeSystem.h" +#include + +void LatteCP_DebugPrintCmdBuffer(uint32be* bufferPtr, uint32 size); + #define CP_TIMER_RECHECK 1024 -//#define FAST_DRAW_LOGGING +//#define LATTE_CP_LOGGING -uint8* gxRingBufferReadPtr; // currently active read pointer (gx2 ring buffer or display list) -uint8* gx2CPParserDisplayListPtr; -uint8* gx2CPParserDisplayListStart; // used for debugging -uint8* gx2CPParserDisplayListEnd; +typedef uint32be* LatteCMDPtr; +#define LatteReadCMD() ((uint32)*(cmd++)) +#define LatteSkipCMD(_nWords) cmd += (_nWords) void LatteThread_HandleOSScreen(); @@ -31,6 +35,14 @@ void LatteThread_Exit(); class DrawPassContext { + struct CmdQueuePos + { + CmdQueuePos(LatteCMDPtr current, LatteCMDPtr start, LatteCMDPtr end) : current(current), start(start), end(end) {}; + + LatteCMDPtr current; + LatteCMDPtr start; + LatteCMDPtr end; + }; public: bool isWithinDrawPass() const { @@ -54,6 +66,13 @@ public: if (numInstances == 0) return; + /* + if (GetAsyncKeyState('B')) + { + cemuLog_force("[executeDraw] {} Count {} BaseVertex {} BaseInstance {}", m_isFirstDraw?"Init":"Fast", count, baseVertex, baseInstance); + } + */ + if (!isAutoIndex) { cemu_assert_debug(physIndices != MPTR_NULL); @@ -66,6 +85,9 @@ public: { g_renderer->draw_execute(baseVertex, baseInstance, numInstances, count, MPTR_NULL, Latte::LATTE_VGT_DMA_INDEX_TYPE::E_INDEX_TYPE::AUTO, m_isFirstDraw); } + performanceMonitor.cycle[performanceMonitor.cycleIndex].drawCallCounter++; + if (!m_isFirstDraw) + performanceMonitor.cycle[performanceMonitor.cycleIndex].fastDrawCallCounter++; m_isFirstDraw = false; m_vertexBufferChanged = false; m_uniformBufferChanged = false; @@ -87,14 +109,41 @@ public: m_uniformBufferChanged = true; } + // command buffer processing position + void PushCurrentCommandQueuePos(LatteCMDPtr current, LatteCMDPtr start, LatteCMDPtr end) + { + m_queuePosStack.emplace_back(current, start, end); + } + + bool PopCurrentCommandQueuePos(LatteCMDPtr& current, LatteCMDPtr& start, LatteCMDPtr& end) + { + if (m_queuePosStack.empty()) + return false; + const auto& it = m_queuePosStack.back(); + current = it.current; + start = it.start; + end = it.end; + m_queuePosStack.pop_back(); + return true; + } + private: bool m_drawPassActive{ false }; bool m_isFirstDraw{false}; bool m_vertexBufferChanged{ false }; bool m_uniformBufferChanged{ false }; + boost::container::small_vector m_queuePosStack; }; -void LatteCP_processCommandBuffer(uint8* cmdBuffer, sint32 cmdSize, DrawPassContext& drawPassCtx); +void LatteCP_processCommandBuffer(DrawPassContext& drawPassCtx); + +// called whenever the GPU runs out of commands or hits a wait condition (semaphores, HLE waits) +void LatteCP_signalEnterWait() +{ + // based on the assumption that games won't do a rugpull and swap out buffer data in the middle of an uninterrupted sequence of drawcalls, + // we only flush caches when the GPU goes idle or has 
to wait for any operation + LatteIndices_invalidateAll(); +} /* * Read a U32 from the command buffer @@ -102,16 +151,12 @@ void LatteCP_processCommandBuffer(uint8* cmdBuffer, sint32 cmdSize, DrawPassCont */ uint32 LatteCP_readU32Deprc() { - uint32 v; - uint8* gxRingBufferWritePtr; - sint32 readDistance; // no display list active while (true) { - gxRingBufferWritePtr = gx2WriteGatherPipe.writeGatherPtrGxBuffer[GX2::sGX2MainCoreIndex]; - readDistance = (sint32)(gxRingBufferWritePtr - gxRingBufferReadPtr); - if (readDistance != 0) - break; + uint32 cmdWord; + if ( TCL::TCLGPUReadRBWord(cmdWord) ) + return cmdWord; g_renderer->NotifyLatteCommandProcessorIdle(); // let the renderer know in case it wants to flush any commands performanceMonitor.gpuTime_idleTime.beginMeasuring(); @@ -122,57 +167,9 @@ uint32 LatteCP_readU32Deprc() } LatteThread_HandleOSScreen(); // check if new frame was presented via OSScreen API - readDistance = (sint32)(gxRingBufferWritePtr - gxRingBufferReadPtr); - if (readDistance != 0) - break; - if (!Latte_IsActive()) - LatteThread_Exit(); - - // still no command data available, do some other tasks - LatteTiming_HandleTimedVsync(); - LatteAsyncCommands_checkAndExecute(); - std::this_thread::yield(); - performanceMonitor.gpuTime_idleTime.endMeasuring(); - } - v = *(uint32*)gxRingBufferReadPtr; - gxRingBufferReadPtr += 4; -#ifndef PUBLIC_RELEASE - if (v == 0xcdcdcdcd) - assert_dbg(); -#endif - v = _swapEndianU32(v); - return v; -} - -void LatteCP_waitForNWords(uint32 numWords) -{ - uint8* gxRingBufferWritePtr; - sint32 readDistance; - bool isFlushed = false; - sint32 waitDistance = numWords * sizeof(uint32be); - // no display list active - while (true) - { - gxRingBufferWritePtr = gx2WriteGatherPipe.writeGatherPtrGxBuffer[GX2::sGX2MainCoreIndex]; - readDistance = (sint32)(gxRingBufferWritePtr - gxRingBufferReadPtr); - if (readDistance < 0) - return; // wrap around means there is at least one full command queued after this - if (readDistance >= waitDistance) - break; - g_renderer->NotifyLatteCommandProcessorIdle(); // let the renderer know in case it wants to flush any commands - performanceMonitor.gpuTime_idleTime.beginMeasuring(); - // no command data available, spin in a busy loop for a while then check again - for (sint32 busy = 0; busy < 80; busy++) - { - _mm_pause(); - } - readDistance = (sint32)(gxRingBufferWritePtr - gxRingBufferReadPtr); - if (readDistance < 0) - return; // wrap around means there is at least one full command queued after this - if (readDistance >= waitDistance) - break; - - if (!Latte_IsActive()) + if ( TCL::TCLGPUReadRBWord(cmdWord) ) + return cmdWord; + if (Latte_GetStopSignal()) LatteThread_Exit(); // still no command data available, do some other tasks @@ -181,6 +178,7 @@ void LatteCP_waitForNWords(uint32 numWords) std::this_thread::yield(); performanceMonitor.gpuTime_idleTime.endMeasuring(); } + UNREACHABLE; } template @@ -193,10 +191,6 @@ void LatteCP_skipWords(uint32 wordsToSkip) } } -typedef uint32be* LatteCMDPtr; -#define LatteReadCMD() ((uint32)*(cmd++)) -#define LatteSkipCMD(_nWords) cmd += (_nWords) - LatteCMDPtr LatteCP_itSurfaceSync(LatteCMDPtr cmd) { uint32 invalidationFlags = LatteReadCMD(); @@ -215,33 +209,44 @@ LatteCMDPtr LatteCP_itSurfaceSync(LatteCMDPtr cmd) return cmd; } -template -void LatteCP_itIndirectBufferDepr(uint32 nWords) +// called from TCL command queue. 
Executes a memory command buffer +void LatteCP_itIndirectBufferDepr(LatteCMDPtr cmd, uint32 nWords) { cemu_assert_debug(nWords == 3); + uint32 physicalAddress = LatteReadCMD(); + uint32 physicalAddressHigh = LatteReadCMD(); // unused + uint32 sizeInU32s = LatteReadCMD(); - uint32 physicalAddress = readU32(); - uint32 physicalAddressHigh = readU32(); // unused - uint32 sizeInDWords = readU32(); - uint32 displayListSize = sizeInDWords * 4; - cemu_assert_debug(displayListSize >= 4); - DrawPassContext drawPassCtx; - LatteCP_processCommandBuffer(memory_getPointerFromPhysicalOffset(physicalAddress), displayListSize, drawPassCtx); - if (drawPassCtx.isWithinDrawPass()) - drawPassCtx.endDrawPass(); +#ifdef LATTE_CP_LOGGING + if (GetAsyncKeyState('A')) + LatteCP_DebugPrintCmdBuffer(MEMPTR(physicalAddress), displayListSize); +#endif + + if (sizeInU32s > 0) + { + DrawPassContext drawPassCtx; + uint32be* buf = MEMPTR(physicalAddress).GetPtr(); + drawPassCtx.PushCurrentCommandQueuePos(buf, buf, buf + sizeInU32s); + + LatteCP_processCommandBuffer(drawPassCtx); + if (drawPassCtx.isWithinDrawPass()) + drawPassCtx.endDrawPass(); + } } -LatteCMDPtr LatteCP_itIndirectBuffer(LatteCMDPtr cmd, uint32 nWords, DrawPassContext& drawPassCtx) +// pushes the command buffer to the stack +void LatteCP_itIndirectBuffer(LatteCMDPtr cmd, uint32 nWords, DrawPassContext& drawPassCtx) { cemu_assert_debug(nWords == 3); uint32 physicalAddress = LatteReadCMD(); uint32 physicalAddressHigh = LatteReadCMD(); // unused uint32 sizeInDWords = LatteReadCMD(); - uint32 displayListSize = sizeInDWords * 4; - cemu_assert_debug(displayListSize >= 4); - - LatteCP_processCommandBuffer(memory_getPointerFromPhysicalOffset(physicalAddress), displayListSize, drawPassCtx); - return cmd; + if (sizeInDWords > 0) + { + uint32 displayListSize = sizeInDWords * 4; + uint32be* buf = MEMPTR(physicalAddress).GetPtr(); + drawPassCtx.PushCurrentCommandQueuePos(buf, buf, buf + sizeInDWords); + } } LatteCMDPtr LatteCP_itStreamoutBufferUpdate(LatteCMDPtr cmd, uint32 nWords) @@ -299,7 +304,7 @@ LatteCMDPtr LatteCP_itSetRegistersGeneric(LatteCMDPtr cmd, uint32 nWords) uint32 registerIndex = TRegisterBase + registerOffset; uint32 registerStartIndex = registerIndex; uint32 registerEndIndex = registerStartIndex + nWords; -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT cemu_assert_debug((registerIndex + nWords) <= LATTE_MAX_REGISTER); #endif uint32* outputReg = (uint32*)(LatteGPUState.contextRegister + registerIndex); @@ -340,7 +345,7 @@ LatteCMDPtr LatteCP_itSetRegistersGeneric(LatteCMDPtr cmd, uint32 nWords, TRegRa uint32 registerIndex = TRegisterBase + registerOffset; uint32 registerStartIndex = registerIndex; uint32 registerEndIndex = registerStartIndex + nWords; -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT cemu_assert_debug((registerIndex + nWords) <= LATTE_MAX_REGISTER); #endif cbRegRange(registerStartIndex, registerEndIndex); @@ -417,6 +422,8 @@ LatteCMDPtr LatteCP_itWaitRegMem(LatteCMDPtr cmd, uint32 nWords) const uint32 GPU7_WAIT_MEM_OP_GREATER = 6; const uint32 GPU7_WAIT_MEM_OP_NEVER = 7; + LatteCP_signalEnterWait(); + bool stalls = false; if ((word0 & 0x10) != 0) { @@ -426,18 +433,45 @@ LatteCMDPtr LatteCP_itWaitRegMem(LatteCMDPtr cmd, uint32 nWords) { uint32 fenceMemValue = _swapEndianU32(*fencePtr); fenceMemValue &= fenceMask; - if (compareOp == GPU7_WAIT_MEM_OP_GEQUAL) + if (compareOp == GPU7_WAIT_MEM_OP_LESS) { - // greater or equal - if (fenceMemValue >= fenceValue) + if (fenceMemValue < fenceValue) + break; + } + else if (compareOp == 
GPU7_WAIT_MEM_OP_LEQUAL) + { + if (fenceMemValue <= fenceValue) break; } else if (compareOp == GPU7_WAIT_MEM_OP_EQUAL) { - // equal if (fenceMemValue == fenceValue) break; } + else if (compareOp == GPU7_WAIT_MEM_OP_NOTEQUAL) + { + if (fenceMemValue != fenceValue) + break; + } + else if (compareOp == GPU7_WAIT_MEM_OP_GEQUAL) + { + if (fenceMemValue >= fenceValue) + break; + } + else if (compareOp == GPU7_WAIT_MEM_OP_GREATER) + { + if (fenceMemValue > fenceValue) + break; + } + else if (compareOp == GPU7_WAIT_MEM_OP_ALWAYS) + { + break; + } + else if (compareOp == GPU7_WAIT_MEM_OP_NEVER) + { + cemuLog_logOnce(LogType::Force, "Latte: WAIT_MEM_OP_NEVER encountered"); + break; + } else assert_dbg(); if (!stalls) @@ -471,7 +505,7 @@ LatteCMDPtr LatteCP_itMemWrite(LatteCMDPtr cmd, uint32 nWords) MPTR valuePhysAddr = (word0 & ~3); if (valuePhysAddr == 0) { - cemuLog_force("GPU: Invalid itMemWrite to null pointer"); + cemuLog_log(LogType::Force, "GPU: Invalid itMemWrite to null pointer"); return cmd; } uint32be* memPtr = (uint32be*)memory_getPointerFromPhysicalOffset(valuePhysAddr); @@ -479,26 +513,55 @@ LatteCMDPtr LatteCP_itMemWrite(LatteCMDPtr cmd, uint32 nWords) if (word1 == 0x40000) { // write U32 - *memPtr = word2; + stdx::atomic_ref atomicRef(*memPtr); + atomicRef.store(word2); } else if (word1 == 0x00000) { - // write U64 (as two U32) - // note: The U32s are swapped - memPtr[0] = word2; - memPtr[1] = word3; + // write U64 + // note: The U32s are swapped here, but needs verification. Also, it seems like the two U32 halves are written independently and the U64 as a whole is not atomic -> investiagte + stdx::atomic_ref atomicRef(*(uint64be*)memPtr); + atomicRef.store(((uint64le)word2 << 32) | word3); } else if (word1 == 0x20000) { // write U64 (little endian) - memPtr[0] = _swapEndianU32(word2); - memPtr[1] = _swapEndianU32(word3); + stdx::atomic_ref atomicRef(*(uint64le*)memPtr); + atomicRef.store(((uint64le)word3 << 32) | word2); } else cemu_assert_unimplemented(); return cmd; } +LatteCMDPtr LatteCP_itEventWriteEOP(LatteCMDPtr cmd, uint32 nWords) +{ + cemu_assert_debug(nWords == 5); + uint32 word0 = LatteReadCMD(); + uint32 word1 = LatteReadCMD(); + uint32 word2 = LatteReadCMD(); + uint32 word3 = LatteReadCMD(); // value low bits + uint32 word4 = LatteReadCMD(); // value high bits + + cemu_assert_debug(word2 == 0x40000000 || word2 == 0x42000000); + + if (word0 == 0x504 && (word2&0x40000000)) // todo - figure out the flags + { + stdx::atomic_ref atomicRef(*(uint64be*)memory_getPointerFromPhysicalOffset(word1)); + uint64 val = ((uint64)word4 << 32) | word3; + atomicRef.store(val); + } + else + { cemu_assert_unimplemented(); + } + bool triggerInterrupt = (word2 & 0x2000000) != 0; + if (triggerInterrupt) + { + // todo - timestamp interrupt + } + TCL::TCLGPUNotifyNewRetirementTimestamp(); + return cmd; +} LatteCMDPtr LatteCP_itMemSemaphore(LatteCMDPtr cmd, uint32 nWords) { @@ -518,6 +581,7 @@ LatteCMDPtr LatteCP_itMemSemaphore(LatteCMDPtr cmd, uint32 nWords) else if(SEM_SIGNAL == 7) { // wait + LatteCP_signalEnterWait(); size_t loopCount = 0; while (true) { @@ -560,19 +624,19 @@ LatteCMDPtr LatteCP_itLoadReg(LatteCMDPtr cmd, uint32 nWords, uint32 regBase) MPTR physAddressRegArea = LatteReadCMD(); uint32 waitForIdle = LatteReadCMD(); uint32 loadEntries = (nWords - 2) / 2; - uint32 regIndex = 0; + uint32 regShadowMemAddr = physAddressRegArea; for (uint32 i = 0; i < loadEntries; i++) { uint32 regOffset = LatteReadCMD(); uint32 regCount = LatteReadCMD(); cemu_assert_debug(regCount != 0); + uint32 
regAddr = regBase + regOffset; for (uint32 f = 0; f < regCount; f++) { - uint32 regAddr = regBase + regOffset + f; - uint32 regShadowMemAddr = physAddressRegArea + regIndex * 4; LatteGPUState.contextRegisterShadowAddr[regAddr] = regShadowMemAddr; - LatteGPUState.contextRegister[regAddr] = memory_readU32Direct(regShadowMemAddr); - regIndex++; + LatteGPUState.contextRegister[regAddr] = memory_read(regShadowMemAddr); + regAddr++; + regShadowMemAddr += 4; } } return cmd; @@ -616,8 +680,6 @@ LatteCMDPtr LatteCP_itDrawIndex2(LatteCMDPtr cmd, uint32 nWords, DrawPassContext uint32 count = LatteReadCMD(); uint32 ukn3 = LatteReadCMD(); - performanceMonitor.cycle[performanceMonitor.cycleIndex].drawCallCounter++; - LatteGPUState.currentDrawCallTick = GetTickCount(); drawPassCtx.executeDraw(count, false, physIndices); return cmd; @@ -629,8 +691,6 @@ LatteCMDPtr LatteCP_itDrawIndexAuto(LatteCMDPtr cmd, uint32 nWords, DrawPassCont uint32 count = LatteReadCMD(); uint32 ukn = LatteReadCMD(); - performanceMonitor.cycle[performanceMonitor.cycleIndex].drawCallCounter++; - if (LatteGPUState.drawContext.numInstances == 0) return cmd; LatteGPUState.currentDrawCallTick = GetTickCount(); @@ -639,7 +699,7 @@ LatteCMDPtr LatteCP_itDrawIndexAuto(LatteCMDPtr cmd, uint32 nWords, DrawPassCont { uint32 vsProgramCode = ((LatteGPUState.contextRegister[mmSQ_PGM_START_ES] & 0xFFFFFF) << 8); uint32 vsProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_ES + 1] << 3; - forceLogDebug_printf("Compute %d %08x %08x (unsupported)\n", count, vsProgramCode, vsProgramSize); + cemuLog_logDebug(LogType::Force, "Compute {} {:08x} {:08x} (unsupported)", count, vsProgramCode, vsProgramSize); } else { @@ -693,7 +753,6 @@ LatteCMDPtr LatteCP_itDrawImmediate(LatteCMDPtr cmd, uint32 nWords, DrawPassCont // verify packet size if (nWords != (2 + numIndexU32s)) debugBreakpoint(); - performanceMonitor.cycle[performanceMonitor.cycleIndex].drawCallCounter++; uint32 baseVertex = LatteGPUState.contextRegister[mmSQ_VTX_BASE_VTX_LOC]; uint32 baseInstance = LatteGPUState.contextRegister[mmSQ_VTX_START_INST_LOC]; @@ -701,23 +760,13 @@ LatteCMDPtr LatteCP_itDrawImmediate(LatteCMDPtr cmd, uint32 nWords, DrawPassCont drawPassCtx.executeDraw(count, false, _tempIndexArrayMPTR); return cmd; - -} - -LatteCMDPtr LatteCP_itHLEFifoWrapAround(LatteCMDPtr cmd, uint32 nWords) -{ - cemu_assert_debug(nWords == 1); - uint32 unused = LatteReadCMD(); - gxRingBufferReadPtr = gx2WriteGatherPipe.gxRingBuffer; - cmd = (LatteCMDPtr)gxRingBufferReadPtr; - return cmd; } LatteCMDPtr LatteCP_itHLESampleTimer(LatteCMDPtr cmd, uint32 nWords) { cemu_assert_debug(nWords == 1); MPTR timerMPTR = (MPTR)LatteReadCMD(); - memory_writeU64Slow(timerMPTR, coreinit::coreinit_getTimerTick()); + memory_writeU64(timerMPTR, coreinit::OSGetSystemTime()); return cmd; } @@ -737,16 +786,6 @@ LatteCMDPtr LatteCP_itHLESpecialState(LatteCMDPtr cmd, uint32 nWords) return cmd; } -LatteCMDPtr LatteCP_itHLESetRetirementTimestamp(LatteCMDPtr cmd, uint32 nWords) -{ - cemu_assert_debug(nWords == 2); - uint32 timestampHigh = (uint32)LatteReadCMD(); - uint32 timestampLow = (uint32)LatteReadCMD(); - uint64 timestamp = ((uint64)timestampHigh << 32ULL) | (uint64)timestampLow; - GX2::__GX2NotifyNewRetirementTimestamp(timestamp); - return cmd; -} - LatteCMDPtr LatteCP_itHLEBeginOcclusionQuery(LatteCMDPtr cmd, uint32 nWords) { cemu_assert_debug(nWords == 1); @@ -820,8 +859,8 @@ LatteCMDPtr LatteCP_itHLEClearColorDepthStencil(LatteCMDPtr cmd, uint32 nWords) cemu_assert_debug(nWords == 23); uint32 clearMask = 
LatteReadCMD(); // color (1), depth (2), stencil (4) // color buffer - MPTR colorBufferMPTR = LatteReadCMD(); // MPTR for color buffer (physical address) - MPTR colorBufferFormat = LatteReadCMD(); // format for color buffer + MPTR colorBufferMPTR = LatteReadCMD(); // physical address for color buffer + Latte::E_GX2SURFFMT colorBufferFormat = (Latte::E_GX2SURFFMT)LatteReadCMD(); Latte::E_HWTILEMODE colorBufferTilemode = (Latte::E_HWTILEMODE)LatteReadCMD(); uint32 colorBufferWidth = LatteReadCMD(); uint32 colorBufferHeight = LatteReadCMD(); @@ -829,8 +868,8 @@ LatteCMDPtr LatteCP_itHLEClearColorDepthStencil(LatteCMDPtr cmd, uint32 nWords) uint32 colorBufferViewFirstSlice = LatteReadCMD(); uint32 colorBufferViewNumSlice = LatteReadCMD(); // depth buffer - MPTR depthBufferMPTR = LatteReadCMD(); // MPTR for depth buffer (physical address) - MPTR depthBufferFormat = LatteReadCMD(); // format for depth buffer + MPTR depthBufferMPTR = LatteReadCMD(); // physical address for depth buffer + Latte::E_GX2SURFFMT depthBufferFormat = (Latte::E_GX2SURFFMT)LatteReadCMD(); Latte::E_HWTILEMODE depthBufferTileMode = (Latte::E_HWTILEMODE)LatteReadCMD(); uint32 depthBufferWidth = LatteReadCMD(); uint32 depthBufferHeight = LatteReadCMD(); @@ -849,8 +888,8 @@ LatteCMDPtr LatteCP_itHLEClearColorDepthStencil(LatteCMDPtr cmd, uint32 nWords) LatteRenderTarget_itHLEClearColorDepthStencil( clearMask, - colorBufferMPTR, colorBufferFormat, colorBufferTilemode, colorBufferWidth, colorBufferHeight, colorBufferPitch, colorBufferViewFirstSlice, colorBufferViewNumSlice, - depthBufferMPTR, depthBufferFormat, depthBufferTileMode, depthBufferWidth, depthBufferHeight, depthBufferPitch, depthBufferViewFirstSlice, depthBufferViewNumSlice, + colorBufferMPTR, colorBufferFormat, colorBufferTilemode, colorBufferWidth, colorBufferHeight, colorBufferPitch, colorBufferViewFirstSlice, colorBufferViewNumSlice, + depthBufferMPTR, depthBufferFormat, depthBufferTileMode, depthBufferWidth, depthBufferHeight, depthBufferPitch, depthBufferViewFirstSlice, depthBufferViewNumSlice, r, g, b, a, clearDepth, clearStencil); return cmd; @@ -931,431 +970,411 @@ void LatteCP_dumpCommandBufferError(LatteCMDPtr cmdStart, LatteCMDPtr cmdEnd, La } // any drawcalls issued without changing textures, framebuffers, shader or other complex states can be done quickly without having to reinitialize the entire pipeline state -// we implement this optimization by having an optimized version of LatteCP_processCommandBuffer, called right after drawcalls, which only implements commands that dont interfere with fast drawing. Other commands will cause this function to return to the complex parser -LatteCMDPtr LatteCP_processCommandBuffer_continuousDrawPass(LatteCMDPtr cmd, LatteCMDPtr cmdStart, LatteCMDPtr cmdEnd, DrawPassContext& drawPassCtx) +// we implement this optimization by having a specialized version of LatteCP_processCommandBuffer, called right after drawcalls, which only implements commands that dont interfere with fast drawing. Other commands will cause this function to return to the complex and generic parser +void LatteCP_processCommandBuffer_continuousDrawPass(DrawPassContext& drawPassCtx) { cemu_assert_debug(drawPassCtx.isWithinDrawPass()); // quit early if there are parameters set which are generally incompatible with fast drawing if (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] != 0) { drawPassCtx.endDrawPass(); - return cmd; + return; } // check for other special states? 
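Aside (illustrative sketch, not part of this patch): the fast-draw parser here and the generic parser further below cooperate through DrawPassContext's stack of command-queue positions — a sub-buffer is pushed, the current position is pushed back whenever one parser hands control to the other, and parsing resumes by popping the top entry. The minimal, self-contained model below shows only that pattern with hypothetical names (QueuePos, ParserState, FastPass, GenericPass) and toy packet codes; it is a sketch of the idea, not Cemu code.

#include <cstdint>
#include <cstdio>
#include <vector>

struct QueuePos { const uint32_t* cur; const uint32_t* end; };

struct ParserState
{
	std::vector<QueuePos> stack; // saved read positions, innermost buffer on top
	void Push(const uint32_t* cur, const uint32_t* end) { stack.push_back({cur, end}); }
	bool Pop(QueuePos& out)
	{
		if (stack.empty())
			return false;
		out = stack.back();
		stack.pop_back();
		return true;
	}
};

// fast pass: only consumes draw packets (0x2); anything else is pushed back for the generic pass
void FastPass(ParserState& st)
{
	QueuePos pos;
	while (st.Pop(pos))
	{
		while (pos.cur < pos.end)
		{
			const uint32_t* before = pos.cur;
			uint32_t op = *pos.cur++;
			if (op == 0x2)
				std::printf("fast draw\n");
			else
			{
				st.Push(before, pos.end); // rewind so the generic parser re-reads this packet
				return;
			}
		}
	}
}

// generic pass: handles every packet, hands off to the fast pass right after a draw
void GenericPass(ParserState& st)
{
	QueuePos pos;
	while (st.Pop(pos))
	{
		while (pos.cur < pos.end)
		{
			uint32_t op = *pos.cur++;
			if (op == 0x1)
				std::printf("set register\n");
			else if (op == 0x2)
			{
				std::printf("draw, entering fast pass\n");
				st.Push(pos.cur, pos.end); // save position, the fast pass continues from here
				FastPass(st);
				if (!st.Pop(pos)) // resume wherever the fast pass stopped
					return;
			}
		}
	}
}

int main()
{
	uint32_t cmds[] = { 0x1, 0x2, 0x2, 0x1, 0x2 };
	ParserState st;
	st.Push(cmds, cmds + 5);
	GenericPass(st);
	return 0;
}

In the real parser the packets are PM4 headers and the stack entries also carry the buffer start pointer for error dumps, but the push/pop handoff between the generic and the fast path mirrors this control flow.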
- while (cmd < cmdEnd) + while (true) { - LatteCMDPtr cmdBeforeCommand = cmd; - uint32 itHeader = LatteReadCMD(); - uint32 itHeaderType = (itHeader >> 30) & 3; - if (itHeaderType == 3) + LatteCMDPtr cmd, cmdStart, cmdEnd; + if (!drawPassCtx.PopCurrentCommandQueuePos(cmd, cmdStart, cmdEnd)) { - uint32 itCode = (itHeader >> 8) & 0xFF; - uint32 nWords = ((itHeader >> 16) & 0x3FFF) + 1; - switch (itCode) - { - case IT_SET_RESOURCE: // attribute buffers, uniform buffers or texture units - { - cmd = LatteCP_itSetRegistersGeneric(cmd, nWords, [&drawPassCtx](uint32 registerStart, uint32 registerEnd) - { - if (registerStart >= Latte::REGADDR::SQ_TEX_RESOURCE_WORD_FIRST && registerStart <= Latte::REGADDR::SQ_TEX_RESOURCE_WORD_LAST) - drawPassCtx.endDrawPass(); // texture updates end the current draw sequence - else if (registerStart >= mmSQ_VTX_ATTRIBUTE_BLOCK_START && registerEnd <= mmSQ_VTX_ATTRIBUTE_BLOCK_END) - drawPassCtx.notifyModifiedVertexBuffer(); - else - drawPassCtx.notifyModifiedUniformBuffer(); - }); - if (!drawPassCtx.isWithinDrawPass()) - return cmd; - break; - } - case IT_SET_ALU_CONST: // uniform register - { - cmd = LatteCP_itSetRegistersGeneric(cmd, nWords); - break; - } - case IT_SET_CTL_CONST: - { - cmd = LatteCP_itSetRegistersGeneric(cmd, nWords); - break; - } - case IT_SET_CONFIG_REG: - { - cmd = LatteCP_itSetRegistersGeneric(cmd, nWords); - break; - } - case IT_INDEX_TYPE: - { - cmd = LatteCP_itIndexType(cmd, nWords); - break; - } - case IT_NUM_INSTANCES: - { - cmd = LatteCP_itNumInstances(cmd, nWords); - break; - } - case IT_DRAW_INDEX_2: - { -#ifdef FAST_DRAW_LOGGING - if(GetAsyncKeyState('A')) - forceLogRemoveMe_printf("Minimal draw"); -#endif - cmd = LatteCP_itDrawIndex2(cmd, nWords, drawPassCtx); - break; - } - case IT_SET_CONTEXT_REG: - { -#ifdef FAST_DRAW_LOGGING - if (GetAsyncKeyState('A')) - forceLogRemoveMe_printf("[FAST-DRAW] Quit due to command IT_SET_CONTEXT_REG Reg: %04x", (uint32)cmd[0] + 0xA000); -#endif - drawPassCtx.endDrawPass(); - return cmdBeforeCommand; - } - case IT_INDIRECT_BUFFER_PRIV: - { - cmd = LatteCP_itIndirectBuffer(cmd, nWords, drawPassCtx); - if (!drawPassCtx.isWithinDrawPass()) - return cmd; - break; - } - default: -#ifdef FAST_DRAW_LOGGING - if (GetAsyncKeyState('A')) - forceLogRemoveMe_printf("[FAST-DRAW] Quit due to command itCode 0x%02x", itCode); -#endif - drawPassCtx.endDrawPass(); - return cmdBeforeCommand; - } - } - else if (itHeaderType == 2) - { - // filler packet - } - else - { -#ifdef FAST_DRAW_LOGGING - if (GetAsyncKeyState('A')) - forceLogRemoveMe_printf("[FAST-DRAW] Quit due to unsupported headerType 0x%02x", itHeaderType); -#endif drawPassCtx.endDrawPass(); - return cmdBeforeCommand; - } - } - cemu_assert_debug(drawPassCtx.isWithinDrawPass()); - return cmd; -} - -void LatteCP_processCommandBuffer(uint8* cmdBuffer, sint32 cmdSize, DrawPassContext& drawPassCtx) -{ - LatteCMDPtr cmd = (LatteCMDPtr)cmdBuffer; - LatteCMDPtr cmdStart = (LatteCMDPtr)cmdBuffer; - LatteCMDPtr cmdEnd = (LatteCMDPtr)(cmdBuffer + cmdSize); - - if (drawPassCtx.isWithinDrawPass()) - { - cmd = LatteCP_processCommandBuffer_continuousDrawPass(cmd, cmdStart, cmdEnd, drawPassCtx); - cemu_assert_debug(cmd <= cmdEnd); - if (cmd == cmdEnd) return; - cemu_assert_debug(!drawPassCtx.isWithinDrawPass()); - } + } - while (cmd < cmdEnd) - { - uint32 itHeader = LatteReadCMD(); - uint32 itHeaderType = (itHeader >> 30) & 3; - if (itHeaderType == 3) + while (cmd < cmdEnd) { - uint32 itCode = (itHeader >> 8) & 0xFF; - uint32 nWords = ((itHeader >> 16) & 0x3FFF) + 1; -#ifndef 
PUBLIC_RELEASE - LatteCMDPtr expectedPostCmd = cmd + nWords; -#endif - switch (itCode) + LatteCMDPtr cmdBeforeCommand = cmd; + uint32 itHeader = LatteReadCMD(); + uint32 itHeaderType = (itHeader >> 30) & 3; + if (itHeaderType == 3) { - case IT_SET_CONTEXT_REG: - { - cmd = LatteCP_itSetRegistersGeneric(cmd, nWords); - } - break; - case IT_SET_RESOURCE: - { - cmd = LatteCP_itSetRegistersGeneric(cmd, nWords); - } - break; - case IT_SET_ALU_CONST: - { - cmd = LatteCP_itSetRegistersGeneric(cmd, nWords); - } - break; - case IT_SET_CTL_CONST: - { - cmd = LatteCP_itSetRegistersGeneric(cmd, nWords); - } - break; - case IT_SET_SAMPLER: - { - cmd = LatteCP_itSetRegistersGeneric(cmd, nWords); - } - break; - case IT_SET_CONFIG_REG: - { - cmd = LatteCP_itSetRegistersGeneric(cmd, nWords); - } - break; - case IT_SET_LOOP_CONST: - { - LatteSkipCMD(nWords); - // todo - } - break; - case IT_SURFACE_SYNC: - { - cmd = LatteCP_itSurfaceSync(cmd); - } - break; - case IT_INDIRECT_BUFFER_PRIV: - { - cmd = LatteCP_itIndirectBuffer(cmd, nWords, drawPassCtx); - if (drawPassCtx.isWithinDrawPass()) + uint32 itCode = (itHeader >> 8) & 0xFF; + uint32 nWords = ((itHeader >> 16) & 0x3FFF) + 1; + LatteCMDPtr cmdData = cmd; + cmd += nWords; + switch (itCode) { - cmd = LatteCP_processCommandBuffer_continuousDrawPass(cmd, cmdStart, cmdEnd, drawPassCtx); - cemu_assert_debug(cmd <= cmdEnd); - if (cmd == cmdEnd) + case IT_SET_RESOURCE: // attribute buffers, uniform buffers or texture units + { + LatteCP_itSetRegistersGeneric(cmdData, nWords, [&drawPassCtx](uint32 registerStart, uint32 registerEnd) + { + if ((registerStart >= Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_PS && registerStart < (Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_PS + Latte::GPU_LIMITS::NUM_TEXTURES_PER_STAGE * 7)) || + (registerStart >= Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_VS && registerStart < (Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_VS + Latte::GPU_LIMITS::NUM_TEXTURES_PER_STAGE * 7)) || + (registerStart >= Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_GS && registerStart < (Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_GS + Latte::GPU_LIMITS::NUM_TEXTURES_PER_STAGE * 7))) + drawPassCtx.endDrawPass(); // texture updates end the current draw sequence + else if (registerStart >= mmSQ_VTX_ATTRIBUTE_BLOCK_START && registerEnd <= mmSQ_VTX_ATTRIBUTE_BLOCK_END) + drawPassCtx.notifyModifiedVertexBuffer(); + else + drawPassCtx.notifyModifiedUniformBuffer(); + }); + if (!drawPassCtx.isWithinDrawPass()) + { + drawPassCtx.PushCurrentCommandQueuePos(cmd, cmdStart, cmdEnd); return; - cemu_assert_debug(!drawPassCtx.isWithinDrawPass()); + } + break; + } + case IT_SET_ALU_CONST: // uniform register + { + LatteCP_itSetRegistersGeneric(cmdData, nWords); + break; + } + case IT_SET_CTL_CONST: + { + LatteCP_itSetRegistersGeneric(cmdData, nWords); + break; + } + case IT_SET_CONFIG_REG: + { + LatteCP_itSetRegistersGeneric(cmdData, nWords); + break; + } + case IT_INDEX_TYPE: + { + LatteCP_itIndexType(cmdData, nWords); + break; + } + case IT_NUM_INSTANCES: + { + LatteCP_itNumInstances(cmdData, nWords); + break; + } + case IT_DRAW_INDEX_2: + { + LatteCP_itDrawIndex2(cmdData, nWords, drawPassCtx); + break; + } + case IT_SET_CONTEXT_REG: + { + drawPassCtx.endDrawPass(); + drawPassCtx.PushCurrentCommandQueuePos(cmdBeforeCommand, cmdStart, cmdEnd); + return; + } + case IT_INDIRECT_BUFFER_PRIV: + { + drawPassCtx.PushCurrentCommandQueuePos(cmd, cmdStart, cmdEnd); + LatteCP_itIndirectBuffer(cmdData, nWords, drawPassCtx); + if (!drawPassCtx.PopCurrentCommandQueuePos(cmd, cmdStart, cmdEnd)) // switch to sub 
buffer + cemu_assert_debug(false); + + //if (!drawPassCtx.isWithinDrawPass()) + // return cmdData; + break; + } + default: + // unsupported command for fast draw + drawPassCtx.endDrawPass(); + drawPassCtx.PushCurrentCommandQueuePos(cmdBeforeCommand, cmdStart, cmdEnd); + return; } -#ifndef PUBLIC_RELEASE - expectedPostCmd = cmd; -#endif } - break; - case IT_STRMOUT_BUFFER_UPDATE: + else if (itHeaderType == 2) { - cmd = LatteCP_itStreamoutBufferUpdate(cmd, nWords); - } - break; - case IT_INDEX_TYPE: - { - cmd = LatteCP_itIndexType(cmd, nWords); - } - break; - case IT_NUM_INSTANCES: - { - cmd = LatteCP_itNumInstances(cmd, nWords); - } - break; - case IT_DRAW_INDEX_2: - { - drawPassCtx.beginDrawPass(); -#ifdef FAST_DRAW_LOGGING - if (GetAsyncKeyState('A')) - forceLogRemoveMe_printf("[FAST-DRAW] Starting"); -#endif - cmd = LatteCP_itDrawIndex2(cmd, nWords, drawPassCtx); - cmd = LatteCP_processCommandBuffer_continuousDrawPass(cmd, cmdStart, cmdEnd, drawPassCtx); - cemu_assert_debug(cmd == cmdEnd || drawPassCtx.isWithinDrawPass() == false); // draw sequence should have ended if we didn't reach the end of the command buffer -#ifndef PUBLIC_RELEASE - expectedPostCmd = cmd; -#endif - } - break; - case IT_DRAW_INDEX_AUTO: - { - drawPassCtx.beginDrawPass(); - cmd = LatteCP_itDrawIndexAuto(cmd, nWords, drawPassCtx); - cmd = LatteCP_processCommandBuffer_continuousDrawPass(cmd, cmdStart, cmdEnd, drawPassCtx); - cemu_assert_debug(cmd == cmdEnd || drawPassCtx.isWithinDrawPass() == false); // draw sequence should have ended if we didn't reach the end of the command buffer -#ifndef PUBLIC_RELEASE - expectedPostCmd = cmd; -#endif -#ifdef FAST_DRAW_LOGGING - if (GetAsyncKeyState('A')) - forceLogRemoveMe_printf("[FAST-DRAW] Auto-draw"); -#endif - } - break; - case IT_DRAW_INDEX_IMMD: - { - DrawPassContext drawPassCtx; - drawPassCtx.beginDrawPass(); - cmd = LatteCP_itDrawImmediate(cmd, nWords, drawPassCtx); - drawPassCtx.endDrawPass(); - break; - } - case IT_WAIT_REG_MEM: - { - cmd = LatteCP_itWaitRegMem(cmd, nWords); - LatteTiming_HandleTimedVsync(); - LatteAsyncCommands_checkAndExecute(); - } - break; - case IT_MEM_WRITE: - { - cmd = LatteCP_itMemWrite(cmd, nWords); - } - break; - case IT_CONTEXT_CONTROL: - { - cmd = LatteCP_itContextControl(cmd, nWords); - } - break; - case IT_MEM_SEMAPHORE: - { - cmd = LatteCP_itMemSemaphore(cmd, nWords); - } - break; - case IT_LOAD_CONFIG_REG: - { - cmd = LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_CONFIG); - } - break; - case IT_LOAD_CONTEXT_REG: - { - cmd = LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_CONTEXT); - } - break; - case IT_LOAD_ALU_CONST: - { - cmd = LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_ALU_CONST); - } - break; - case IT_LOAD_LOOP_CONST: - { - cmd = LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_LOOP_CONST); - } - break; - case IT_LOAD_RESOURCE: - { - cmd = LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_RESOURCE); - } - break; - case IT_LOAD_SAMPLER: - { - cmd = LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_SAMPLER); - } - break; - case IT_SET_PREDICATION: - { - cmd = LatteCP_itSetPredication(cmd, nWords); - } - break; - case IT_HLE_COPY_COLORBUFFER_TO_SCANBUFFER: - { - cmd = LatteCP_itHLECopyColorBufferToScanBuffer(cmd, nWords); - } - break; - case IT_HLE_TRIGGER_SCANBUFFER_SWAP: - { - cmd = LatteCP_itHLESwapScanBuffer(cmd, nWords); - } - break; - case IT_HLE_WAIT_FOR_FLIP: - { - cmd = LatteCP_itHLEWaitForFlip(cmd, nWords); - } - break; - case IT_HLE_REQUEST_SWAP_BUFFERS: - { - cmd = LatteCP_itHLERequestSwapBuffers(cmd, nWords); - } - break; - case 
IT_HLE_CLEAR_COLOR_DEPTH_STENCIL: - { - cmd = LatteCP_itHLEClearColorDepthStencil(cmd, nWords); - } - break; - case IT_HLE_COPY_SURFACE_NEW: - { - cmd = LatteCP_itHLECopySurfaceNew(cmd, nWords); - } - break; - case IT_HLE_SAMPLE_TIMER: - { - cmd = LatteCP_itHLESampleTimer(cmd, nWords); - } - break; - case IT_HLE_SPECIAL_STATE: - { - cmd = LatteCP_itHLESpecialState(cmd, nWords); - } - break; - case IT_HLE_BEGIN_OCCLUSION_QUERY: - { - cmd = LatteCP_itHLEBeginOcclusionQuery(cmd, nWords); - } - break; - case IT_HLE_END_OCCLUSION_QUERY: - { - cmd = LatteCP_itHLEEndOcclusionQuery(cmd, nWords); - } - break; - case IT_HLE_SET_CB_RETIREMENT_TIMESTAMP: - { - cmd = LatteCP_itHLESetRetirementTimestamp(cmd, nWords); - } - break; - case IT_HLE_BOTTOM_OF_PIPE_CB: - { - cmd = LatteCP_itHLEBottomOfPipeCB(cmd, nWords); - } - break; - case IT_HLE_SYNC_ASYNC_OPERATIONS: - { - LatteSkipCMD(nWords); - LatteTextureReadback_UpdateFinishedTransfers(true); - LatteQuery_UpdateFinishedQueriesForceFinishAll(); - } - break; - default: - debug_printf("Unhandled IT %02x\n", itCode); - cemu_assert_debug(false); - LatteSkipCMD(nWords); - } -#ifndef PUBLIC_RELEASE - if(cmd != expectedPostCmd) - debug_printf("cmd %016p expectedPostCmd %016p\n", cmd, expectedPostCmd); - cemu_assert_debug(cmd == expectedPostCmd); -#endif - } - else if (itHeaderType == 2) - { - // filler packet - // has no body - } - else if (itHeaderType == 0) - { - uint32 registerBase = (itHeader & 0xFFFF); - uint32 registerCount = ((itHeader >> 16) & 0x3FFF) + 1; - if (registerBase == 0x304A) - { - GX2::__GX2NotifyEvent(GX2::GX2CallbackEventType::TIMESTAMP_TOP); - LatteSkipCMD(registerCount); - } - else if (registerBase == 0x304B) - { - LatteSkipCMD(registerCount); + // filler packet } else { + // unsupported command for fast draw + drawPassCtx.endDrawPass(); + drawPassCtx.PushCurrentCommandQueuePos(cmdBeforeCommand, cmdStart, cmdEnd); + return; + } + } + } + if (drawPassCtx.isWithinDrawPass()) + drawPassCtx.endDrawPass(); +} + +void LatteCP_processCommandBuffer(DrawPassContext& drawPassCtx) +{ + while (true) + { + LatteCMDPtr cmd, cmdStart, cmdEnd; + if (!drawPassCtx.PopCurrentCommandQueuePos(cmd, cmdStart, cmdEnd)) + break; + uint32 itHeader; + while (cmd < cmdEnd) + { + itHeader = LatteReadCMD(); + uint32 itHeaderType = (itHeader >> 30) & 3; + if (itHeaderType == 3) + { + uint32 itCode = (itHeader >> 8) & 0xFF; + uint32 nWords = ((itHeader >> 16) & 0x3FFF) + 1; + LatteCMDPtr cmdData = cmd; + cmd += nWords; + switch (itCode) + { + case IT_SET_CONTEXT_REG: + { + LatteCP_itSetRegistersGeneric(cmdData, nWords); + } + break; + case IT_SET_RESOURCE: + { + LatteCP_itSetRegistersGeneric(cmdData, nWords); + } + break; + case IT_SET_ALU_CONST: + { + LatteCP_itSetRegistersGeneric(cmdData, nWords); + } + break; + case IT_SET_CTL_CONST: + { + LatteCP_itSetRegistersGeneric(cmdData, nWords); + } + break; + case IT_SET_SAMPLER: + { + LatteCP_itSetRegistersGeneric(cmdData, nWords); + } + break; + case IT_SET_CONFIG_REG: + { + LatteCP_itSetRegistersGeneric(cmdData, nWords); + } + break; + case IT_SET_LOOP_CONST: + { + // todo + } + break; + case IT_SURFACE_SYNC: + { + LatteCP_itSurfaceSync(cmdData); + } + break; + case IT_INDIRECT_BUFFER_PRIV: + { + drawPassCtx.PushCurrentCommandQueuePos(cmd, cmdStart, cmdEnd); + LatteCP_itIndirectBuffer(cmdData, nWords, drawPassCtx); + if (!drawPassCtx.PopCurrentCommandQueuePos(cmd, cmdStart, cmdEnd)) // switch to sub buffer + cemu_assert_debug(false); + } + break; + case IT_STRMOUT_BUFFER_UPDATE: + { + 
LatteCP_itStreamoutBufferUpdate(cmdData, nWords); + } + break; + case IT_INDEX_TYPE: + { + LatteCP_itIndexType(cmdData, nWords); + } + break; + case IT_NUM_INSTANCES: + { + LatteCP_itNumInstances(cmdData, nWords); + } + break; + case IT_DRAW_INDEX_2: + { + drawPassCtx.beginDrawPass(); + LatteCP_itDrawIndex2(cmdData, nWords, drawPassCtx); + // enter fast draw mode + drawPassCtx.PushCurrentCommandQueuePos(cmd, cmdStart, cmdEnd); + LatteCP_processCommandBuffer_continuousDrawPass(drawPassCtx); + cemu_assert_debug(!drawPassCtx.isWithinDrawPass()); + if (!drawPassCtx.PopCurrentCommandQueuePos(cmd, cmdStart, cmdEnd)) + return; + } + break; + case IT_DRAW_INDEX_AUTO: + { + drawPassCtx.beginDrawPass(); + LatteCP_itDrawIndexAuto(cmdData, nWords, drawPassCtx); + // enter fast draw mode + drawPassCtx.PushCurrentCommandQueuePos(cmd, cmdStart, cmdEnd); + LatteCP_processCommandBuffer_continuousDrawPass(drawPassCtx); + cemu_assert_debug(!drawPassCtx.isWithinDrawPass()); + if (!drawPassCtx.PopCurrentCommandQueuePos(cmd, cmdStart, cmdEnd)) + return; + } + break; + case IT_DRAW_INDEX_IMMD: + { + DrawPassContext drawPassCtx; + drawPassCtx.beginDrawPass(); + LatteCP_itDrawImmediate(cmdData, nWords, drawPassCtx); + drawPassCtx.endDrawPass(); + break; + } + case IT_WAIT_REG_MEM: + { + LatteCP_itWaitRegMem(cmdData, nWords); + LatteTiming_HandleTimedVsync(); + LatteAsyncCommands_checkAndExecute(); + break; + } + case IT_MEM_WRITE: + { + LatteCP_itMemWrite(cmdData, nWords); + break; + } + case IT_CONTEXT_CONTROL: + { + LatteCP_itContextControl(cmdData, nWords); + break; + } + case IT_MEM_SEMAPHORE: + { + LatteCP_itMemSemaphore(cmdData, nWords); + break; + } + case IT_LOAD_CONFIG_REG: + { + LatteCP_itLoadReg(cmdData, nWords, LATTE_REG_BASE_CONFIG); + break; + } + case IT_LOAD_CONTEXT_REG: + { + LatteCP_itLoadReg(cmdData, nWords, LATTE_REG_BASE_CONTEXT); + break; + } + case IT_LOAD_ALU_CONST: + { + LatteCP_itLoadReg(cmdData, nWords, LATTE_REG_BASE_ALU_CONST); + break; + } + case IT_LOAD_LOOP_CONST: + { + LatteCP_itLoadReg(cmdData, nWords, LATTE_REG_BASE_LOOP_CONST); + break; + } + case IT_LOAD_RESOURCE: + { + LatteCP_itLoadReg(cmdData, nWords, LATTE_REG_BASE_RESOURCE); + break; + } + case IT_LOAD_SAMPLER: + { + LatteCP_itLoadReg(cmdData, nWords, LATTE_REG_BASE_SAMPLER); + break; + } + case IT_SET_PREDICATION: + { + LatteCP_itSetPredication(cmdData, nWords); + break; + } + case IT_HLE_COPY_COLORBUFFER_TO_SCANBUFFER: + { + LatteCP_itHLECopyColorBufferToScanBuffer(cmdData, nWords); + break; + } + case IT_HLE_TRIGGER_SCANBUFFER_SWAP: + { + LatteCP_signalEnterWait(); + LatteCP_itHLESwapScanBuffer(cmdData, nWords); + break; + } + case IT_HLE_WAIT_FOR_FLIP: + { + LatteCP_signalEnterWait(); + LatteCP_itHLEWaitForFlip(cmdData, nWords); + break; + } + case IT_HLE_REQUEST_SWAP_BUFFERS: + { + LatteCP_itHLERequestSwapBuffers(cmdData, nWords); + break; + } + case IT_HLE_CLEAR_COLOR_DEPTH_STENCIL: + { + LatteCP_itHLEClearColorDepthStencil(cmdData, nWords); + break; + } + case IT_HLE_COPY_SURFACE_NEW: + { + LatteCP_itHLECopySurfaceNew(cmdData, nWords); + break; + } + case IT_HLE_SAMPLE_TIMER: + { + LatteCP_itHLESampleTimer(cmdData, nWords); + break; + } + case IT_HLE_SPECIAL_STATE: + { + LatteCP_itHLESpecialState(cmdData, nWords); + break; + } + case IT_HLE_BEGIN_OCCLUSION_QUERY: + { + LatteCP_itHLEBeginOcclusionQuery(cmdData, nWords); + break; + } + case IT_HLE_END_OCCLUSION_QUERY: + { + LatteCP_itHLEEndOcclusionQuery(cmdData, nWords); + break; + } + case IT_HLE_BOTTOM_OF_PIPE_CB: + { + LatteCP_itHLEBottomOfPipeCB(cmdData, nWords); 
+ break; + } + case IT_HLE_SYNC_ASYNC_OPERATIONS: + { + LatteTextureReadback_UpdateFinishedTransfers(true); + LatteQuery_UpdateFinishedQueriesForceFinishAll(); + break; + } + default: + debug_printf("Unhandled IT %02x\n", itCode); + cemu_assert_debug(false); + break; + } + } + else if (itHeaderType == 2) + { + // filler packet + // has no body + } + else if (itHeaderType == 0) + { + uint32 registerBase = (itHeader & 0xFFFF); + uint32 registerCount = ((itHeader >> 16) & 0x3FFF) + 1; + if (registerBase == 0x304A) + { + GX2::__GX2NotifyEvent(GX2::GX2CallbackEventType::TIMESTAMP_TOP); + LatteSkipCMD(registerCount); + } + else if (registerBase == 0x304B) + { + LatteSkipCMD(registerCount); + } + else + { + LatteCP_dumpCommandBufferError(cmdStart, cmdEnd, cmd); + cemu_assert_debug(false); + } + } + else + { + debug_printf("invalid itHeaderType %08x\n", itHeaderType); LatteCP_dumpCommandBufferError(cmdStart, cmdEnd, cmd); cemu_assert_debug(false); } } - else - { - debug_printf("invalid itHeaderType %08x\n", itHeaderType); - LatteCP_dumpCommandBufferError(cmdStart, cmdEnd, cmd); - cemu_assert_debug(false); - } + cemu_assert_debug(cmd == cmdEnd); } - cemu_assert_debug(cmd == cmdEnd); } void LatteCP_ProcessRingbuffer() { - sint32 timerRecheck = 0; // estimates how much CP processing time passed based on the executed commands, if the value exceeds CP_TIMER_RECHECK then _handleTimers() is called + sint32 timerRecheck = 0; // estimates how much CP processing time has elapsed based on the executed commands, if the value exceeds CP_TIMER_RECHECK then _handleTimers() is called + uint32be tmpBuffer[128]; while (true) { uint32 itHeader = LatteCP_readU32Deprc(); @@ -1364,82 +1383,78 @@ void LatteCP_ProcessRingbuffer() { uint32 itCode = (itHeader >> 8) & 0xFF; uint32 nWords = ((itHeader >> 16) & 0x3FFF) + 1; - LatteCP_waitForNWords(nWords); - LatteCMDPtr cmd = (LatteCMDPtr)gxRingBufferReadPtr; - uint8* expectedGxRingBufferReadPtr = gxRingBufferReadPtr + nWords*4; + cemu_assert(nWords < 128); + for (sint32 i=0; i(cmd, nWords); + LatteCP_itSetRegistersGeneric(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; } break; case IT_SET_RESOURCE: { - gxRingBufferReadPtr = (uint8*)LatteCP_itSetRegistersGeneric(cmd, nWords); + LatteCP_itSetRegistersGeneric(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; } break; case IT_SET_ALU_CONST: { - gxRingBufferReadPtr = (uint8*)LatteCP_itSetRegistersGeneric(cmd, nWords); + LatteCP_itSetRegistersGeneric(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; break; } case IT_SET_CTL_CONST: { - gxRingBufferReadPtr = (uint8*)LatteCP_itSetRegistersGeneric(cmd, nWords); + LatteCP_itSetRegistersGeneric(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; break; } case IT_SET_SAMPLER: { - gxRingBufferReadPtr = (uint8*)LatteCP_itSetRegistersGeneric(cmd, nWords); + LatteCP_itSetRegistersGeneric(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; break; } case IT_SET_CONFIG_REG: { - gxRingBufferReadPtr = (uint8*)LatteCP_itSetRegistersGeneric(cmd, nWords); + LatteCP_itSetRegistersGeneric(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; break; } case IT_INDIRECT_BUFFER_PRIV: { -#ifdef FAST_DRAW_LOGGING - if (GetAsyncKeyState('A')) - forceLogRemoveMe_printf("[FAST-DRAW] BEGIN CMD BUFFER"); -#endif - LatteCP_itIndirectBufferDepr(nWords); + LatteCP_itIndirectBufferDepr(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; -#ifdef FAST_DRAW_LOGGING - if (GetAsyncKeyState('A')) - forceLogRemoveMe_printf("[FAST-DRAW] END CMD BUFFER"); -#endif break; } case IT_STRMOUT_BUFFER_UPDATE: { 
- gxRingBufferReadPtr = (uint8*)LatteCP_itStreamoutBufferUpdate(cmd, nWords); + LatteCP_itStreamoutBufferUpdate(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; break; } case IT_INDEX_TYPE: { - gxRingBufferReadPtr = (uint8*)LatteCP_itIndexType(cmd, nWords); + LatteCP_itIndexType(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 1024; break; } case IT_NUM_INSTANCES: { - gxRingBufferReadPtr = (uint8*)LatteCP_itNumInstances(cmd, nWords); + LatteCP_itNumInstances(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 1024; break; } @@ -1447,7 +1462,7 @@ void LatteCP_ProcessRingbuffer() { DrawPassContext drawPassCtx; drawPassCtx.beginDrawPass(); - gxRingBufferReadPtr = (uint8*)LatteCP_itDrawIndex2(cmd, nWords, drawPassCtx); + LatteCP_itDrawIndex2(cmd, nWords, drawPassCtx); drawPassCtx.endDrawPass(); timerRecheck += CP_TIMER_RECHECK / 64; break; @@ -1456,7 +1471,7 @@ void LatteCP_ProcessRingbuffer() { DrawPassContext drawPassCtx; drawPassCtx.beginDrawPass(); - gxRingBufferReadPtr = (uint8*)LatteCP_itDrawIndexAuto(cmd, nWords, drawPassCtx); + LatteCP_itDrawIndexAuto(cmd, nWords, drawPassCtx); drawPassCtx.endDrawPass(); timerRecheck += CP_TIMER_RECHECK / 512; break; @@ -1465,165 +1480,157 @@ void LatteCP_ProcessRingbuffer() { DrawPassContext drawPassCtx; drawPassCtx.beginDrawPass(); - gxRingBufferReadPtr = (uint8*)LatteCP_itDrawImmediate(cmd, nWords, drawPassCtx); + LatteCP_itDrawImmediate(cmd, nWords, drawPassCtx); drawPassCtx.endDrawPass(); timerRecheck += CP_TIMER_RECHECK / 64; break; } case IT_WAIT_REG_MEM: { - gxRingBufferReadPtr = (uint8*)LatteCP_itWaitRegMem(cmd, nWords); + LatteCP_itWaitRegMem(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 16; break; } case IT_MEM_WRITE: { - gxRingBufferReadPtr = (uint8*)LatteCP_itMemWrite(cmd, nWords); + LatteCP_itMemWrite(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 128; break; } case IT_CONTEXT_CONTROL: { - gxRingBufferReadPtr = (uint8*)LatteCP_itContextControl(cmd, nWords); + LatteCP_itContextControl(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 128; break; } case IT_MEM_SEMAPHORE: { - gxRingBufferReadPtr = (uint8*)LatteCP_itMemSemaphore(cmd, nWords); + LatteCP_itMemSemaphore(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 128; break; } case IT_LOAD_CONFIG_REG: { - gxRingBufferReadPtr = (uint8*)LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_CONFIG); + LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_CONFIG); timerRecheck += CP_TIMER_RECHECK / 64; break; } case IT_LOAD_CONTEXT_REG: { - gxRingBufferReadPtr = (uint8*)LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_CONTEXT); + LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_CONTEXT); timerRecheck += CP_TIMER_RECHECK / 64; break; } case IT_LOAD_ALU_CONST: { - gxRingBufferReadPtr = (uint8*)LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_ALU_CONST); + LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_ALU_CONST); timerRecheck += CP_TIMER_RECHECK / 64; break; } case IT_LOAD_LOOP_CONST: { - gxRingBufferReadPtr = (uint8*)LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_LOOP_CONST); + LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_LOOP_CONST); timerRecheck += CP_TIMER_RECHECK / 64; break; } case IT_LOAD_RESOURCE: { - gxRingBufferReadPtr = (uint8*)LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_RESOURCE); + LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_RESOURCE); timerRecheck += CP_TIMER_RECHECK / 64; break; } case IT_LOAD_SAMPLER: { - gxRingBufferReadPtr = (uint8*)LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_SAMPLER); + LatteCP_itLoadReg(cmd, nWords, LATTE_REG_BASE_SAMPLER); timerRecheck += CP_TIMER_RECHECK / 64; break; } 
case IT_SET_LOOP_CONST: { - LatteSkipCMD(nWords); - gxRingBufferReadPtr = (uint8*)cmd; // todo break; } case IT_SET_PREDICATION: { - gxRingBufferReadPtr = (uint8*)LatteCP_itSetPredication(cmd, nWords); + LatteCP_itSetPredication(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; break; } + case IT_EVENT_WRITE_EOP: + { + LatteCP_itEventWriteEOP(cmd, nWords); + break; + } case IT_HLE_COPY_COLORBUFFER_TO_SCANBUFFER: { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLECopyColorBufferToScanBuffer(cmd, nWords); + LatteCP_itHLECopyColorBufferToScanBuffer(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 64; break; } case IT_HLE_TRIGGER_SCANBUFFER_SWAP: { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLESwapScanBuffer(cmd, nWords); + LatteCP_signalEnterWait(); + LatteCP_itHLESwapScanBuffer(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 64; break; } case IT_HLE_WAIT_FOR_FLIP: { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLEWaitForFlip(cmd, nWords); + LatteCP_signalEnterWait(); + LatteCP_itHLEWaitForFlip(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 1; break; } case IT_HLE_REQUEST_SWAP_BUFFERS: { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLERequestSwapBuffers(cmd, nWords); + LatteCP_itHLERequestSwapBuffers(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 32; break; } case IT_HLE_CLEAR_COLOR_DEPTH_STENCIL: { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLEClearColorDepthStencil(cmd, nWords); + LatteCP_itHLEClearColorDepthStencil(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 128; break; } case IT_HLE_COPY_SURFACE_NEW: { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLECopySurfaceNew(cmd, nWords); + LatteCP_itHLECopySurfaceNew(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 128; break; } - case IT_HLE_FIFO_WRAP_AROUND: - { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLEFifoWrapAround(cmd, nWords); - expectedGxRingBufferReadPtr = gxRingBufferReadPtr; - timerRecheck += CP_TIMER_RECHECK / 512; - break; - } case IT_HLE_SAMPLE_TIMER: { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLESampleTimer(cmd, nWords); + LatteCP_itHLESampleTimer(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; break; } case IT_HLE_SPECIAL_STATE: { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLESpecialState(cmd, nWords); + LatteCP_itHLESpecialState(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; break; } case IT_HLE_BEGIN_OCCLUSION_QUERY: { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLEBeginOcclusionQuery(cmd, nWords); + LatteCP_itHLEBeginOcclusionQuery(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; break; } case IT_HLE_END_OCCLUSION_QUERY: { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLEEndOcclusionQuery(cmd, nWords); - timerRecheck += CP_TIMER_RECHECK / 512; - break; - } - case IT_HLE_SET_CB_RETIREMENT_TIMESTAMP: - { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLESetRetirementTimestamp(cmd, nWords); + LatteCP_itHLEEndOcclusionQuery(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 512; break; } case IT_HLE_BOTTOM_OF_PIPE_CB: { - gxRingBufferReadPtr = (uint8*)LatteCP_itHLEBottomOfPipeCB(cmd, nWords); + LatteCP_itHLEBottomOfPipeCB(cmd, nWords); break; } case IT_HLE_SYNC_ASYNC_OPERATIONS: { - LatteCP_skipWords(nWords); + //LatteCP_skipWords(nWords); LatteTextureReadback_UpdateFinishedTransfers(true); LatteQuery_UpdateFinishedQueriesForceFinishAll(); break; @@ -1631,7 +1638,6 @@ void LatteCP_ProcessRingbuffer() default: cemu_assert_debug(false); } - cemu_assert_debug(expectedGxRingBufferReadPtr == gxRingBufferReadPtr); } else if (itHeaderType == 2) { @@ -1669,3 +1675,265 @@ void LatteCP_ProcessRingbuffer() } } } + +#ifdef 
LATTE_CP_LOGGING +void LatteCP_DebugPrintCmdBuffer(uint32be* bufferPtr, uint32 size) +{ + uint32be* bufferPtrInitial = bufferPtr; + uint32be* bufferPtrEnd = bufferPtr + (size/4); + while (bufferPtr < bufferPtrEnd) + { + std::string strPrefix = fmt::format("[PM4 Buf {:08x} Offs {:04x}]", MEMPTR(bufferPtr).GetMPTR(), (bufferPtr - bufferPtrInitial) * 4); + uint32 itHeader = *bufferPtr; + bufferPtr++; + uint32 itHeaderType = (itHeader >> 30) & 3; + if (itHeaderType == 3) + { + uint32 itCode = (itHeader >> 8) & 0xFF; + uint32 nWords = ((itHeader >> 16) & 0x3FFF) + 1; + uint32be* cmdData = bufferPtr; + bufferPtr += nWords; + switch (itCode) + { + case IT_SURFACE_SYNC: + { + cemuLog_log(LogType::Force, "{} IT_SURFACE_SYNC", strPrefix); + break; + } + case IT_SET_CONTEXT_REG: + { + std::string regVals; + for (uint32 i = 0; i < std::min(nWords - 1, 8); i++) + regVals.append(fmt::format("{:08x} ", cmdData[1 + i].value())); + cemuLog_log(LogType::Force, "{} IT_SET_CONTEXT_REG Reg {:04x} RegValues {}", strPrefix, cmdData[0].value(), regVals); + } + case IT_SET_RESOURCE: + { + std::string regVals; + for (uint32 i = 0; i < std::min(nWords - 1, 8); i++) + regVals.append(fmt::format("{:08x} ", cmdData[1+i].value())); + cemuLog_log(LogType::Force, "{} IT_SET_RESOURCE Reg {:04x} RegValues {}", strPrefix, cmdData[0].value(), regVals); + break; + } + case IT_SET_ALU_CONST: + { + cemuLog_log(LogType::Force, "{} IT_SET_ALU_CONST", strPrefix); + break; + } + case IT_SET_CTL_CONST: + { + cemuLog_log(LogType::Force, "{} IT_SET_CTL_CONST", strPrefix); + break; + } + case IT_SET_SAMPLER: + { + cemuLog_log(LogType::Force, "{} IT_SET_SAMPLER", strPrefix); + break; + } + case IT_SET_CONFIG_REG: + { + cemuLog_log(LogType::Force, "{} IT_SET_CONFIG_REG", strPrefix); + break; + } + case IT_INDIRECT_BUFFER_PRIV: + { + if (nWords != 3) + { + cemuLog_log(LogType::Force, "{} IT_INDIRECT_BUFFER_PRIV (malformed!)", strPrefix); + } + else + { + uint32 physicalAddress = cmdData[0]; + uint32 physicalAddressHigh = cmdData[1]; + uint32 sizeInDWords = cmdData[2]; + cemuLog_log(LogType::Force, "{} IT_INDIRECT_BUFFER_PRIV Addr {:08x} Size {:08x}", strPrefix, physicalAddress, sizeInDWords*4); + LatteCP_DebugPrintCmdBuffer(MEMPTR(physicalAddress), sizeInDWords * 4); + } + break; + } + case IT_STRMOUT_BUFFER_UPDATE: + { + cemuLog_log(LogType::Force, "{} IT_STRMOUT_BUFFER_UPDATE", strPrefix); + break; + } + case IT_INDEX_TYPE: + { + cemuLog_log(LogType::Force, "{} IT_INDEX_TYPE", strPrefix); + break; + } + case IT_NUM_INSTANCES: + { + cemuLog_log(LogType::Force, "{} IT_NUM_INSTANCES", strPrefix); + break; + } + case IT_DRAW_INDEX_2: + { + if (nWords != 5) + { + cemuLog_log(LogType::Force, "{} IT_DRAW_INDEX_2 (malformed!)", strPrefix); + } + else + { + uint32 ukn1 = cmdData[0]; + MPTR physIndices = cmdData[1]; + uint32 ukn2 = cmdData[2]; + uint32 count = cmdData[3]; + uint32 ukn3 = cmdData[4]; + cemuLog_log(LogType::Force, "{} IT_DRAW_INDEX_2 | Count {}", strPrefix, count); + } + break; + } + case IT_DRAW_INDEX_AUTO: + { + cemuLog_log(LogType::Force, "{} IT_DRAW_INDEX_AUTO", strPrefix); + break; + } + case IT_DRAW_INDEX_IMMD: + { + cemuLog_log(LogType::Force, "{} IT_DRAW_INDEX_IMMD", strPrefix); + break; + } + case IT_WAIT_REG_MEM: + { + cemuLog_log(LogType::Force, "{} IT_WAIT_REG_MEM", strPrefix); + break; + } + case IT_MEM_WRITE: + { + cemuLog_log(LogType::Force, "{} IT_MEM_WRITE", strPrefix); + break; + } + case IT_CONTEXT_CONTROL: + { + cemuLog_log(LogType::Force, "{} IT_CONTEXT_CONTROL", strPrefix); + break; + } + case IT_MEM_SEMAPHORE: 
+ { + cemuLog_log(LogType::Force, "{} IT_MEM_SEMAPHORE", strPrefix); + break; + } + case IT_LOAD_CONFIG_REG: + { + cemuLog_log(LogType::Force, "{} IT_LOAD_CONFIG_REG", strPrefix); + break; + } + case IT_LOAD_CONTEXT_REG: + { + cemuLog_log(LogType::Force, "{} IT_LOAD_CONTEXT_REG", strPrefix); + break; + } + case IT_LOAD_ALU_CONST: + { + cemuLog_log(LogType::Force, "{} IT_LOAD_ALU_CONST", strPrefix); + break; + } + case IT_LOAD_LOOP_CONST: + { + cemuLog_log(LogType::Force, "{} IT_LOAD_LOOP_CONST", strPrefix); + break; + } + case IT_LOAD_RESOURCE: + { + cemuLog_log(LogType::Force, "{} IT_LOAD_RESOURCE", strPrefix); + break; + } + case IT_LOAD_SAMPLER: + { + cemuLog_log(LogType::Force, "{} IT_LOAD_SAMPLER", strPrefix); + break; + } + case IT_SET_LOOP_CONST: + { + cemuLog_log(LogType::Force, "{} IT_SET_LOOP_CONST", strPrefix); + break; + } + case IT_SET_PREDICATION: + { + cemuLog_log(LogType::Force, "{} IT_SET_PREDICATION", strPrefix); + break; + } + case IT_HLE_COPY_COLORBUFFER_TO_SCANBUFFER: + { + cemuLog_log(LogType::Force, "{} IT_HLE_COPY_COLORBUFFER_TO_SCANBUFFER", strPrefix); + break; + } + case IT_HLE_TRIGGER_SCANBUFFER_SWAP: + { + cemuLog_log(LogType::Force, "{} IT_HLE_TRIGGER_SCANBUFFER_SWAP", strPrefix); + break; + } + case IT_HLE_WAIT_FOR_FLIP: + { + cemuLog_log(LogType::Force, "{} IT_HLE_WAIT_FOR_FLIP", strPrefix); + break; + } + case IT_HLE_REQUEST_SWAP_BUFFERS: + { + cemuLog_log(LogType::Force, "{} IT_HLE_REQUEST_SWAP_BUFFERS", strPrefix); + break; + } + case IT_HLE_CLEAR_COLOR_DEPTH_STENCIL: + { + cemuLog_log(LogType::Force, "{} IT_HLE_CLEAR_COLOR_DEPTH_STENCIL", strPrefix); + break; + } + case IT_HLE_COPY_SURFACE_NEW: + { + cemuLog_log(LogType::Force, "{} IT_HLE_COPY_SURFACE_NEW", strPrefix); + break; + } + case IT_HLE_SAMPLE_TIMER: + { + cemuLog_log(LogType::Force, "{} IT_HLE_SAMPLE_TIMER", strPrefix); + break; + } + case IT_HLE_SPECIAL_STATE: + { + cemuLog_log(LogType::Force, "{} IT_HLE_SPECIAL_STATE", strPrefix); + break; + } + case IT_HLE_BEGIN_OCCLUSION_QUERY: + { + cemuLog_log(LogType::Force, "{} IT_HLE_BEGIN_OCCLUSION_QUERY", strPrefix); + break; + } + case IT_HLE_END_OCCLUSION_QUERY: + { + cemuLog_log(LogType::Force, "{} IT_HLE_END_OCCLUSION_QUERY", strPrefix); + break; + } + case IT_HLE_BOTTOM_OF_PIPE_CB: + { + cemuLog_log(LogType::Force, "{} IT_HLE_BOTTOM_OF_PIPE_CB", strPrefix); + break; + } + case IT_HLE_SYNC_ASYNC_OPERATIONS: + { + cemuLog_log(LogType::Force, "{} IT_HLE_SYNC_ASYNC_OPERATIONS", strPrefix); + break; + } + default: + cemuLog_log(LogType::Force, "{} Unsupported operation code", strPrefix); + return; + } + } + else if (itHeaderType == 2) + { + // filler packet + } + else if (itHeaderType == 0) + { + uint32 registerBase = (itHeader & 0xFFFF); + uint32 registerCount = ((itHeader >> 16) & 0x3FFF) + 1; + LatteCP_skipWords(registerCount); + cemuLog_log(LogType::Force, "[LatteCP] itType=0 registerBase={:04x}", registerBase); + } + else + { + cemuLog_log(LogType::Force, "Invalid itHeaderType %08x\n", itHeaderType); + return; + } + } +} +#endif \ No newline at end of file diff --git a/src/Cafe/HW/Latte/Core/LatteConst.h b/src/Cafe/HW/Latte/Core/LatteConst.h index ffbead1c..ebe741e9 100644 --- a/src/Cafe/HW/Latte/Core/LatteConst.h +++ b/src/Cafe/HW/Latte/Core/LatteConst.h @@ -1,21 +1,27 @@ #pragma once #include "Cafe/HW/Latte/ISA/LatteReg.h" -// this file contains legacy C-style defines, modernize and merge into LatteReg.h +// todo - this file contains legacy C-style defines, modernize and merge into LatteReg.h // GPU7/Latte hardware info -#define LATTE_NUM_GPR 
(128) -#define LATTE_NUM_STREAMOUT_BUFFER (4) -#define LATTE_NUM_COLOR_TARGET (8) +#define LATTE_NUM_GPR 128 +#define LATTE_NUM_STREAMOUT_BUFFER 4 +#define LATTE_NUM_COLOR_TARGET 8 -#define LATTE_NUM_MAX_TEX_UNITS (18) // number of available texture units per shader stage (this might be higher than 18? BotW is the only game which uses more than 16?) -#define LATTE_NUM_MAX_UNIFORM_BUFFERS (16) // number of supported uniform buffer binding locations +#define LATTE_NUM_MAX_TEX_UNITS 18 // number of available texture units per shader stage (this might be higher than 18? BotW is the only game which uses more than 16?) +#define LATTE_NUM_MAX_UNIFORM_BUFFERS 16 // number of supported uniform buffer binding locations -#define LATTE_VS_ATTRIBUTE_LIMIT (32) // todo: verify -#define LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS (256) // should this be 128 since there are only 128 GPRs? +#define LATTE_VS_ATTRIBUTE_LIMIT 32 // todo: verify +#define LATTE_NUM_MAX_ATTRIBUTE_LOCATIONS 256 // should this be 128 since there are only 128 GPRs? -#define LATTE_MAX_VERTEX_BUFFERS (16) +#define LATTE_MAX_VERTEX_BUFFERS 16 + +// Cemu-specific constants + +#define LATTE_CEMU_PS_TEX_UNIT_BASE 0 +#define LATTE_CEMU_VS_TEX_UNIT_BASE 32 +#define LATTE_CEMU_GS_TEX_UNIT_BASE 64 // vertex formats @@ -76,8 +82,6 @@ #define GLVENDOR_UNKNOWN (0) #define GLVENDOR_AMD (1) // AMD/ATI #define GLVENDOR_NVIDIA (2) -#define GLVENDOR_INTEL_LEGACY (3) -#define GLVENDOR_INTEL_NOLEGACY (4) #define GLVENDOR_INTEL (5) #define GLVENDOR_APPLE (6) diff --git a/src/Cafe/HW/Latte/Core/LatteGSCopyShaderParser.cpp b/src/Cafe/HW/Latte/Core/LatteGSCopyShaderParser.cpp index 7df5eecd..16ce0737 100644 --- a/src/Cafe/HW/Latte/Core/LatteGSCopyShaderParser.cpp +++ b/src/Cafe/HW/Latte/Core/LatteGSCopyShaderParser.cpp @@ -208,7 +208,7 @@ LatteParsedGSCopyShader* LatteGSCopyShaderParser_parse(uint8* programData, uint3 uint32 bufferIndex; if (cf_inst23_7 == GPU7_CF_INST_MEM_STREAM0_WRITE) bufferIndex = 0; - else if (cf_inst23_7 == GPU7_CF_INST_MEM_STREAM0_WRITE) + else if (cf_inst23_7 == GPU7_CF_INST_MEM_STREAM1_WRITE) bufferIndex = 1; else cemu_assert_debug(false); @@ -222,7 +222,7 @@ LatteParsedGSCopyShader* LatteGSCopyShaderParser_parse(uint8* programData, uint3 } else { - forceLog_printf("Copyshader: Unknown 23_7 clause 0x%x found\n", cf_inst23_7); + cemuLog_log(LogType::Force, "Copyshader: Unknown 23_7 clause 0x{:x} found", cf_inst23_7); cemu_assert_debug(false); } if( isEndOfProgram ) diff --git a/src/Cafe/HW/Latte/Core/LatteIndices.cpp b/src/Cafe/HW/Latte/Core/LatteIndices.cpp index e5f3364a..2bbb617d 100644 --- a/src/Cafe/HW/Latte/Core/LatteIndices.cpp +++ b/src/Cafe/HW/Latte/Core/LatteIndices.cpp @@ -1,48 +1,64 @@ #include "Cafe/HW/Latte/Core/LatteConst.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" - #include "Cafe/HW/Latte/ISA/RegDefines.h" +#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h" +#include "Common/cpu_features.h" -#if __GNUC__ +#if defined(ARCH_X86_64) && defined(__GNUC__) #include -#endif - -#ifdef __GNUC__ -#define ATTRIBUTE_AVX2 __attribute__((target("avx2"))) -#define ATTRIBUTE_SSE41 __attribute__((target("sse4.1"))) -#else -#define ATTRIBUTE_AVX2 -#define ATTRIBUTE_SSE41 +#elif defined(__aarch64__) +#include #endif struct { - const void* lastPtr; - uint32 lastCount; - LattePrimitiveMode lastPrimitiveMode; - LatteIndexType lastIndexType; - // output - uint32 indexMin; - uint32 indexMax; - Renderer::INDEX_TYPE renderIndexType; - uint32 outputCount; - uint32 indexBufferOffset; - uint32 indexBufferIndex; + struct CacheEntry + { + // 
input data + const void* lastPtr; + uint32 lastCount; + LattePrimitiveMode lastPrimitiveMode; + LatteIndexType lastIndexType; + uint64 lastUsed; + // output + uint32 indexMin; + uint32 indexMax; + Renderer::INDEX_TYPE renderIndexType; + uint32 outputCount; + Renderer::IndexAllocation indexAllocation; + }; + std::array entry; + uint64 currentUsageCounter{0}; }LatteIndexCache{}; void LatteIndices_invalidate(const void* memPtr, uint32 size) { - if (LatteIndexCache.lastPtr >= memPtr && (LatteIndexCache.lastPtr < ((uint8*)memPtr + size)) ) + for(auto& entry : LatteIndexCache.entry) { - LatteIndexCache.lastPtr = nullptr; - LatteIndexCache.lastCount = 0; + if (entry.lastPtr >= memPtr && (entry.lastPtr < ((uint8*)memPtr + size)) ) + { + if(entry.lastPtr != nullptr) + g_renderer->indexData_releaseIndexMemory(entry.indexAllocation); + entry.lastPtr = nullptr; + entry.lastCount = 0; + } } } void LatteIndices_invalidateAll() { - LatteIndexCache.lastPtr = nullptr; - LatteIndexCache.lastCount = 0; + for(auto& entry : LatteIndexCache.entry) + { + if (entry.lastPtr != nullptr) + g_renderer->indexData_releaseIndexMemory(entry.indexAllocation); + entry.lastPtr = nullptr; + entry.lastCount = 0; + } +} + +uint64 LatteIndices_GetNextUsageIndex() +{ + return LatteIndexCache.currentUsageCounter++; } uint32 LatteIndices_calculateIndexOutputSize(LattePrimitiveMode primitiveMode, LatteIndexType indexType, uint32 count) @@ -292,6 +308,7 @@ void LatteIndices_generateAutoLineLoopIndices(void* indexDataOutput, uint32 coun indexMax = std::max(count, 1u) - 1; } +#if defined(ARCH_X86_64) ATTRIBUTE_AVX2 void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) { @@ -487,6 +504,115 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat indexMax = std::max(indexMax, _maxIndex); indexMin = std::min(indexMin, _minIndex); } +#elif defined(__aarch64__) + +void LatteIndices_fastConvertU16_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) +{ + const uint16* indicesU16BE = (const uint16*)indexDataInput; + uint16* indexOutput = (uint16*)indexDataOutput; + sint32 count8 = count >> 3; + sint32 countRemaining = count & 7; + + if (count8) + { + uint16x8_t mMin = vdupq_n_u16(0xFFFF); + uint16x8_t mMax = vdupq_n_u16(0x0000); + uint16x8_t mTemp; + uint16x8_t* mRawIndices = (uint16x8_t*) indicesU16BE; + indicesU16BE += count8 * 8; + uint16x8_t* mOutputIndices = (uint16x8_t*) indexOutput; + indexOutput += count8 * 8; + + while (count8--) + { + mTemp = vld1q_u16((uint16*)mRawIndices); + mRawIndices++; + mTemp = vrev16q_u8(mTemp); + mMin = vminq_u16(mMin, mTemp); + mMax = vmaxq_u16(mMax, mTemp); + vst1q_u16((uint16*)mOutputIndices, mTemp); + mOutputIndices++; + } + + uint16* mMaxU16 = (uint16*)&mMax; + uint16* mMinU16 = (uint16*)&mMin; + + for (int i = 0; i < 8; ++i) { + indexMax = std::max(indexMax, (uint32)mMaxU16[i]); + indexMin = std::min(indexMin, (uint32)mMinU16[i]); + } + } + // process remaining indices + uint32 _minIndex = 0xFFFFFFFF; + uint32 _maxIndex = 0; + for (sint32 i = countRemaining; (--i) >= 0;) + { + uint16 idx = _swapEndianU16(*indicesU16BE); + *indexOutput = idx; + indexOutput++; + indicesU16BE++; + _maxIndex = std::max(_maxIndex, (uint32)idx); + _minIndex = std::min(_minIndex, (uint32)idx); + } + // update min/max + indexMax = std::max(indexMax, _maxIndex); + indexMin = std::min(indexMin, _minIndex); +} + +void LatteIndices_fastConvertU32_NEON(const void* 
indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) +{ + const uint32* indicesU32BE = (const uint32*)indexDataInput; + uint32* indexOutput = (uint32*)indexDataOutput; + sint32 count8 = count >> 2; + sint32 countRemaining = count & 3; + + if (count8) + { + uint32x4_t mMin = vdupq_n_u32(0xFFFFFFFF); + uint32x4_t mMax = vdupq_n_u32(0x00000000); + uint32x4_t mTemp; + uint32x4_t* mRawIndices = (uint32x4_t*) indicesU32BE; + indicesU32BE += count8 * 4; + uint32x4_t* mOutputIndices = (uint32x4_t*) indexOutput; + indexOutput += count8 * 4; + + while (count8--) + { + mTemp = vld1q_u32((uint32*)mRawIndices); + mRawIndices++; + mTemp = vrev32q_u8(mTemp); + mMin = vminq_u32(mMin, mTemp); + mMax = vmaxq_u32(mMax, mTemp); + vst1q_u32((uint32*)mOutputIndices, mTemp); + mOutputIndices++; + } + + uint32* mMaxU32 = (uint32*)&mMax; + uint32* mMinU32 = (uint32*)&mMin; + + for (int i = 0; i < 4; ++i) { + indexMax = std::max(indexMax, mMaxU32[i]); + indexMin = std::min(indexMin, mMinU32[i]); + } + } + // process remaining indices + uint32 _minIndex = 0xFFFFFFFF; + uint32 _maxIndex = 0; + for (sint32 i = countRemaining; (--i) >= 0;) + { + uint32 idx = _swapEndianU32(*indicesU32BE); + *indexOutput = idx; + indexOutput++; + indicesU32BE++; + _maxIndex = std::max(_maxIndex, idx); + _minIndex = std::min(_minIndex, idx); + } + // update min/max + indexMax = std::max(indexMax, _maxIndex); + indexMin = std::min(indexMin, _minIndex); +} + +#endif template void _LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, uint32 count, uint32 primitiveRestartIndex, uint32& indexMin, uint32& indexMax) @@ -538,7 +664,7 @@ void LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, LatteIn } } -void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 count, LattePrimitiveMode primitiveMode, uint32& indexMin, uint32& indexMax, Renderer::INDEX_TYPE& renderIndexType, uint32& outputCount, uint32& indexBufferOffset, uint32& indexBufferIndex) +void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 count, LattePrimitiveMode primitiveMode, uint32& indexMin, uint32& indexMax, Renderer::INDEX_TYPE& renderIndexType, uint32& outputCount, Renderer::IndexAllocation& indexAllocation) { // what this should do: // [x] use fast SIMD-based index decoding @@ -548,17 +674,18 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 // [ ] better cache implementation, allow to cache across frames // reuse from cache if data didn't change - if (LatteIndexCache.lastPtr == indexData && - LatteIndexCache.lastCount == count && - LatteIndexCache.lastPrimitiveMode == primitiveMode && - LatteIndexCache.lastIndexType == indexType) + auto cacheEntry = std::find_if(LatteIndexCache.entry.begin(), LatteIndexCache.entry.end(), [indexData, count, primitiveMode, indexType](const auto& entry) { - indexMin = LatteIndexCache.indexMin; - indexMax = LatteIndexCache.indexMax; - renderIndexType = LatteIndexCache.renderIndexType; - outputCount = LatteIndexCache.outputCount; - indexBufferOffset = LatteIndexCache.indexBufferOffset; - indexBufferIndex = LatteIndexCache.indexBufferIndex; + return entry.lastPtr == indexData && entry.lastCount == count && entry.lastPrimitiveMode == primitiveMode && entry.lastIndexType == indexType; + }); + if (cacheEntry != LatteIndexCache.entry.end()) + { + indexMin = cacheEntry->indexMin; + indexMax = cacheEntry->indexMax; + renderIndexType = cacheEntry->renderIndexType; + outputCount = 
cacheEntry->outputCount; + indexAllocation = cacheEntry->indexAllocation; + cacheEntry->lastUsed = LatteIndices_GetNextUsageIndex(); return; } @@ -582,10 +709,12 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 indexMin = 0; indexMax = std::max(count, 1u)-1; renderIndexType = Renderer::INDEX_TYPE::NONE; + indexAllocation = {}; return; // no indices } // query index buffer from renderer - void* indexOutputPtr = g_renderer->indexData_reserveIndexMemory(indexOutputSize, indexBufferOffset, indexBufferIndex); + indexAllocation = g_renderer->indexData_reserveIndexMemory(indexOutputSize); + void* indexOutputPtr = indexAllocation.mem; // decode indices indexMin = std::numeric_limits::max(); @@ -669,19 +798,31 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 { if (indexType == LatteIndexType::U16_BE) { - if (_cpuExtension_AVX2) +#if defined(ARCH_X86_64) + if (g_CPUFeatures.x86.avx2) LatteIndices_fastConvertU16_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax); - else if (_cpuExtension_SSE4_1 && _cpuExtension_SSSE3) + else if (g_CPUFeatures.x86.sse4_1 && g_CPUFeatures.x86.ssse3) LatteIndices_fastConvertU16_SSE41(indexData, indexOutputPtr, count, indexMin, indexMax); else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); +#elif defined(__aarch64__) + LatteIndices_fastConvertU16_NEON(indexData, indexOutputPtr, count, indexMin, indexMax); +#else + LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); +#endif } else if (indexType == LatteIndexType::U32_BE) { - if (_cpuExtension_AVX2) +#if defined(ARCH_X86_64) + if (g_CPUFeatures.x86.avx2) LatteIndices_fastConvertU32_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax); else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); +#elif defined(__aarch64__) + LatteIndices_fastConvertU32_NEON(indexData, indexOutputPtr, count, indexMin, indexMax); +#else + LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); +#endif } else if (indexType == LatteIndexType::U16_LE) { @@ -702,16 +843,25 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 // recalculate index range but filter out primitive restart index LatteIndices_alternativeCalculateIndexMinMax(indexData, indexType, count, indexMin, indexMax); } - g_renderer->indexData_uploadIndexMemory(indexBufferOffset, indexOutputSize); + g_renderer->indexData_uploadIndexMemory(indexAllocation); + performanceMonitor.cycle[performanceMonitor.cycleIndex].indexDataUploaded += indexOutputSize; + // get least recently used cache entry + auto lruEntry = std::min_element(LatteIndexCache.entry.begin(), LatteIndexCache.entry.end(), [](const auto& a, const auto& b) + { + return a.lastUsed < b.lastUsed; + }); + // invalidate previous allocation + if(lruEntry->lastPtr != nullptr) + g_renderer->indexData_releaseIndexMemory(lruEntry->indexAllocation); // update cache - LatteIndexCache.lastPtr = indexData; - LatteIndexCache.lastCount = count; - LatteIndexCache.lastPrimitiveMode = primitiveMode; - LatteIndexCache.lastIndexType = indexType; - LatteIndexCache.indexMin = indexMin; - LatteIndexCache.indexMax = indexMax; - LatteIndexCache.renderIndexType = renderIndexType; - LatteIndexCache.outputCount = outputCount; - LatteIndexCache.indexBufferOffset = indexBufferOffset; - LatteIndexCache.indexBufferIndex = indexBufferIndex; -} \ No newline at end of file + lruEntry->lastPtr = indexData; + lruEntry->lastCount = count; + 
lruEntry->lastPrimitiveMode = primitiveMode; + lruEntry->lastIndexType = indexType; + lruEntry->indexMin = indexMin; + lruEntry->indexMax = indexMax; + lruEntry->renderIndexType = renderIndexType; + lruEntry->outputCount = outputCount; + lruEntry->indexAllocation = indexAllocation; + lruEntry->lastUsed = LatteIndices_GetNextUsageIndex(); +} diff --git a/src/Cafe/HW/Latte/Core/LatteIndices.h b/src/Cafe/HW/Latte/Core/LatteIndices.h index 917d7991..8aace24e 100644 --- a/src/Cafe/HW/Latte/Core/LatteIndices.h +++ b/src/Cafe/HW/Latte/Core/LatteIndices.h @@ -4,4 +4,4 @@ void LatteIndices_invalidate(const void* memPtr, uint32 size); void LatteIndices_invalidateAll(); -void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 count, LattePrimitiveMode primitiveMode, uint32& indexMin, uint32& indexMax, Renderer::INDEX_TYPE& renderIndexType, uint32& outputCount, uint32& indexBufferOffset, uint32& indexBufferIndex); \ No newline at end of file +void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 count, LattePrimitiveMode primitiveMode, uint32& indexMin, uint32& indexMax, Renderer::INDEX_TYPE& renderIndexType, uint32& outputCount, Renderer::IndexAllocation& indexAllocation); \ No newline at end of file diff --git a/src/Cafe/HW/Latte/Core/LatteOverlay.cpp b/src/Cafe/HW/Latte/Core/LatteOverlay.cpp index cb7c9baf..e6edb904 100644 --- a/src/Cafe/HW/Latte/Core/LatteOverlay.cpp +++ b/src/Cafe/HW/Latte/Core/LatteOverlay.cpp @@ -12,34 +12,21 @@ #include "imgui/imgui_extension.h" #include "input/InputManager.h" +#include "util/SystemInfo/SystemInfo.h" #include -#if BOOST_OS_WINDOWS -#include -#include -#pragma comment(lib, "ntdll.lib") -#endif - struct OverlayStats { OverlayStats() {}; int processor_count = 1; - - // cemu cpu stats - uint64_t last_cpu{}, kernel{}, user{}; - - // global cpu stats - struct ProcessorTime - { - uint64_t idle{}, kernel{}, user{}; - }; - + ProcessorTime processor_time_cemu; std::vector processor_times; double fps{}; uint32 draw_calls_per_frame{}; + uint32 fast_draw_calls_per_frame{}; float cpu_usage{}; // cemu cpu usage in % std::vector cpu_per_core; // global cpu usage in % per core uint32 ram_usage{}; // ram usage in MB @@ -80,11 +67,11 @@ struct OverlayList const auto kPopupFlags = ImGuiWindowFlags_NoMove | ImGuiWindowFlags_NoDecoration | ImGuiWindowFlags_AlwaysAutoResize | ImGuiWindowFlags_NoSavedSettings | ImGuiWindowFlags_NoFocusOnAppearing | ImGuiWindowFlags_NoNav; const float kBackgroundAlpha = 0.65f; -void LatteOverlay_renderOverlay(ImVec2& position, ImVec2& pivot, sint32 direction) +void LatteOverlay_renderOverlay(ImVec2& position, ImVec2& pivot, sint32 direction, float fontSize, bool pad) { auto& config = GetConfig(); - - const auto font = ImGui_GetFont(14.0f * (float)config.overlay.text_scale / 100.0f); + + const auto font = ImGui_GetFont(fontSize); ImGui::PushFont(font); const ImVec4 color = ImGui::ColorConvertU32ToFloat4(config.overlay.text_color); @@ -100,7 +87,7 @@ void LatteOverlay_renderOverlay(ImVec2& position, ImVec2& pivot, sint32 directio ImGui::Text("FPS: %.2lf", g_state.fps); if (config.overlay.drawcalls) - ImGui::Text("Draws/f: %d", g_state.draw_calls_per_frame); + ImGui::Text("Draws/f: %d (fast: %d)", g_state.draw_calls_per_frame, g_state.fast_draw_calls_per_frame); if (config.overlay.cpu_usage) ImGui::Text("CPU: %.2lf%%", g_state.cpu_usage); @@ -120,22 +107,28 @@ void LatteOverlay_renderOverlay(ImVec2& position, ImVec2& pivot, sint32 directio ImGui::Text("VRAM: %dMB / %dMB", g_state.vramUsage, 
g_state.vramTotal); if (config.overlay.debug) + { + // general debug info + ImGui::Text("--- Debug info ---"); + ImGui::Text("IndexUploadPerFrame: %dKB", (performanceMonitor.stats.indexDataUploadPerFrame+1023)/1024); + // backend specific info g_renderer->AppendOverlayDebugInfo(); + } position.y += (ImGui::GetWindowSize().y + 10.0f) * direction; - ImGui::End(); } + ImGui::End(); } ImGui::PopStyleColor(); ImGui::PopFont(); } -void LatteOverlay_RenderNotifications(ImVec2& position, ImVec2& pivot, sint32 direction) +void LatteOverlay_RenderNotifications(ImVec2& position, ImVec2& pivot, sint32 direction, float fontSize, bool pad) { auto& config = GetConfig(); - const auto font = ImGui_GetFont(14.0f * (float)config.notification.text_scale / 100.0f); + const auto font = ImGui_GetFont(fontSize); ImGui::PushFont(font); const ImVec4 color = ImGui::ColorConvertU32ToFloat4(config.notification.text_color); @@ -170,8 +163,8 @@ void LatteOverlay_RenderNotifications(ImVec2& position, ImVec2& pivot, sint32 di ImGui::TextUnformatted(s_mii_name.c_str()); position.y += (ImGui::GetWindowSize().y + 10.0f) * direction; - ImGui::End(); } + ImGui::End(); // controller std::vector> profiles; @@ -209,8 +202,8 @@ void LatteOverlay_RenderNotifications(ImVec2& position, ImVec2& pivot, sint32 di } position.y += (ImGui::GetWindowSize().y + 10.0f) * direction; - ImGui::End(); } + ImGui::End(); } else s_init_overlay = true; @@ -254,10 +247,8 @@ void LatteOverlay_RenderNotifications(ImVec2& position, ImVec2& pivot, sint32 di } position.y += (ImGui::GetWindowSize().y + 10.0f) * direction; - ImGui::End(); } - - + ImGui::End(); } } @@ -306,8 +297,8 @@ void LatteOverlay_RenderNotifications(ImVec2& position, ImVec2& pivot, sint32 di } position.y += (ImGui::GetWindowSize().y + 10.0f) * direction; - ImGui::End(); } + ImGui::End(); } } @@ -363,8 +354,8 @@ void LatteOverlay_RenderNotifications(ImVec2& position, ImVec2& pivot, sint32 di } position.y += (ImGui::GetWindowSize().y + 10.0f) * direction; - ImGui::End(); } + ImGui::End(); } } @@ -402,7 +393,7 @@ void LatteOverlay_RenderNotifications(ImVec2& position, ImVec2& pivot, sint32 di ImRotateEnd(0.001f * ticks.time_since_epoch().count()); ImGui::SameLine(); -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT uint64 totalTime = g_compiling_pipelines_syncTimeSum / 1000000ull; if (s_pipeline_count_async > 0) { @@ -435,8 +426,8 @@ void LatteOverlay_RenderNotifications(ImVec2& position, ImVec2& pivot, sint32 di } #endif position.y += (ImGui::GetWindowSize().y + 10.0f) * direction; - ImGui::End(); } + ImGui::End(); } } } @@ -475,10 +466,9 @@ void LatteOverlay_RenderNotifications(ImVec2& position, ImVec2& pivot, sint32 di } position.y += (ImGui::GetWindowSize().y + 10.0f) * direction; - ImGui::End(); } + ImGui::End(); } - ImGui::PopStyleColor(); ImGui::PopFont(); } @@ -530,20 +520,26 @@ void LatteOverlay_render(bool pad_view) sint32 w = 0, h = 0; if (pad_view && gui_isPadWindowOpen()) - gui_getPadWindowSize(&w, &h); + gui_getPadWindowPhysSize(w, h); else - gui_getWindowSize(&w, &h); + gui_getWindowPhysSize(w, h); if (w == 0 || h == 0) return; const Vector2f window_size{ (float)w,(float)h }; - + + float fontDPIScale = !pad_view ? 
gui_getWindowDPIScale() : gui_getPadDPIScale(); + + float overlayFontSize = 14.0f * (float)config.overlay.text_scale / 100.0f * fontDPIScale; + // test if fonts are already precached - if (!ImGui_GetFont(14.0f * (float)config.overlay.text_scale / 100.0f)) + if (!ImGui_GetFont(overlayFontSize)) return; + + float notificationsFontSize = 14.0f * (float)config.notification.text_scale / 100.0f * fontDPIScale; - if (!ImGui_GetFont(14.0f * (float)config.notification.text_scale / 100.0f)) + if (!ImGui_GetFont(notificationsFontSize)) return; ImVec2 position{}, pivot{}; @@ -552,7 +548,7 @@ void LatteOverlay_render(bool pad_view) if (config.overlay.position != ScreenPosition::kDisabled) { LatteOverlay_translateScreenPosition(config.overlay.position, window_size, position, pivot, direction); - LatteOverlay_renderOverlay(position, pivot, direction); + LatteOverlay_renderOverlay(position, pivot, direction, overlayFontSize, pad_view); } @@ -561,127 +557,58 @@ void LatteOverlay_render(bool pad_view) if(config.overlay.position != config.notification.position) LatteOverlay_translateScreenPosition(config.notification.position, window_size, position, pivot, direction); - LatteOverlay_RenderNotifications(position, pivot, direction); + LatteOverlay_RenderNotifications(position, pivot, direction, notificationsFontSize, pad_view); } } - void LatteOverlay_init() { -#if BOOST_OS_WINDOWS - SYSTEM_INFO sys_info; - GetSystemInfo(&sys_info); - g_state.processor_count = sys_info.dwNumberOfProcessors; + g_state.processor_count = GetProcessorCount(); g_state.processor_times.resize(g_state.processor_count); g_state.cpu_per_core.resize(g_state.processor_count); -#else - g_state.processor_count = 1; -#endif } -void LatteOverlay_updateStats(double fps, sint32 drawcalls) +static void UpdateStats_CemuCpu() +{ + ProcessorTime now; + QueryProcTime(now); + + double cpu = ProcessorTime::Compare(g_state.processor_time_cemu, now); + cpu /= g_state.processor_count; + + g_state.cpu_usage = cpu * 100; + g_state.processor_time_cemu = now; +} + +static void UpdateStats_CpuPerCore() +{ + std::vector now(g_state.processor_count); + QueryCoreTimes(g_state.processor_count, now); + + for (int32_t i = 0; i < g_state.processor_count; ++i) + { + double cpu = ProcessorTime::Compare(g_state.processor_times[i], now[i]); + + g_state.cpu_per_core[i] = cpu * 100; + g_state.processor_times[i] = now[i]; + } +} + +void LatteOverlay_updateStats(double fps, sint32 drawcalls, sint32 fastDrawcalls) { if (GetConfig().overlay.position == ScreenPosition::kDisabled) return; g_state.fps = fps; g_state.draw_calls_per_frame = drawcalls; - -#if BOOST_OS_WINDOWS - // update cemu cpu - FILETIME ftime, fkernel, fuser; - LARGE_INTEGER now, kernel, user; - GetSystemTimeAsFileTime(&ftime); - now.LowPart = ftime.dwLowDateTime; - now.HighPart = ftime.dwHighDateTime; - - GetProcessTimes(GetCurrentProcess(), &ftime, &ftime, &fkernel, &fuser); - kernel.LowPart = fkernel.dwLowDateTime; - kernel.HighPart = fkernel.dwHighDateTime; - - user.LowPart = fuser.dwLowDateTime; - user.HighPart = fuser.dwHighDateTime; - - double percent = (kernel.QuadPart - g_state.kernel) + (user.QuadPart - g_state.user); - percent /= (now.QuadPart - g_state.last_cpu); - percent /= g_state.processor_count; - g_state.cpu_usage = percent * 100; - g_state.last_cpu = now.QuadPart; - g_state.user = user.QuadPart; - g_state.kernel = kernel.QuadPart; - - // update cpu per core - std::vector sppi(g_state.processor_count); - if (NT_SUCCESS(NtQuerySystemInformation(SystemProcessorPerformanceInformation, 
sppi.data(), sizeof(SYSTEM_PROCESSOR_PERFORMANCE_INFORMATION) * g_state.processor_count, nullptr))) - { - for (sint32 i = 0; i < g_state.processor_count; ++i) - { - const uint64 kernel_diff = sppi[i].KernelTime.QuadPart - g_state.processor_times[i].kernel; - const uint64 user_diff = sppi[i].UserTime.QuadPart - g_state.processor_times[i].user; - const uint64 idle_diff = sppi[i].IdleTime.QuadPart - g_state.processor_times[i].idle; - - const auto total = kernel_diff + user_diff; // kernel time already includes idletime - const double cpu = total == 0 ? 0 : (1.0 - ((double)idle_diff / total)) * 100.0; - - g_state.cpu_per_core[i] = cpu; - //total_cpu += cpu; - - g_state.processor_times[i].idle = sppi[i].IdleTime.QuadPart; - g_state.processor_times[i].kernel = sppi[i].KernelTime.QuadPart; - g_state.processor_times[i].user = sppi[i].UserTime.QuadPart; - } - - //total_cpu /= g_state.processor_count; - //g_state.cpu_usage = total_cpu; - } + g_state.fast_draw_calls_per_frame = fastDrawcalls; + UpdateStats_CemuCpu(); + UpdateStats_CpuPerCore(); // update ram - PROCESS_MEMORY_COUNTERS pmc{}; - pmc.cb = sizeof(pmc); - GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)); - g_state.ram_usage = (pmc.WorkingSetSize / 1000) / 1000; -#endif + g_state.ram_usage = (QueryRamUsage() / 1000) / 1000; // update vram g_renderer->GetVRAMInfo(g_state.vramUsage, g_state.vramTotal); } - -void LatteOverlay_updateStatsPerFrame() -{ - if (!ActiveSettings::FrameProfilerEnabled()) - return; - // update frametime graph - uint32 frameTime_total = (uint32)PPCTimer_tscToMicroseconds(performanceMonitor.gpuTime_frameTime.getPreviousFrameValue()); - uint32 frameTime_idle = (uint32)PPCTimer_tscToMicroseconds(performanceMonitor.gpuTime_idleTime.getPreviousFrameValue()); - uint32 frameTime_dcStageTextures = (uint32)PPCTimer_tscToMicroseconds(performanceMonitor.gpuTime_dcStageTextures.getPreviousFrameValue()); - uint32 frameTime_dcStageVertexMgr = (uint32)PPCTimer_tscToMicroseconds(performanceMonitor.gpuTime_dcStageVertexMgr.getPreviousFrameValue()); - uint32 frameTime_dcStageShaderAndUniformMgr = (uint32)PPCTimer_tscToMicroseconds(performanceMonitor.gpuTime_dcStageShaderAndUniformMgr.getPreviousFrameValue()); - uint32 frameTime_dcStageIndexMgr = (uint32)PPCTimer_tscToMicroseconds(performanceMonitor.gpuTime_dcStageIndexMgr.getPreviousFrameValue()); - uint32 frameTime_dcStageMRT = (uint32)PPCTimer_tscToMicroseconds(performanceMonitor.gpuTime_dcStageMRT.getPreviousFrameValue()); - uint32 frameTime_dcStageDrawcallAPI = (uint32)PPCTimer_tscToMicroseconds(performanceMonitor.gpuTime_dcStageDrawcallAPI.getPreviousFrameValue()); - uint32 frameTime_waitForAsync = (uint32)PPCTimer_tscToMicroseconds(performanceMonitor.gpuTime_waitForAsync.getPreviousFrameValue()); - - // make sure total frame time is not less than it's sums - uint32 minimumExpectedFrametime = - frameTime_idle + - frameTime_dcStageTextures + - frameTime_dcStageVertexMgr + - frameTime_dcStageShaderAndUniformMgr + - frameTime_dcStageIndexMgr + - frameTime_dcStageMRT + - frameTime_dcStageDrawcallAPI + - frameTime_waitForAsync; - frameTime_total = std::max(frameTime_total, minimumExpectedFrametime); - - //g_state.frametimeGraph.appendEntry(); - //g_state.frametimeGraph.setCurrentEntryValue(0xFF404040, frameTime_idle); - //g_state.frametimeGraph.setCurrentEntryValue(0xFFFFC0FF, frameTime_waitForAsync); - //g_state.frametimeGraph.setCurrentEntryValue(0xFF000040, frameTime_dcStageTextures); // dark red - //g_state.frametimeGraph.setCurrentEntryValue(0xFF004000, 
frameTime_dcStageVertexMgr); // dark green - //g_state.frametimeGraph.setCurrentEntryValue(0xFFFFFF80, frameTime_dcStageShaderAndUniformMgr); // blueish - //g_state.frametimeGraph.setCurrentEntryValue(0xFF800080, frameTime_dcStageIndexMgr); // purple - //g_state.frametimeGraph.setCurrentEntryValue(0xFF00FF00, frameTime_dcStageMRT); // green - //g_state.frametimeGraph.setCurrentEntryValue(0xFF00FFFF, frameTime_dcStageDrawcallAPI); // yellow - //g_state.frametimeGraph.setCurrentEntryValue(0xFFBBBBBB, frameTime_total - minimumExpectedFrametime); -} diff --git a/src/Cafe/HW/Latte/Core/LatteOverlay.h b/src/Cafe/HW/Latte/Core/LatteOverlay.h index 3df83d1d..824c68b2 100644 --- a/src/Cafe/HW/Latte/Core/LatteOverlay.h +++ b/src/Cafe/HW/Latte/Core/LatteOverlay.h @@ -2,7 +2,6 @@ void LatteOverlay_init(); void LatteOverlay_render(bool pad_view); -void LatteOverlay_updateStats(double fps, sint32 drawcalls); -void LatteOverlay_updateStatsPerFrame(); +void LatteOverlay_updateStats(double fps, sint32 drawcalls, sint32 fastDrawcalls); void LatteOverlay_pushNotification(const std::string& text, sint32 duration); \ No newline at end of file diff --git a/src/Cafe/HW/Latte/Core/LattePM4.h b/src/Cafe/HW/Latte/Core/LattePM4.h index 8079a89c..1f5d2129 100644 --- a/src/Cafe/HW/Latte/Core/LattePM4.h +++ b/src/Cafe/HW/Latte/Core/LattePM4.h @@ -14,6 +14,7 @@ #define IT_MEM_WRITE 0x3D #define IT_SURFACE_SYNC 0x43 #define IT_EVENT_WRITE 0x46 +#define IT_EVENT_WRITE_EOP 0x47 // end of pipe #define IT_LOAD_CONFIG_REG 0x60 #define IT_LOAD_CONTEXT_REG 0x61 @@ -47,14 +48,12 @@ #define IT_HLE_WAIT_FOR_FLIP 0xF1 #define IT_HLE_BOTTOM_OF_PIPE_CB 0xF2 #define IT_HLE_COPY_COLORBUFFER_TO_SCANBUFFER 0xF3 -#define IT_HLE_FIFO_WRAP_AROUND 0xF4 #define IT_HLE_CLEAR_COLOR_DEPTH_STENCIL 0xF5 #define IT_HLE_SAMPLE_TIMER 0xF7 #define IT_HLE_TRIGGER_SCANBUFFER_SWAP 0xF8 #define IT_HLE_SPECIAL_STATE 0xF9 #define IT_HLE_BEGIN_OCCLUSION_QUERY 0xFA #define IT_HLE_END_OCCLUSION_QUERY 0xFB -#define IT_HLE_SET_CB_RETIREMENT_TIMESTAMP 0xFD #define pm4HeaderType3(__itCode, __dataDWordCount) (0xC0000000|((uint32)(__itCode)<<8)|((uint32)((__dataDWordCount)-1)<<16)) #define pm4HeaderType2Filler() (0x80000000) diff --git a/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.cpp b/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.cpp index 7104dbd9..14dfe9a9 100644 --- a/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.cpp +++ b/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.cpp @@ -38,6 +38,7 @@ void LattePerformanceMonitor_frameEnd() uint64 indexDataCached = 0; uint32 frameCounter = 0; uint32 drawCallCounter = 0; + uint32 fastDrawCallCounter = 0; uint32 shaderBindCounter = 0; uint32 recompilerLeaveCount = 0; uint32 threadLeaveCount = 0; @@ -53,6 +54,7 @@ void LattePerformanceMonitor_frameEnd() indexDataCached += performanceMonitor.cycle[i].indexDataCached; frameCounter += performanceMonitor.cycle[i].frameCounter; drawCallCounter += performanceMonitor.cycle[i].drawCallCounter; + fastDrawCallCounter += performanceMonitor.cycle[i].fastDrawCallCounter; shaderBindCounter += performanceMonitor.cycle[i].shaderBindCount; recompilerLeaveCount += performanceMonitor.cycle[i].recompilerLeaveCount; threadLeaveCount += performanceMonitor.cycle[i].threadLeaveCount; @@ -72,19 +74,18 @@ void LattePerformanceMonitor_frameEnd() uniformBankDataUploadedPerFrame /= 1024ULL; uint32 uniformBankCountUploadedPerFrame = (uint32)(uniformBankUploadedCount / (uint64)elapsedFrames); uint64 indexDataUploadPerFrame = (indexDataUploaded / (uint64)elapsedFrames); - indexDataUploadPerFrame /= 
1024ULL; double fps = (double)elapsedFrames2S * 1000.0 / (double)totalElapsedTimeFPS; - uint32 drawCallsPerFrame = drawCallCounter / elapsedFrames; uint32 shaderBindsPerFrame = shaderBindCounter / elapsedFrames; passedCycles = passedCycles * 1000ULL / totalElapsedTime; uint32 rlps = (uint32)((uint64)recompilerLeaveCount * 1000ULL / (uint64)totalElapsedTime); uint32 tlps = (uint32)((uint64)threadLeaveCount * 1000ULL / (uint64)totalElapsedTime); // set stats - + performanceMonitor.stats.indexDataUploadPerFrame = indexDataUploadPerFrame; // next counter cycle sint32 nextCycleIndex = (performanceMonitor.cycleIndex + 1) % PERFORMANCE_MONITOR_TRACK_CYCLES; performanceMonitor.cycle[nextCycleIndex].drawCallCounter = 0; + performanceMonitor.cycle[nextCycleIndex].fastDrawCallCounter = 0; performanceMonitor.cycle[nextCycleIndex].frameCounter = 0; performanceMonitor.cycle[nextCycleIndex].shaderBindCount = 0; performanceMonitor.cycle[nextCycleIndex].lastCycleCount = PPCInterpreter_getMainCoreCycleCounter(); @@ -98,30 +99,25 @@ void LattePerformanceMonitor_frameEnd() performanceMonitor.cycle[nextCycleIndex].recompilerLeaveCount = 0; performanceMonitor.cycle[nextCycleIndex].threadLeaveCount = 0; performanceMonitor.cycleIndex = nextCycleIndex; - + + // next update in 1 second + performanceMonitor.cycle[performanceMonitor.cycleIndex].lastUpdate = GetTickCount(); + if (isFirstUpdate) { - LatteOverlay_updateStats(0.0, 0); + LatteOverlay_updateStats(0.0, 0, 0); gui_updateWindowTitles(false, false, 0.0); } else { - LatteOverlay_updateStats(fps, drawCallCounter / elapsedFrames); + LatteOverlay_updateStats(fps, drawCallCounter / elapsedFrames, fastDrawCallCounter / elapsedFrames); gui_updateWindowTitles(false, false, fps); } - // next update in 1 second - performanceMonitor.cycle[performanceMonitor.cycleIndex].lastUpdate = GetTickCount(); - - // prevent hibernation and screen saver/monitor off - #if BOOST_OS_WINDOWS - SetThreadExecutionState(ES_CONTINUOUS | ES_DISPLAY_REQUIRED | ES_SYSTEM_REQUIRED); - #endif } - LatteOverlay_updateStatsPerFrame(); } void LattePerformanceMonitor_frameBegin() { performanceMonitor.vk.numDrawBarriersPerFrame.reset(); performanceMonitor.vk.numBeginRenderpassPerFrame.reset(); -} \ No newline at end of file +} diff --git a/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.h b/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.h index 77554e80..dbc3cff9 100644 --- a/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.h +++ b/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.h @@ -84,6 +84,7 @@ typedef struct uint32 lastUpdate; uint32 frameCounter; uint32 drawCallCounter; + uint32 fastDrawCallCounter; uint32 shaderBindCount; uint64 vertexDataUploaded; // amount of vertex data uploaded to GPU (bytes) uint64 vertexDataCached; // amount of vertex data reused from GPU cache (bytes) @@ -123,6 +124,7 @@ typedef struct LattePerfStatCounter numGraphicPipelines; LattePerfStatCounter numImages; LattePerfStatCounter numImageViews; + LattePerfStatCounter numSamplers; LattePerfStatCounter numRenderPass; LattePerfStatCounter numFramebuffer; @@ -130,6 +132,12 @@ typedef struct LattePerfStatCounter numDrawBarriersPerFrame; LattePerfStatCounter numBeginRenderpassPerFrame; }vk; + + // calculated stats (per frame) + struct + { + uint32 indexDataUploadPerFrame; + }stats; }performanceMonitor_t; extern performanceMonitor_t performanceMonitor; diff --git a/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp b/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp index 416245b9..2efef5bf 100644 --- 
a/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp +++ b/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp @@ -11,7 +11,6 @@ #include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h" #include "Cafe/GraphicPack/GraphicPack2.h" #include "config/ActiveSettings.h" -#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h" #include "gui/guiWrapper.h" #include "Cafe/OS/libs/erreula/erreula.h" #include "input/InputManager.h" @@ -221,40 +220,10 @@ void LatteMRT::BindDepthBufferOnly(LatteTextureView* view) ApplyCurrentState(); } -/***************************************************/ - -LatteTextureView* LatteMRT_FindColorBufferForClearing(MPTR colorBufferPtr, sint32 colorBufferWidth, sint32 colorBufferHeight, sint32 colorBufferPitch, uint32 format, sint32 sliceIndex, sint32* searchIndex) -{ - LatteTextureView* view = LatteTC_LookupTextureByData(colorBufferPtr, colorBufferWidth, colorBufferHeight, colorBufferPitch, 0, 1, sliceIndex, 1, searchIndex); - if (view == nullptr) - return nullptr; - return view; -} - -LatteTextureView* LatteMRT_CreateColorBuffer(MPTR colorBufferPhysMem, uint32 width, uint32 height, uint32 pitch, Latte::E_GX2SURFFMT format, Latte::E_HWTILEMODE tileMode, uint32 swizzle, uint32 viewSlice) -{ - cemu_assert_debug(colorBufferPhysMem != MPTR_NULL); - LatteTextureView* textureView; - if(viewSlice != 0) - textureView = LatteTexture_CreateMapping(colorBufferPhysMem, MPTR_NULL, width, height, viewSlice+1, pitch, tileMode, swizzle, 0, 1, viewSlice, 1, format, Latte::E_DIM::DIM_2D_ARRAY, Latte::E_DIM::DIM_2D, false); - else - textureView = LatteTexture_CreateMapping(colorBufferPhysMem, MPTR_NULL, width, height, 1, pitch, tileMode, swizzle, 0, 1, viewSlice, 1, format, Latte::E_DIM::DIM_2D, Latte::E_DIM::DIM_2D, false); - // unbind texture - g_renderer->texture_bindAndActivate(nullptr, 0); - return textureView; -} - LatteTextureView* LatteMRT_CreateDepthBuffer(MPTR depthBufferPhysMem, uint32 width, uint32 height, uint32 pitch, Latte::E_HWTILEMODE tileMode, Latte::E_GX2SURFFMT format, uint32 swizzle, sint32 viewSlice) { - LatteTextureView* textureView; - if(viewSlice == 0) - textureView = LatteTexture_CreateMapping(depthBufferPhysMem, MPTR_NULL, width, height, 1, pitch, tileMode, swizzle, 0, 1, viewSlice, 1, format, Latte::E_DIM::DIM_2D, Latte::E_DIM::DIM_2D, true); - else - textureView = LatteTexture_CreateMapping(depthBufferPhysMem, MPTR_NULL, width, height, viewSlice+1, pitch, tileMode, swizzle, 0, 1, viewSlice, 1, format, Latte::E_DIM::DIM_2D_ARRAY, Latte::E_DIM::DIM_2D, true); - + LatteTextureView* textureView = LatteTexture_CreateMapping(depthBufferPhysMem, MPTR_NULL, width, height, viewSlice+1, pitch, tileMode, swizzle, 0, 1, viewSlice, 1, format, viewSlice > 0 ? 
Latte::E_DIM::DIM_2D_ARRAY : Latte::E_DIM::DIM_2D, Latte::E_DIM::DIM_2D, true);
 	LatteMRT::SetDepthAndStencilAttachment(textureView, textureView->baseTexture->hasStencil);
-	// unbind texture
-	g_renderer->texture_bindAndActivate(nullptr, 0);
 	return textureView;
 }
 
@@ -295,6 +264,35 @@ LatteTextureView* LatteMRT::GetColorAttachmentTexture(uint32 index, bool createN
 	uint32 colorBufferHeight = pitchHeight / colorBufferPitch;
 	uint32 colorBufferWidth = colorBufferPitch;
 
+	// colorbuffer width/height has to be padded to 8/32 alignment but the actual resolution might be smaller
+	// use the scissor box as a clue to figure out the original resolution if possible
+	if(LatteGPUState.allowFramebufferSizeOptimization)
+	{
+		uint32 scissorBoxWidth = LatteGPUState.contextNew.PA_SC_GENERIC_SCISSOR_BR.get_BR_X();
+		uint32 scissorBoxHeight = LatteGPUState.contextNew.PA_SC_GENERIC_SCISSOR_BR.get_BR_Y();
+		if (((scissorBoxWidth + 7) & ~7) == colorBufferWidth)
+			colorBufferWidth = scissorBoxWidth;
+		if (((scissorBoxHeight + 31) & ~31) == colorBufferHeight)
+			colorBufferHeight = scissorBoxHeight;
+	}
+
+	// log resolution changes if the above heuristic takes effect
+	// this is useful to find resolutions which need to be updated in gfx pack texture rules
+#if 0
+	uint32 colorBufferHeight2 = pitchHeight / colorBufferPitch;
+	static std::unordered_set<uint64> s_foundColorBufferResMappings;
+	if (colorBufferPitch != colorBufferWidth || colorBufferHeight != colorBufferHeight2)
+	{
+		// only log unique, source and dest resolution. Encode into a key with 16 bits per component
+		uint64 resHash = (uint64)colorBufferWidth | ((uint64)colorBufferHeight << 16) | ((uint64)colorBufferPitch << 32) | ((uint64)colorBufferHeight2 << 48);
+		if( !s_foundColorBufferResMappings.contains(resHash) )
+		{
+			s_foundColorBufferResMappings.insert(resHash);
+			cemuLog_log(LogType::Force, "[COLORBUFFER-DBG] Using res {}x{} instead of {}x{}", colorBufferWidth, colorBufferHeight, colorBufferPitch, colorBufferHeight2);
+		}
+	}
+#endif
+
 	bool colorBufferWasFound = false;
 	sint32 viewFirstMip = 0; // todo
@@ -305,7 +303,7 @@
 	if (colorBufferView == nullptr)
 	{
 		// create color buffer view
-		colorBufferView = LatteTexture_CreateMapping(colorBufferPhysMem, 0, colorBufferWidth, colorBufferHeight, (viewFirstSlice + viewNumSlices), colorBufferPitch, colorBufferTileMode, colorBufferSwizzle>>8, viewFirstMip, 1, viewFirstSlice, viewNumSlices, (Latte::E_GX2SURFFMT)colorBufferFormat, (viewFirstSlice + viewNumSlices)>1? Latte::E_DIM::DIM_2D_ARRAY: Latte::E_DIM::DIM_2D, Latte::E_DIM::DIM_2D, false);
+		colorBufferView = LatteTexture_CreateMapping(colorBufferPhysMem, 0, colorBufferWidth, colorBufferHeight, (viewFirstSlice + viewNumSlices), colorBufferPitch, colorBufferTileMode, colorBufferSwizzle>>8, viewFirstMip, 1, viewFirstSlice, viewNumSlices, (Latte::E_GX2SURFFMT)colorBufferFormat, (viewFirstSlice + viewNumSlices)>1? Latte::E_DIM::DIM_2D_ARRAY: Latte::E_DIM::DIM_2D, Latte::E_DIM::DIM_2D, false, true);
 		LatteGPUState.repeatTextureInitialization = true;
 		checkForTextureChanges = false;
 	}
@@ -341,7 +339,7 @@ uint8 LatteMRT::GetActiveColorBufferMask(const LatteDecompilerShader* pixelShade
 		return 0;
 	cemu_assert_debug(colorControlReg.get_DEGAMMA_ENABLE() == false); // not supported
 	// combine color buffer mask with pixel output mask from pixel shader
-	colorBufferMask &= pixelShader->pixelColorOutputMask;
+	colorBufferMask &= (pixelShader ? 
pixelShader->pixelColorOutputMask : 0); // combine color buffer mask with color channel mask from mmCB_TARGET_MASK (disable render buffer if all colors are blocked) uint32 channelTargetMask = lcr.CB_TARGET_MASK.get_MASK(); for (uint32 i = 0; i < 8; i++) @@ -373,6 +371,7 @@ uint8 LatteMRT::GetActiveColorBufferMask(const LatteDecompilerShader* pixelShade if ((colorBufferWidth < (sint32)scissorAccessWidth) || (colorBufferHeight < (sint32)scissorAccessHeight)) { + // log this? colorBufferMask &= ~(1<baseTexture, &colorAttachmentWidth, &colorAttachmentHeight, nullptr, colorAttachmentView->firstMip); + sint32 colorAttachmentWidth, colorAttachmentHeight; + colorAttachmentView->baseTexture->GetSize(colorAttachmentWidth, colorAttachmentHeight, colorAttachmentView->firstMip); // set effective size sint32 effectiveWidth, effectiveHeight; - LatteTexture_getEffectiveSize(colorAttachmentView->baseTexture, &effectiveWidth, &effectiveHeight, nullptr, colorAttachmentView->firstMip); - if( rtEffectiveSize->width == 0 && rtEffectiveSize->height == 0 ) + colorAttachmentView->baseTexture->GetEffectiveSize(effectiveWidth, effectiveHeight, colorAttachmentView->firstMip); + if (rtEffectiveSize->width == 0 && rtEffectiveSize->height == 0) { rtEffectiveSize->width = effectiveWidth; rtEffectiveSize->height = effectiveHeight; } - else if( rtEffectiveSize->width != effectiveWidth && rtEffectiveSize->height != effectiveHeight ) + else if (rtEffectiveSize->width != effectiveWidth && rtEffectiveSize->height != effectiveHeight) { -#ifndef PUBLIC_RELEASE - forceLog_printf("Color buffer size mismatch (%dx%d). Effective size: %dx%d Real size: %dx%d Mismatching texture: %08x %dx%d fmt %04x", rtEffectiveSize->width, rtEffectiveSize->height, effectiveWidth, effectiveHeight, colorAttachmentView->baseTexture->width, colorAttachmentView->baseTexture->height, colorAttachmentView->baseTexture->physAddress, colorAttachmentView->baseTexture->width, colorAttachmentView->baseTexture->height, (uint32)colorAttachmentView->baseTexture->format); -#endif + cemuLog_logDebug(LogType::Force, "Color buffer size mismatch ({}x{}). 
Effective size: {}x{} Real size: {}x{} Mismatching texture: {:08x} {}x{} fmt {:04x}", rtEffectiveSize->width, rtEffectiveSize->height, effectiveWidth, effectiveHeight, colorAttachmentView->baseTexture->width, colorAttachmentView->baseTexture->height, colorAttachmentView->baseTexture->physAddress, colorAttachmentView->baseTexture->width, colorAttachmentView->baseTexture->height, (uint32)colorAttachmentView->baseTexture->format); } // currently the first color attachment defines the size of the current render target if (rtRealSize->width == 0 && rtRealSize->height == 0) @@ -582,16 +578,11 @@ bool LatteMRT::UpdateCurrentFBO() if (depthBufferPhysMem != MPTR_NULL) { - bool depthBufferWasFound = false; LatteTextureView* depthBufferView = LatteTextureViewLookupCache::lookupSliceEx(depthBufferPhysMem, depthBufferWidth, depthBufferHeight, depthBufferPitch, 0, depthBufferViewFirstSlice, depthBufferFormat, true); - if (depthBufferView == nullptr) + if (!depthBufferView) { - // create depth buffer view - forceLogDebug_printf("Creating depth buffer tex %08x %dx%d slice %d", depthBufferPhysMem, depthBufferHeight, depthBufferWidth, depthBufferViewFirstSlice); - if(depthBufferViewFirstSlice == 0) - depthBufferView = LatteTexture_CreateMapping(depthBufferPhysMem, 0, depthBufferWidth, depthBufferHeight, 1, depthBufferPitch, depthBufferTileMode, depthBufferSwizzle, 0, 1, 0, 1, depthBufferFormat, Latte::E_DIM::DIM_2D, Latte::E_DIM::DIM_2D, true); - else - depthBufferView = LatteTexture_CreateMapping(depthBufferPhysMem, 0, depthBufferWidth, depthBufferHeight, depthBufferViewFirstSlice+1, depthBufferPitch, depthBufferTileMode, depthBufferSwizzle, 0, 1, depthBufferViewFirstSlice, 1, depthBufferFormat, Latte::E_DIM::DIM_2D_ARRAY, Latte::E_DIM::DIM_2D, true); + // create new depth buffer view and if it doesn't exist then also create the texture + depthBufferView = LatteTexture_CreateMapping(depthBufferPhysMem, 0, depthBufferWidth, depthBufferHeight, depthBufferViewFirstSlice+1, depthBufferPitch, depthBufferTileMode, depthBufferSwizzle, 0, 1, depthBufferViewFirstSlice, 1, depthBufferFormat, depthBufferViewFirstSlice > 0 ? Latte::E_DIM::DIM_2D_ARRAY : Latte::E_DIM::DIM_2D, Latte::E_DIM::DIM_2D, true, true); LatteGPUState.repeatTextureInitialization = true; } else @@ -601,7 +592,7 @@ bool LatteMRT::UpdateCurrentFBO() } // set effective size sint32 effectiveWidth, effectiveHeight; - LatteTexture_getEffectiveSize(depthBufferView->baseTexture, &effectiveWidth, &effectiveHeight, NULL); + depthBufferView->baseTexture->GetEffectiveSize(effectiveWidth, effectiveHeight, depthBufferView->firstMip); if (rtEffectiveSize->width == 0 && rtEffectiveSize->height == 0) { rtEffectiveSize->width = effectiveWidth; @@ -611,7 +602,7 @@ bool LatteMRT::UpdateCurrentFBO() { if (_depthBufferSizeWarningCount < 100) { - forceLogDebug_printf("Depth buffer size too small. Effective size: %dx%d Real size: %dx%d Mismatching texture: %08x %dx%d fmt %04x", effectiveWidth, effectiveHeight, depthBufferView->baseTexture->width, depthBufferView->baseTexture->height, depthBufferView->baseTexture->physAddress, depthBufferView->baseTexture->width, depthBufferView->baseTexture->height, (uint32)depthBufferView->baseTexture->format); + cemuLog_logDebug(LogType::Force, "Depth buffer size too small. 
Effective size: {}x{} Real size: {}x{} Mismatching texture: {:08x} {}x{} fmt {:04x}", effectiveWidth, effectiveHeight, depthBufferView->baseTexture->width, depthBufferView->baseTexture->height, depthBufferView->baseTexture->physAddress, depthBufferView->baseTexture->width, depthBufferView->baseTexture->height, (uint32)depthBufferView->baseTexture->format); _depthBufferSizeWarningCount++; } } @@ -721,7 +712,7 @@ void LatteRenderTarget_applyTextureColorClear(LatteTexture* texture, uint32 slic { if (texture->isDepth) { - forceLogDebug_printf("Unsupported clear depth as color"); + cemuLog_logDebug(LogType::Force, "Unsupported clear depth as color"); } else { @@ -751,7 +742,10 @@ void LatteRenderTarget_applyTextureDepthClear(LatteTexture* texture, uint32 slic LatteTexture_MarkDynamicTextureAsChanged(texture->baseView, sliceIndex, mipIndex, eventCounter); } -void LatteRenderTarget_itHLEClearColorDepthStencil(uint32 clearMask, MPTR colorBufferMPTR, MPTR colorBufferFormat, Latte::E_HWTILEMODE colorBufferTilemode, uint32 colorBufferWidth, uint32 colorBufferHeight, uint32 colorBufferPitch, uint32 colorBufferViewFirstSlice, uint32 colorBufferViewNumSlice, MPTR depthBufferMPTR, MPTR depthBufferFormat, Latte::E_HWTILEMODE depthBufferTileMode, sint32 depthBufferWidth, sint32 depthBufferHeight, sint32 depthBufferPitch, sint32 depthBufferViewFirstSlice, sint32 depthBufferViewNumSlice, float r, float g, float b, float a, float clearDepth, uint32 clearStencil) +void LatteRenderTarget_itHLEClearColorDepthStencil(uint32 clearMask, + MPTR colorBufferMPTR, Latte::E_GX2SURFFMT colorBufferFormat, Latte::E_HWTILEMODE colorBufferTilemode, uint32 colorBufferWidth, uint32 colorBufferHeight, uint32 colorBufferPitch, uint32 colorBufferViewFirstSlice, uint32 colorBufferViewNumSlice, + MPTR depthBufferMPTR, Latte::E_GX2SURFFMT depthBufferFormat, Latte::E_HWTILEMODE depthBufferTileMode, sint32 depthBufferWidth, sint32 depthBufferHeight, sint32 depthBufferPitch, sint32 depthBufferViewFirstSlice, sint32 depthBufferViewNumSlice, + float r, float g, float b, float a, float clearDepth, uint32 clearStencil) { uint32 depthBufferMipIndex = 0; // todo uint32 colorBufferMipIndex = 0; // todo @@ -786,13 +780,11 @@ void LatteRenderTarget_itHLEClearColorDepthStencil(uint32 clearMask, MPTR colorB bool targetFound = false; while (true) { - LatteTextureView* colorView = LatteMRT_FindColorBufferForClearing(colorBufferMPTR, colorBufferWidth, colorBufferHeight, colorBufferPitch, colorBufferFormat, colorBufferViewFirstSlice, &searchIndex); + LatteTextureView* colorView = LatteTC_LookupTextureByData(colorBufferMPTR, colorBufferWidth, colorBufferHeight, colorBufferPitch, 0, 1, colorBufferViewFirstSlice, 1, &searchIndex); if (!colorView) break; - if (Latte::GetFormatBits((Latte::E_GX2SURFFMT)colorBufferFormat) != Latte::GetFormatBits(colorView->baseTexture->format)) - { + if (Latte::GetFormatBits(colorBufferFormat) != Latte::GetFormatBits(colorView->baseTexture->format)) continue; - } if (colorView->baseTexture->pitch == colorBufferPitch && colorView->baseTexture->height == colorBufferHeight) targetFound = true; @@ -804,7 +796,7 @@ void LatteRenderTarget_itHLEClearColorDepthStencil(uint32 clearMask, MPTR colorB { // create new texture with matching format cemu_assert_debug(colorBufferViewNumSlice <= 1); - LatteTextureView* newColorView = LatteMRT_CreateColorBuffer(colorBufferMPTR, colorBufferWidth, colorBufferHeight, colorBufferPitch, (Latte::E_GX2SURFFMT)colorBufferFormat, colorBufferTilemode, colorBufferSwizzle, colorBufferViewFirstSlice); + 
LatteTextureView* newColorView = LatteTexture_CreateMapping(colorBufferMPTR, MPTR_NULL, colorBufferWidth, colorBufferHeight, colorBufferViewFirstSlice+1, colorBufferPitch, colorBufferTilemode, colorBufferSwizzle, 0, 1, colorBufferViewFirstSlice, 1, colorBufferFormat, colorBufferViewFirstSlice > 0 ? Latte::E_DIM::DIM_2D_ARRAY : Latte::E_DIM::DIM_2D, Latte::E_DIM::DIM_2D, false); LatteRenderTarget_applyTextureColorClear(newColorView->baseTexture, colorBufferViewFirstSlice, colorBufferMipIndex, r, g, b, a, eventCounter); } } @@ -847,9 +839,9 @@ void LatteRenderTarget_getScreenImageArea(sint32* x, sint32* y, sint32* width, s { int w, h; if(padView && gui_isPadWindowOpen()) - gui_getPadWindowSize(&w, &h); + gui_getPadWindowPhysSize(w, h); else - gui_getWindowSize(&w, &h); + gui_getWindowPhysSize(w, h); sint32 scaledOutputX; sint32 scaledOutputY; @@ -882,20 +874,13 @@ void LatteRenderTarget_getScreenImageArea(sint32* x, sint32* y, sint32* width, s void LatteRenderTarget_copyToBackbuffer(LatteTextureView* textureView, bool isPadView) { - if (g_renderer->GetType() == RendererAPI::Vulkan) - { - ((VulkanRenderer*)g_renderer.get())->PreparePresentationFrame(!isPadView); - } - // make sure texture is updated to latest data in cache LatteTexture_UpdateDataToLatest(textureView->baseTexture); // mark source texture as still in use LatteTC_MarkTextureStillInUse(textureView->baseTexture); - sint32 effectiveWidth; - sint32 effectiveHeight; - sint32 effectiveDepth; - LatteTexture_getEffectiveSize(textureView->baseTexture, &effectiveWidth, &effectiveHeight, &effectiveDepth, 0); + sint32 effectiveWidth, effectiveHeight; + textureView->baseTexture->GetEffectiveSize(effectiveWidth, effectiveHeight, 0); _currentOutputImageWidth = effectiveWidth; _currentOutputImageHeight = effectiveHeight; @@ -947,13 +932,6 @@ void LatteRenderTarget_copyToBackbuffer(LatteTextureView* textureView, bool isPa if (shader == nullptr) { sint32 scaling_filter = downscaling ? 
GetConfig().downscale_filter : GetConfig().upscale_filter;
-
-		if (g_renderer->GetType() == RendererAPI::Vulkan)
-		{
-			// force linear or nearest neighbor filter
-			if(scaling_filter != kLinearFilter && scaling_filter != kNearestNeighborFilter)
-				scaling_filter = kLinearFilter;
-		}
 
 		if (scaling_filter == kLinearFilter)
 		{
@@ -971,7 +949,7 @@
 			else
 				shader = RendererOutputShader::s_bicubic_shader;
 
-			filter = LatteTextureView::MagFilter::kNearestNeighbor;
+			filter = LatteTextureView::MagFilter::kLinear;
 		}
 		else if (scaling_filter == kBicubicHermiteFilter)
 		{
@@ -1003,9 +981,6 @@
 	g_renderer->ImguiEnd();
 }
 
-bool alwaysDisplayDRC = false;
-bool ctrlTabHotkeyPressed = false;
-
 void LatteRenderTarget_itHLECopyColorBufferToScanBuffer(MPTR colorBufferPtr, uint32 colorBufferWidth, uint32 colorBufferHeight, uint32 colorBufferSliceIndex, uint32 colorBufferFormat, uint32 colorBufferPitch, Latte::E_HWTILEMODE colorBufferTilemode, uint32 colorBufferSwizzle, uint32 renderTarget)
 {
 	cemu_assert_debug(colorBufferSliceIndex == 0); // todo - support for non-zero slice
@@ -1015,40 +990,35 @@
 		return;
 	}
 
-	const bool tabPressed = gui_isKeyDown(WXK_TAB);
-	const bool ctrlPressed = gui_isKeyDown(0xA2); // VK_LCONTROL
-	bool showDRC = swkbd_hasKeyboardInputHook() == false && tabPressed;
+	auto getVPADScreenActive = [](size_t n) -> std::pair<bool, bool> {
+		auto controller = InputManager::instance().get_vpad_controller(n);
+		if (!controller)
+			return {false,false};
+		auto pressed = controller->is_screen_active();
+		auto toggle = controller->is_screen_active_toggle();
+		return {pressed && !toggle, pressed && toggle};
+	};
 
-	if (ctrlPressed && tabPressed)
-	{
-		if (ctrlTabHotkeyPressed == false)
-		{
-			alwaysDisplayDRC = !alwaysDisplayDRC;
-			ctrlTabHotkeyPressed = true;
-		}
-	}
-	else
-		ctrlTabHotkeyPressed = false;
+	const bool tabPressed = gui_isKeyDown(PlatformKeyCodes::TAB);
+	const bool ctrlPressed = gui_isKeyDown(PlatformKeyCodes::LCONTROL);
+	const auto [vpad0Active, vpad0Toggle] = getVPADScreenActive(0);
+	const auto [vpad1Active, vpad1Toggle] = getVPADScreenActive(1);
 
-	if (alwaysDisplayDRC)
-		showDRC = !tabPressed;
+	const bool altScreenRequested = (!ctrlPressed && tabPressed) || vpad0Active || vpad1Active;
+	const bool togglePressed = (ctrlPressed && tabPressed) || vpad0Toggle || vpad1Toggle;
+	static bool togglePressedLast = false;
 
-	if (!showDRC)
-	{
-		auto controller = InputManager::instance().get_vpad_controller(0);
-		if (controller && controller->is_screen_active())
-			showDRC = true;
-		if (!showDRC)
-		{
-			controller = InputManager::instance().get_vpad_controller(1);
-			if (controller && controller->is_screen_active())
-				showDRC = true;
-		}
-	}
+	bool& isDRCPrimary = LatteGPUState.isDRCPrimary;
 
-	if (renderTarget == 4 && g_renderer->IsPadWindowActive())
+	if(togglePressed && !togglePressedLast)
+		isDRCPrimary = !isDRCPrimary;
+	togglePressedLast = togglePressed;
+
+	bool showDRC = swkbd_hasKeyboardInputHook() == false && (isDRCPrimary ^ altScreenRequested);
+
+	if ((renderTarget & RENDER_TARGET_DRC) && g_renderer->IsPadWindowActive())
 		LatteRenderTarget_copyToBackbuffer(texView, true);
 
-	if ((renderTarget == 1 && !showDRC) || (renderTarget == 4 && showDRC))
+	if (((renderTarget & RENDER_TARGET_TV) && !showDRC) || ((renderTarget & RENDER_TARGET_DRC) && showDRC))
LatteRenderTarget_copyToBackbuffer(texView, false); } @@ -1130,4 +1100,4 @@ void LatteRenderTarget_unloadAll() LatteMRT::DeleteCachedFBO(g_emptyFBO); g_emptyFBO = nullptr; } -} \ No newline at end of file +} diff --git a/src/Cafe/HW/Latte/Core/LatteRingBuffer.cpp b/src/Cafe/HW/Latte/Core/LatteRingBuffer.cpp index 15c2b24c..22eeea4b 100644 --- a/src/Cafe/HW/Latte/Core/LatteRingBuffer.cpp +++ b/src/Cafe/HW/Latte/Core/LatteRingBuffer.cpp @@ -11,7 +11,7 @@ LatteRingBuffer_t* LatteRingBuffer_create(uint8* data, uint32 size) uint8* LatteRingBuffer_allocate(LatteRingBuffer_t* rb, sint32 size, sint32 alignment) { -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT cemu_assert_debug(size < rb->size); #endif // align diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 5e97d485..d9f0a5dd 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -1,18 +1,25 @@ #include "Cafe/HW/Latte/Core/LatteConst.h" #include "Cafe/HW/Latte/Core/LatteShaderAssembly.h" #include "Cafe/HW/Latte/ISA/RegDefines.h" -#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency #include "Cafe/HW/Latte/ISA/LatteReg.h" #include "Cafe/HW/Latte/Core/LatteShader.h" #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h" +#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h" +#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency #include "Cafe/GraphicPack/GraphicPack2.h" #include "util/helpers/StringParser.h" #include "config/ActiveSettings.h" +#include "Cafe/GameProfile/GameProfile.h" +#include "util/containers/flat_hash_map.hpp" +#include + +// experimental new decompiler (WIP) #include "util/Zir/EmitterGLSL/ZpIREmitGLSL.h" #include "util/Zir/Core/ZpIRDebug.h" -#include "util/containers/flat_hash_map.hpp" +#include "Cafe/HW/Latte/Transcompiler/LatteTC.h" +#include "Cafe/HW/Latte/ShaderInfo/ShaderInfo.h" struct _ShaderHashCache { @@ -137,7 +144,7 @@ LatteShaderPSInputTable* LatteSHRC_GetPSInputTable() return &_activePSImportTable; } -bool LatteSHRC_RemoveFromCache(LatteDecompilerShader* shader) +void LatteSHRC_RemoveFromCache(LatteDecompilerShader* shader) { bool removed = false; auto& cache = LatteSHRC_GetCacheByType(shader->shaderType); @@ -149,10 +156,14 @@ bool LatteSHRC_RemoveFromCache(LatteDecompilerShader* shader) } else if (baseIt->second == shader) { - if (baseIt->second->next) - cache.emplace(shader->baseHash, baseIt->second->next); - else - cache.erase(baseIt); + cemu_assert_debug(baseIt->second == shader); + cache.erase(baseIt); + if (shader->next) + { + cemu_assert_debug(shader->baseHash == shader->next->baseHash); + cache.emplace(shader->baseHash, shader->next); + } + shader->next = 0; removed = true; } else @@ -169,7 +180,7 @@ bool LatteSHRC_RemoveFromCache(LatteDecompilerShader* shader) } } } - return removed; + cemu_assert(removed); } void LatteSHRC_RemoveFromCacheByHash(uint64 shader_base_hash, uint64 shader_aux_hash, LatteConst::ShaderType type) @@ -236,7 +247,7 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters) } // semantic imports from vertex shader -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT uint8 semanticMask[256 / 8] = { 0 }; #endif cemu_assert_debug(numPSInputs <= GPU7_PS_MAX_INPUTS); @@ -273,10 +284,10 @@ void LatteShader_UpdatePSInputs(uint32* contextRegisters) } else { -#ifndef PUBLIC_RELEASE +#ifdef CEMU_DEBUG_ASSERT if (semanticMask[psSemanticId >> 3] & (1 << (psSemanticId & 
7))) { - forceLogDebug_printf("SemanticId already used"); + cemuLog_logDebug(LogType::Force, "SemanticId already used"); } semanticMask[psSemanticId >> 3] |= (1 << (psSemanticId & 7)); #endif @@ -294,7 +305,7 @@ void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compil { if (shader->hasError ) { - forceLog_printf("Unable to compile shader %I64x", shader->baseHash); + cemuLog_log(LogType::Force, "Unable to compile shader {:016x}", shader->baseHash); return; } @@ -332,7 +343,7 @@ void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compil (shader->baseHash == 0x15bc7edf9de2ed30 || shader->baseHash == 0x83a697d61a3b9202 || shader->baseHash == 0x97bc44a5028381c6 || shader->baseHash == 0x24838b84d15a1da1)) { - forceLogDebug_printf("Filtered shader to avoid AMD crash"); + cemuLog_logDebug(LogType::Force, "Filtered shader to avoid AMD crash"); shader->shader = nullptr; shader->hasError = true; return; @@ -350,7 +361,7 @@ void LatteShader_FinishCompilation(LatteDecompilerShader* shader) { if (shader->hasError) { - forceLogDebug_printf("LatteShader_finishCompilation(): Skipped because of error in shader %llx", shader->baseHash); + cemuLog_logDebug(LogType::Force, "LatteShader_finishCompilation(): Skipped because of error in shader {:x}", shader->baseHash); return; } shader->shader->WaitForCompiled(); @@ -440,9 +451,8 @@ void LatteShader_DumpShader(uint64 baseHash, uint64 auxHash, LatteDecompilerShad suffix = "gs"; else if (shader->shaderType == LatteConst::ShaderType::Pixel) suffix = "ps"; - fs::path dumpPath = "dump/shaders"; - dumpPath /= fmt::format("{:016x}_{:016x}_{}.txt", baseHash, auxHash, suffix); - FileStream* fs = FileStream::createFile2(dumpPath); + + FileStream* fs = FileStream::createFile2(ActiveSettings::GetUserDataPath("dump/shaders/{:016x}_{:016x}_{}.txt", baseHash, auxHash, suffix)); if (fs) { if (shader->strBuf_shaderSource) @@ -468,9 +478,8 @@ void LatteShader_DumpRawShader(uint64 baseHash, uint64 auxHash, uint32 type, uin suffix = "copy"; else if (type == SHADER_DUMP_TYPE_COMPUTE) suffix = "compute"; - fs::path dumpPath = "dump/shaders"; - dumpPath /= fmt::format("{:016x}_{:016x}_{}.bin", baseHash, auxHash, suffix); - FileStream* fs = FileStream::createFile2(dumpPath); + + FileStream* fs = FileStream::createFile2(ActiveSettings::GetUserDataPath("dump/shaders/{:016x}_{:016x}_{}.bin", baseHash, auxHash, suffix)); if (fs) { fs->writeData(programCode, programLen); @@ -513,7 +522,7 @@ void LatteSHRC_UpdateGSBaseHash(uint8* geometryShaderPtr, uint32 geometryShaderS // update hash from geometry shader data uint64 gsHash1 = 0; uint64 gsHash2 = 0; - _calculateShaderProgramHash((uint32*)geometryShaderPtr, geometryShaderSize, &hashCacheVS, &gsHash1, &gsHash2); + _calculateShaderProgramHash((uint32*)geometryShaderPtr, geometryShaderSize, &hashCacheGS, &gsHash1, &gsHash2); // get geometry shader uint64 gsHash = gsHash1 + gsHash2; gsHash += (uint64)_activeVertexShader->ringParameterCount; @@ -543,7 +552,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont // hash stride for streamout buffers for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) { - if(!vertexShader->streamoutBufferWriteMask2[i]) + if(!vertexShader->streamoutBufferWriteMask[i]) continue; uint32 bufferStride = contextRegisters[mmVGT_STRMOUT_VTX_STRIDE_0 + i * 4]; auxHash = std::rotl(auxHash, 7); @@ -611,7 +620,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi // copy texture info shader->textureUnitMask2 = 
@@ -440,9 +451,8 @@ void LatteShader_DumpShader(uint64 baseHash, uint64 auxHash, LatteDecompilerShad
 		suffix = "gs";
 	else if (shader->shaderType == LatteConst::ShaderType::Pixel)
 		suffix = "ps";
-	fs::path dumpPath = "dump/shaders";
-	dumpPath /= fmt::format("{:016x}_{:016x}_{}.txt", baseHash, auxHash, suffix);
-	FileStream* fs = FileStream::createFile2(dumpPath);
+
+	FileStream* fs = FileStream::createFile2(ActiveSettings::GetUserDataPath("dump/shaders/{:016x}_{:016x}_{}.txt", baseHash, auxHash, suffix));
 	if (fs)
 	{
 		if (shader->strBuf_shaderSource)
@@ -468,9 +478,8 @@ void LatteShader_DumpRawShader(uint64 baseHash, uint64 auxHash, uint32 type, uin
 		suffix = "copy";
 	else if (type == SHADER_DUMP_TYPE_COMPUTE)
 		suffix = "compute";
-	fs::path dumpPath = "dump/shaders";
-	dumpPath /= fmt::format("{:016x}_{:016x}_{}.bin", baseHash, auxHash, suffix);
-	FileStream* fs = FileStream::createFile2(dumpPath);
+
+	FileStream* fs = FileStream::createFile2(ActiveSettings::GetUserDataPath("dump/shaders/{:016x}_{:016x}_{}.bin", baseHash, auxHash, suffix));
 	if (fs)
 	{
 		fs->writeData(programCode, programLen);
@@ -513,7 +522,7 @@ void LatteSHRC_UpdateGSBaseHash(uint8* geometryShaderPtr, uint32 geometryShaderS
 	// update hash from geometry shader data
 	uint64 gsHash1 = 0;
 	uint64 gsHash2 = 0;
-	_calculateShaderProgramHash((uint32*)geometryShaderPtr, geometryShaderSize, &hashCacheVS, &gsHash1, &gsHash2);
+	_calculateShaderProgramHash((uint32*)geometryShaderPtr, geometryShaderSize, &hashCacheGS, &gsHash1, &gsHash2);
 	// get geometry shader
 	uint64 gsHash = gsHash1 + gsHash2;
 	gsHash += (uint64)_activeVertexShader->ringParameterCount;
@@ -543,7 +552,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
 	// hash stride for streamout buffers
 	for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
 	{
-		if(!vertexShader->streamoutBufferWriteMask2[i])
+		if(!vertexShader->streamoutBufferWriteMask[i])
 			continue;
 		uint32 bufferStride = contextRegisters[mmVGT_STRMOUT_VTX_STRIDE_0 + i * 4];
 		auxHash = std::rotl(auxHash, 7);
@@ -611,7 +620,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
 	// copy texture info
 	shader->textureUnitMask2 = decompilerOutput.textureUnitMask;
 	// copy streamout info
-	shader->streamoutBufferWriteMask2 = decompilerOutput.streamoutBufferWriteMask;
+	shader->streamoutBufferWriteMask = decompilerOutput.streamoutBufferWriteMask;
 	shader->hasStreamoutBufferWrite = decompilerOutput.streamoutBufferWriteMask.any();
 	// copy uniform offsets
 	// for OpenGL these are retrieved in _prepareSeparableUniforms()
@@ -641,7 +650,7 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
 	}
 	else
 	{
-		shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsVK.count_uniformRegister;
+		shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsGL.count_uniformRegister;
 	}
 	// calculate aux hash
 	if (calculateAuxHash)
@@ -671,10 +680,19 @@ LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompi
 	return shader;
 }
 
-#include "Cafe/HW/Latte/Transcompiler/LatteTC.h"
-#include "Cafe/HW/Latte/ShaderInfo/ShaderInfo.h"
+void LatteShader_GetDecompilerOptions(LatteDecompilerOptions& options, LatteConst::ShaderType shaderType, bool geometryShaderEnabled)
+{
+	options.usesGeometryShader = geometryShaderEnabled;
+	options.spirvInstrinsics.hasRoundingModeRTEFloat32 = false;
+	if (g_renderer->GetType() == RendererAPI::Vulkan)
+	{
+		options.useTFViaSSBO = VulkanRenderer::GetInstance()->UseTFViaSSBO();
+		options.spirvInstrinsics.hasRoundingModeRTEFloat32 = VulkanRenderer::GetInstance()->HasSPRIVRoundingModeRTE32();
+	}
+	options.strictMul = g_current_game_profile->GetAccurateShaderMul() != AccurateShaderMulOption::False;
+}
 
-LatteDecompilerShader* LatteShader_compileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
+LatteDecompilerShader* LatteShader_CompileSeparableVertexShader2(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
 {
 	/* Analyze shader to gather general information about inputs/outputs */
 	Latte::ShaderDescription shaderDescription;
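Reviewer note (not part of the patch): LatteDecompilerOptions itself is not defined in this diff. Judging only by the fields the new LatteShader_GetDecompilerOptions helper writes, its shape is plausibly something like the sketch below; treat everything here as an assumption except the member names that appear verbatim in the hunk above.

// Assumed shape only - reconstructed from the assignments in LatteShader_GetDecompilerOptions;
// the real definition lives in the decompiler headers and may have more members or other defaults.
struct LatteDecompilerOptions
{
    bool usesGeometryShader{ false }; // whether a geometry shader is part of the current pipeline
    bool useTFViaSSBO{ false };       // Vulkan path: emulate transform feedback through an SSBO
    bool strictMul{ false };          // from the game profile's AccurateShaderMul setting
    struct
    {
        bool hasRoundingModeRTEFloat32{ false }; // SPIR-V round-to-nearest-even mode available
    } spirvInstrinsics; // spelling kept exactly as it appears in the patch
};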
@@ -724,14 +742,15 @@
 
 // compile new vertex shader (relies partially on current state)
 LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
 {
-	// new decompiler
-	//LatteShader_compileSeparableVertexShader(baseHash, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, fetchShader);
+	// new decompiler test
+	//LatteShader_CompileSeparableVertexShader2(baseHash, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, fetchShader);
 	// legacy decompiler
+	LatteDecompilerOptions options;
+	LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Vertex, usesGeometryShader);
+
 	LatteDecompilerOutput_t decompilerOutput{};
-	LatteFetchShader* fetchShaderList[1];
-	fetchShaderList[0] = fetchShader;
-	LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShaderList, 1, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
+	LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShader, options, &decompilerOutput);
 	LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
 	vsAuxHash = vertexShader->auxHash;
 	if (vertexShader->hasError == false)
@@ -758,10 +777,11 @@ LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash,
 
 LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHash, uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize)
 {
+	LatteDecompilerOptions options;
+	LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Geometry, true);
+
 	LatteDecompilerOutput_t decompilerOutput{};
-	LatteFetchShader* fetchShaderList[1];
-	fetchShaderList[0] = _activeFetchShader;
-	LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount, &decompilerOutput);
+	LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, _activeVertexShader->ringParameterCount, options, &decompilerOutput);
 	LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
 	if (geometryShader->hasError == false)
 	{
@@ -786,8 +806,11 @@ LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHas
 
 LatteDecompilerShader* LatteShader_CompileSeparablePixelShader(uint64 baseHash, uint64& psAuxHash, uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader)
 {
+	LatteDecompilerOptions options;
+	LatteShader_GetDecompilerOptions(options, LatteConst::ShaderType::Pixel, usesGeometryShader);
+
 	LatteDecompilerOutput_t decompilerOutput{};
-	LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &decompilerOutput);
+	LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, options, &decompilerOutput);
 	LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister);
 	psAuxHash = pixelShader->auxHash;
 	LatteShader_DumpShader(_shaderBaseHash_ps, psAuxHash, pixelShader);
@@ -812,6 +835,7 @@ LatteDecompilerShader* LatteShader_CompileSeparablePixelShader(uint64 baseHash,
 
 void LatteSHRC_UpdateVertexShader(uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader)
 {
+	// todo - should include VTX_SEMANTIC table in state
 	LatteSHRC_UpdateVSBaseHash(vertexShaderPtr, vertexShaderSize, usesGeometryShader);
 	uint64 vsAuxHash = 0;
 	auto itBaseShader = sVertexShaders.find(_shaderBaseHash_vs);
@@ -828,15 +852,13 @@ void LatteSHRC_UpdateVertexShader(uint8* vertexShaderPtr, uint32 vertexShaderSiz
 		LatteGPUState.activeShaderHasError = true;
 		return;
 	}
-	g_renderer->shader_bind(vertexShader->shader);
 	_activeVertexShader = vertexShader;
 }
 
 void LatteSHRC_UpdateGeometryShader(bool usesGeometryShader, uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize)
 {
-	if (usesGeometryShader == false || _activeVertexShader == nullptr)
+	if (!usesGeometryShader || !_activeVertexShader)
 	{
-		g_renderer->shader_unbind(RendererShader::ShaderType::kGeometry);
 		_shaderBaseHash_gs = 0;
 		_activeGeometryShader = nullptr;
 		return;
 	}
@@ -860,21 +882,11 @@ void LatteSHRC_UpdateGeometryShad
 		LatteGPUState.activeShaderHasError = true;
 		return;
 	}
-	g_renderer->shader_bind(geometryShader->shader);
 	_activeGeometryShader = geometryShader;
 }
 
 void LatteSHRC_UpdatePixelShader(uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader)
 {
-	if (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] != 0 && g_renderer->GetType() == RendererAPI::OpenGL)
-	{
-		if (_activePixelShader)
-		{
-			g_renderer->shader_unbind(RendererShader::ShaderType::kFragment);
-			_activePixelShader = nullptr;
-		}
-		return;
-	}
 	LatteSHRC_UpdatePSBaseHash(pixelShaderPtr, pixelShaderSize, usesGeometryShader);
 	uint64 psAuxHash = 0;
 	auto itBaseShader = sPixelShaders.find(_shaderBaseHash_ps);
@@ -891,7 +903,6 @@ void LatteSHRC_UpdatePixelShader(uint8* pixelShaderPtr, uint32 pixelShaderSize,
 		LatteGPUState.activeShaderHasError = true;
 		return;
 	}
-	g_renderer->shader_bind(pixelShader->shader);
 	_activePixelShader = pixelShader;
 }
 
@@ -986,3 +997,16 @@ void LatteSHRC_Init()
 	cemu_assert_debug(sGeometryShaders.empty());
 	cemu_assert_debug(sPixelShaders.empty());
 }
+
+void LatteSHRC_UnloadAll()
+{
+	while(!sVertexShaders.empty())
+		LatteShader_free(sVertexShaders.begin()->second);
+	cemu_assert_debug(sVertexShaders.empty());
+	while(!sGeometryShaders.empty())
+		LatteShader_free(sGeometryShaders.begin()->second);
+	cemu_assert_debug(sGeometryShaders.empty());
+	while(!sPixelShaders.empty())
+		LatteShader_free(sPixelShaders.begin()->second);
+	cemu_assert_debug(sPixelShaders.empty());
+}
\ No newline at end of file
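Reviewer note (not part of the patch): the new LatteSHRC_UnloadAll above, together with the LatteShaderCache_Close declaration added to LatteShader.h below, suggests a paired teardown of the runtime shader cache and the on-disk cache. The call sites are outside this section; the sketch below is only an assumed ordering with a hypothetical wrapper name.

// Assumed usage only - Latte_ShutdownShaders is a hypothetical name; the real call sites
// for these functions are not shown in this part of the diff.
void Latte_ShutdownShaders()
{
    LatteSHRC_UnloadAll();         // free every cached vertex/geometry/pixel shader (added above)
    LatteShaderCache_Close();      // close the on-disk shader cache (declared in LatteShader.h below)
    LatteRenderTarget_unloadAll(); // drop cached FBOs, see the LatteRenderTarget hunk earlier
}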
diff --git a/src/Cafe/HW/Latte/Core/LatteShader.h b/src/Cafe/HW/Latte/Core/LatteShader.h
index 0fba0322..f8dc6d1a 100644
--- a/src/Cafe/HW/Latte/Core/LatteShader.h
+++ b/src/Cafe/HW/Latte/Core/LatteShader.h
@@ -3,6 +3,7 @@
 #include "Cafe/HW/Latte/ISA/RegDefines.h"
 
 void LatteSHRC_Init();
+void LatteSHRC_UnloadAll();
 
 void LatteSHRC_ResetCachedShaderHash();
 void LatteShaderSHRC_UpdateFetchShader();
@@ -94,6 +95,7 @@ extern uint64 _shaderBaseHash_vs;
 extern uint64 _shaderBaseHash_gs;
 extern uint64 _shaderBaseHash_ps;
 
+void LatteShader_GetDecompilerOptions(struct LatteDecompilerOptions& options, LatteConst::ShaderType shaderType, bool geometryShaderEnabled);
 LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompilerOutput_t& decompilerOutput, uint64 baseHash, bool calculateAuxHash, uint64 optionalAuxHash, uint32* contextRegister);
 
 void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync);
@@ -116,11 +118,12 @@ void LatteShader_DumpShader(uint64 baseHash, uint64 auxHash, LatteDecompilerShad
 void LatteShader_DumpRawShader(uint64 baseHash, uint64 auxHash, uint32 type, uint8* programCode, uint32 programLen);
 
 // shader cache file
-void LatteShaderCache_load();
+void LatteShaderCache_Load();
+void LatteShaderCache_Close();
 
 void LatteShaderCache_writeSeparableVertexShader(uint64 shaderBaseHash, uint64 shaderAuxHash, uint8* fetchShader, uint32 fetchShaderSize, uint8* vertexShader, uint32 vertexShaderSize, uint32* contextRegisters, bool usesGeometryShader);
 void LatteShaderCache_writeSeparableGeometryShader(uint64 shaderBaseHash, uint64 shaderAuxHash, uint8* geometryShader, uint32 geometryShaderSize, uint8* gsCopyShader, uint32 gsCopyShaderSize, uint32* contextRegisters, uint32* hleSpecialState, uint32 vsRingParameterCount);
 void LatteShaderCache_writeSeparablePixelShader(uint64 shaderBaseHash, uint64 shaderAuxHash, uint8* pixelShader, uint32 pixelShaderSize, uint32* contextRegisters, bool usesGeometryShader);
 
-// todo - sort this
+// todo - refactor this
 sint32 LatteDecompiler_getTextureSamplerBaseIndex(LatteConst::ShaderType shaderType);
\ No newline at end of file
diff --git a/src/Cafe/HW/Latte/Core/LatteShaderAssembly.h b/src/Cafe/HW/Latte/Core/LatteShaderAssembly.h
index df636689..d2314a53 100644
--- a/src/Cafe/HW/Latte/Core/LatteShaderAssembly.h
+++ b/src/Cafe/HW/Latte/Core/LatteShaderAssembly.h
@@ -12,7 +12,7 @@
 #define GPU7_CF_INST_VTX (0x02) // used only in GS copy program?
 #define GPU7_CF_INST_LOOP_END (0x05)
 #define GPU7_CF_INST_LOOP_START_DX10 (0x06)
-#define GPU7_CF_INST_LOOP_START_NO_AL (0x07) // (Seen in Project Zero)
+#define GPU7_CF_INST_LOOP_START_NO_AL (0x07) // (Seen in Project Zero, Injustice: Gods Among Us)
 #define GPU7_CF_INST_LOOP_BREAK (0x09)
 #define GPU7_CF_INST_JUMP (0x0A)
 
diff --git a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp
index ebf425bc..86035973 100644
--- a/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp
+++ b/src/Cafe/HW/Latte/Core/LatteShaderCache.cpp
@@ -25,6 +25,9 @@
 #include "util/helpers/Serializer.h"
 
 #include
+#include