diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e798c1a7..c7cbc202 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -177,9 +177,6 @@ jobs: build-macos: runs-on: macos-14 - strategy: - matrix: - arch: [x86_64, arm64] steps: - name: "Checkout repo" uses: actions/checkout@v4 @@ -239,7 +236,7 @@ jobs: cd build cmake .. ${{ env.BUILD_FLAGS }} \ -DCMAKE_BUILD_TYPE=${{ env.BUILD_MODE }} \ - -DCMAKE_OSX_ARCHITECTURES=${{ matrix.arch }} \ + -DCMAKE_OSX_ARCHITECTURES=x86_64 \ -DMACOS_BUNDLE=ON \ -G Ninja @@ -262,5 +259,5 @@ jobs: - name: Upload artifact uses: actions/upload-artifact@v4 with: - name: cemu-bin-macos-${{ matrix.arch }} + name: cemu-bin-macos-x64 path: ./bin/Cemu.dmg diff --git a/CMakeLists.txt b/CMakeLists.txt index c70b0a40..eb848ce7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -222,12 +222,7 @@ endif() add_subdirectory("dependencies/ih264d" EXCLUDE_FROM_ALL) -if (CMAKE_OSX_ARCHITECTURES) - set(CEMU_ARCHITECTURE ${CMAKE_OSX_ARCHITECTURES}) -else() - set(CEMU_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR}) -endif() -if(CEMU_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(AARCH64)") add_subdirectory("dependencies/xbyak_aarch64" EXCLUDE_FROM_ALL) endif() @@ -236,4 +231,4 @@ if (NOT ZArchive_FOUND) add_subdirectory("dependencies/ZArchive" EXCLUDE_FROM_ALL) endif() -add_subdirectory(src) \ No newline at end of file +add_subdirectory(src) diff --git a/dependencies/ih264d/CMakeLists.txt b/dependencies/ih264d/CMakeLists.txt index 64ac0931..686a9d08 100644 --- a/dependencies/ih264d/CMakeLists.txt +++ b/dependencies/ih264d/CMakeLists.txt @@ -183,9 +183,6 @@ target_sources(ih264d PRIVATE "decoder/arm/ih264d_function_selector.c" ) target_compile_options(ih264d PRIVATE -DARMV8) -if(APPLE) - target_sources(ih264d PRIVATE "common/armv8/macos_arm_symbol_aliases.s") -endif() else() message(FATAL_ERROR "ih264d unknown architecture: ${IH264D_ARCHITECTURE}") endif() diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s index c0d9cf99..39c02560 100644 --- a/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_chroma_av8.s @@ -429,13 +429,8 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8: rev64 v7.4h, v2.4h ld1 {v3.2s}, [x10] sub x5, x3, #8 -#ifdef __APPLE__ - adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGE - ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGEOFF] -#else adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs1 ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs1] -#endif usubl v10.8h, v5.8b, v1.8b ld1 {v8.8b, v9.8b}, [x12] // Load multiplication factors 1 to 8 into D3 mov v8.d[1], v9.d[0] @@ -489,13 +484,10 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8: zip1 v1.8h, v0.8h, v2.8h zip2 v2.8h, v0.8h, v2.8h mov v0.16b, v1.16b -#ifdef __APPLE__ - adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGE - ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGEOFF] -#else + adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs2 ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs2] -#endif + ld1 {v8.2s, v9.2s}, [x12] mov v8.d[1], v9.d[0] mov v10.16b, v8.16b diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s index 2422d8cd..fa19c121 100644 --- a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_16x16_av8.s @@ -431,13 +431,10 @@ ih264_intra_pred_luma_16x16_mode_plane_av8: mov x10, x1 //top_left mov x4, #-1 ld1 {v2.2s}, [x1], x8 -#ifdef __APPLE__ - adrp x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGE - ldr x7, [x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGEOFF] -#else + adrp x7, :got:ih264_gai1_intrapred_luma_plane_coeffs ldr x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs] -#endif + ld1 {v0.2s}, [x1] rev64 v2.8b, v2.8b ld1 {v6.2s, v7.2s}, [x7] diff --git a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s index 6fa31ded..273aa81b 100644 --- a/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_intra_pred_luma_8x8_av8.s @@ -1029,13 +1029,9 @@ ih264_intra_pred_luma_8x8_mode_horz_u_av8: mov v3.d[0], v2.d[1] ext v4.16b, v2.16b , v2.16b , #1 mov v5.d[0], v4.d[1] -#ifdef __APPLE__ - adrp x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGE - ldr x12, [x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGEOFF] -#else + adrp x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u] -#endif uaddl v20.8h, v0.8b, v2.8b uaddl v22.8h, v1.8b, v3.8b uaddl v24.8h, v2.8b, v4.8b diff --git a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s index 8d6aa995..475f690e 100644 --- a/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s +++ b/dependencies/ih264d/common/armv8/ih264_weighted_bi_pred_av8.s @@ -142,22 +142,14 @@ ih264_weighted_bi_pred_luma_av8: sxtw x4, w4 sxtw x5, w5 stp x19, x20, [sp, #-16]! -#ifndef __APPLE__ ldr w8, [sp, #80] //Load wt2 in w8 ldr w9, [sp, #88] //Load ofst1 in w9 - ldr w10, [sp, #96] //Load ofst2 in w10 - ldr w11, [sp, #104] //Load ht in w11 - ldr w12, [sp, #112] //Load wd in w12 -#else - ldr w8, [sp, #80] //Load wt2 in w8 - ldr w9, [sp, #84] //Load ofst1 in w9 - ldr w10, [sp, #88] //Load ofst2 in w10 - ldr w11, [sp, #92] //Load ht in w11 - ldr w12, [sp, #96] //Load wd in w12 -#endif add w6, w6, #1 //w6 = log_WD + 1 neg w10, w6 //w10 = -(log_WD + 1) dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit) + ldr w10, [sp, #96] //Load ofst2 in w10 + ldr w11, [sp, #104] //Load ht in w11 + ldr w12, [sp, #112] //Load wd in w12 add w9, w9, #1 //w9 = ofst1 + 1 add w9, w9, w10 //w9 = ofst1 + ofst2 + 1 mov v2.s[0], w7 @@ -432,24 +424,17 @@ ih264_weighted_bi_pred_chroma_av8: sxtw x5, w5 stp x19, x20, [sp, #-16]! -#ifndef __APPLE__ + ldr w8, [sp, #80] //Load wt2 in w8 - ldr w9, [sp, #88] //Load ofst1 in w9 - ldr w10, [sp, #96] //Load ofst2 in w10 - ldr w11, [sp, #104] //Load ht in w11 - ldr w12, [sp, #112] //Load wd in w12 -#else - ldr w8, [sp, #80] //Load wt2 in w8 - ldr w9, [sp, #84] //Load ofst1 in w9 - ldr w10, [sp, #88] //Load ofst2 in w10 - ldr w11, [sp, #92] //Load ht in w11 - ldr w12, [sp, #96] //Load wd in w12 -#endif dup v4.4s, w8 //Q2 = (wt2_u, wt2_v) (32-bit) dup v2.4s, w7 //Q1 = (wt1_u, wt1_v) (32-bit) add w6, w6, #1 //w6 = log_WD + 1 + ldr w9, [sp, #88] //Load ofst1 in w9 + ldr w10, [sp, #96] //Load ofst2 in w10 neg w20, w6 //w20 = -(log_WD + 1) dup v0.8h, w20 //Q0 = -(log_WD + 1) (16-bit) + ldr w11, [sp, #104] //Load ht in x11 + ldr w12, [sp, #112] //Load wd in x12 dup v20.8h, w9 //0ffset1 dup v21.8h, w10 //0ffset2 srhadd v6.8b, v20.8b, v21.8b diff --git a/dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s b/dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s deleted file mode 100644 index 3639f1b3..00000000 --- a/dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s +++ /dev/null @@ -1,185 +0,0 @@ -// macOS clang compilers append preceding underscores to function names, this is to prevent -// mismatches with the assembly function names and the C functions as defined in the header. - -.global _ih264_deblk_chroma_horz_bs4_av8 -_ih264_deblk_chroma_horz_bs4_av8 = ih264_deblk_chroma_horz_bs4_av8 - -.global _ih264_deblk_chroma_horz_bslt4_av8 -_ih264_deblk_chroma_horz_bslt4_av8 = ih264_deblk_chroma_horz_bslt4_av8 - -.global _ih264_deblk_chroma_vert_bs4_av8 -_ih264_deblk_chroma_vert_bs4_av8 = ih264_deblk_chroma_vert_bs4_av8 - -.global _ih264_deblk_chroma_vert_bslt4_av8 -_ih264_deblk_chroma_vert_bslt4_av8 = ih264_deblk_chroma_vert_bslt4_av8 - -.global _ih264_deblk_luma_horz_bs4_av8 -_ih264_deblk_luma_horz_bs4_av8 = ih264_deblk_luma_horz_bs4_av8 - -.global _ih264_deblk_luma_horz_bslt4_av8 -_ih264_deblk_luma_horz_bslt4_av8 = ih264_deblk_luma_horz_bslt4_av8 - -.global _ih264_deblk_luma_vert_bs4_av8 -_ih264_deblk_luma_vert_bs4_av8 = ih264_deblk_luma_vert_bs4_av8 - -.global _ih264_deblk_luma_vert_bslt4_av8 -_ih264_deblk_luma_vert_bslt4_av8 = ih264_deblk_luma_vert_bslt4_av8 - -.global _ih264_default_weighted_pred_chroma_av8 -_ih264_default_weighted_pred_chroma_av8 = ih264_default_weighted_pred_chroma_av8 - -.global _ih264_default_weighted_pred_luma_av8 -_ih264_default_weighted_pred_luma_av8 = ih264_default_weighted_pred_luma_av8 - -.global _ih264_ihadamard_scaling_4x4_av8 -_ih264_ihadamard_scaling_4x4_av8 = ih264_ihadamard_scaling_4x4_av8 - -.global _ih264_inter_pred_chroma_av8 -_ih264_inter_pred_chroma_av8 = ih264_inter_pred_chroma_av8 - -.global _ih264_inter_pred_luma_copy_av8 -_ih264_inter_pred_luma_copy_av8 = ih264_inter_pred_luma_copy_av8 - -.global _ih264_inter_pred_luma_horz_av8 -_ih264_inter_pred_luma_horz_av8 = ih264_inter_pred_luma_horz_av8 - -.global _ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 -_ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 = ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 - -.global _ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 -_ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 = ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 - -.global _ih264_inter_pred_luma_horz_qpel_av8 -_ih264_inter_pred_luma_horz_qpel_av8 = ih264_inter_pred_luma_horz_qpel_av8 - -.global _ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 -_ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 = ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 - -.global _ih264_inter_pred_luma_horz_qpel_vert_qpel_av8 -_ih264_inter_pred_luma_horz_qpel_vert_qpel_av8 = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8 - -.global _ih264_inter_pred_luma_vert_av8 -_ih264_inter_pred_luma_vert_av8 = ih264_inter_pred_luma_vert_av8 - -.global _ih264_inter_pred_luma_vert_qpel_av8 -_ih264_inter_pred_luma_vert_qpel_av8 = ih264_inter_pred_luma_vert_qpel_av8 - -.global _ih264_intra_pred_chroma_8x8_mode_horz_av8 -_ih264_intra_pred_chroma_8x8_mode_horz_av8 = ih264_intra_pred_chroma_8x8_mode_horz_av8 - -.global _ih264_intra_pred_chroma_8x8_mode_plane_av8 -_ih264_intra_pred_chroma_8x8_mode_plane_av8 = ih264_intra_pred_chroma_8x8_mode_plane_av8 - -.global _ih264_intra_pred_chroma_8x8_mode_vert_av8 -_ih264_intra_pred_chroma_8x8_mode_vert_av8 = ih264_intra_pred_chroma_8x8_mode_vert_av8 - -.global _ih264_intra_pred_luma_16x16_mode_dc_av8 -_ih264_intra_pred_luma_16x16_mode_dc_av8 = ih264_intra_pred_luma_16x16_mode_dc_av8 - -.global _ih264_intra_pred_luma_16x16_mode_horz_av8 -_ih264_intra_pred_luma_16x16_mode_horz_av8 = ih264_intra_pred_luma_16x16_mode_horz_av8 - -.global _ih264_intra_pred_luma_16x16_mode_plane_av8 -_ih264_intra_pred_luma_16x16_mode_plane_av8 = ih264_intra_pred_luma_16x16_mode_plane_av8 - -.global _ih264_intra_pred_luma_16x16_mode_vert_av8 -_ih264_intra_pred_luma_16x16_mode_vert_av8 = ih264_intra_pred_luma_16x16_mode_vert_av8 - -.global _ih264_intra_pred_luma_4x4_mode_dc_av8 -_ih264_intra_pred_luma_4x4_mode_dc_av8 = ih264_intra_pred_luma_4x4_mode_dc_av8 - -.global _ih264_intra_pred_luma_4x4_mode_diag_dl_av8 -_ih264_intra_pred_luma_4x4_mode_diag_dl_av8 = ih264_intra_pred_luma_4x4_mode_diag_dl_av8 - -.global _ih264_intra_pred_luma_4x4_mode_diag_dr_av8 -_ih264_intra_pred_luma_4x4_mode_diag_dr_av8 = ih264_intra_pred_luma_4x4_mode_diag_dr_av8 - -.global _ih264_intra_pred_luma_4x4_mode_horz_av8 -_ih264_intra_pred_luma_4x4_mode_horz_av8 = ih264_intra_pred_luma_4x4_mode_horz_av8 - -.global _ih264_intra_pred_luma_4x4_mode_horz_d_av8 -_ih264_intra_pred_luma_4x4_mode_horz_d_av8 = ih264_intra_pred_luma_4x4_mode_horz_d_av8 - -.global _ih264_intra_pred_luma_4x4_mode_horz_u_av8 -_ih264_intra_pred_luma_4x4_mode_horz_u_av8 = ih264_intra_pred_luma_4x4_mode_horz_u_av8 - -.global _ih264_intra_pred_luma_4x4_mode_vert_av8 -_ih264_intra_pred_luma_4x4_mode_vert_av8 = ih264_intra_pred_luma_4x4_mode_vert_av8 - -.global _ih264_intra_pred_luma_4x4_mode_vert_l_av8 -_ih264_intra_pred_luma_4x4_mode_vert_l_av8 = ih264_intra_pred_luma_4x4_mode_vert_l_av8 - -.global _ih264_intra_pred_luma_4x4_mode_vert_r_av8 -_ih264_intra_pred_luma_4x4_mode_vert_r_av8 = ih264_intra_pred_luma_4x4_mode_vert_r_av8 - -.global _ih264_intra_pred_luma_8x8_mode_dc_av8 -_ih264_intra_pred_luma_8x8_mode_dc_av8 = ih264_intra_pred_luma_8x8_mode_dc_av8 - -.global _ih264_intra_pred_luma_8x8_mode_diag_dl_av8 -_ih264_intra_pred_luma_8x8_mode_diag_dl_av8 = ih264_intra_pred_luma_8x8_mode_diag_dl_av8 - -.global _ih264_intra_pred_luma_8x8_mode_diag_dr_av8 -_ih264_intra_pred_luma_8x8_mode_diag_dr_av8 = ih264_intra_pred_luma_8x8_mode_diag_dr_av8 - -.global _ih264_intra_pred_luma_8x8_mode_horz_av8 -_ih264_intra_pred_luma_8x8_mode_horz_av8 = ih264_intra_pred_luma_8x8_mode_horz_av8 - -.global _ih264_intra_pred_luma_8x8_mode_horz_d_av8 -_ih264_intra_pred_luma_8x8_mode_horz_d_av8 = ih264_intra_pred_luma_8x8_mode_horz_d_av8 - -.global _ih264_intra_pred_luma_8x8_mode_horz_u_av8 -_ih264_intra_pred_luma_8x8_mode_horz_u_av8 = ih264_intra_pred_luma_8x8_mode_horz_u_av8 - -.global _ih264_intra_pred_luma_8x8_mode_vert_av8 -_ih264_intra_pred_luma_8x8_mode_vert_av8 = ih264_intra_pred_luma_8x8_mode_vert_av8 - -.global _ih264_intra_pred_luma_8x8_mode_vert_l_av8 -_ih264_intra_pred_luma_8x8_mode_vert_l_av8 = ih264_intra_pred_luma_8x8_mode_vert_l_av8 - -.global _ih264_intra_pred_luma_8x8_mode_vert_r_av8 -_ih264_intra_pred_luma_8x8_mode_vert_r_av8 = ih264_intra_pred_luma_8x8_mode_vert_r_av8 - -.global _ih264_iquant_itrans_recon_4x4_av8 -_ih264_iquant_itrans_recon_4x4_av8 = ih264_iquant_itrans_recon_4x4_av8 - -.global _ih264_iquant_itrans_recon_4x4_dc_av8 -_ih264_iquant_itrans_recon_4x4_dc_av8 = ih264_iquant_itrans_recon_4x4_dc_av8 - -.global _ih264_iquant_itrans_recon_8x8_av8 -_ih264_iquant_itrans_recon_8x8_av8 = ih264_iquant_itrans_recon_8x8_av8 - -.global _ih264_iquant_itrans_recon_8x8_dc_av8 -_ih264_iquant_itrans_recon_8x8_dc_av8 = ih264_iquant_itrans_recon_8x8_dc_av8 - -.global _ih264_iquant_itrans_recon_chroma_4x4_av8 -_ih264_iquant_itrans_recon_chroma_4x4_av8 = ih264_iquant_itrans_recon_chroma_4x4_av8 - -.global _ih264_iquant_itrans_recon_chroma_4x4_dc_av8 -_ih264_iquant_itrans_recon_chroma_4x4_dc_av8 = ih264_iquant_itrans_recon_chroma_4x4_dc_av8 - -.global _ih264_pad_left_chroma_av8 -_ih264_pad_left_chroma_av8 = ih264_pad_left_chroma_av8 - -.global _ih264_pad_left_luma_av8 -_ih264_pad_left_luma_av8 = ih264_pad_left_luma_av8 - -.global _ih264_pad_right_chroma_av8 -_ih264_pad_right_chroma_av8 = ih264_pad_right_chroma_av8 - -.global _ih264_pad_right_luma_av8 -_ih264_pad_right_luma_av8 = ih264_pad_right_luma_av8 - -.global _ih264_pad_top_av8 -_ih264_pad_top_av8 = ih264_pad_top_av8 - -.global _ih264_weighted_bi_pred_chroma_av8 -_ih264_weighted_bi_pred_chroma_av8 = ih264_weighted_bi_pred_chroma_av8 - -.global _ih264_weighted_bi_pred_luma_av8 -_ih264_weighted_bi_pred_luma_av8 = ih264_weighted_bi_pred_luma_av8 - -.global _ih264_weighted_pred_chroma_av8 -_ih264_weighted_pred_chroma_av8 = ih264_weighted_pred_chroma_av8 - -.global _ih264_weighted_pred_luma_av8 -_ih264_weighted_pred_luma_av8 = ih264_weighted_pred_luma_av8 \ No newline at end of file diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 04b6dfdd..ee7f8610 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -101,21 +101,13 @@ if (MACOS_BUNDLE) endforeach(folder) if(CMAKE_BUILD_TYPE STREQUAL "Debug") - set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/debug/lib/libusb-1.0.0.dylib") + set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/debug/lib/libusb-1.0.0.dylib") else() - set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib/libusb-1.0.0.dylib") + set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/lib/libusb-1.0.0.dylib") endif() - if (EXISTS "/usr/local/lib/libMoltenVK.dylib") - set(MOLTENVK_PATH "/usr/local/lib/libMoltenVK.dylib") - elseif (EXISTS "/opt/homebrew/lib/libMoltenVK.dylib") - set(MOLTENVK_PATH "/opt/homebrew/lib/libMoltenVK.dylib") - else() - message(FATAL_ERROR "failed to find libMoltenVK.dylib") - endif () - add_custom_command (TARGET CemuBin POST_BUILD - COMMAND ${CMAKE_COMMAND} ARGS -E copy "${MOLTENVK_PATH}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib" + COMMAND ${CMAKE_COMMAND} ARGS -E copy "/usr/local/lib/libMoltenVK.dylib" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib" COMMAND ${CMAKE_COMMAND} ARGS -E copy "${LIBUSB_PATH}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libusb-1.0.0.dylib" COMMAND ${CMAKE_COMMAND} ARGS -E copy "${CMAKE_SOURCE_DIR}/src/resource/update.sh" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/update.sh" COMMAND bash -c "install_name_tool -add_rpath @executable_path/../Frameworks ${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/${OUTPUT_NAME}" diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 64baa337..71866b21 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -537,7 +537,7 @@ if(APPLE) target_sources(CemuCafe PRIVATE "HW/Latte/Renderer/Vulkan/CocoaSurface.mm") endif() -if(CEMU_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(AARCH64)") target_sources(CemuCafe PRIVATE HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h diff --git a/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp index 728460a4..cb71234d 100644 --- a/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp +++ b/src/Cafe/HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp @@ -169,10 +169,8 @@ struct AArch64GenContext_t : CodeGenerator bool processAllJumps() { - for (auto jump : jumps) + for (auto&& [jumpStart, jumpInfo] : jumps) { - auto jumpStart = jump.first; - auto jumpInfo = jump.second; bool success = std::visit( [&, this](const auto& jump) { setSize(jumpStart); diff --git a/src/Cafe/HW/Latte/Core/LatteIndices.cpp b/src/Cafe/HW/Latte/Core/LatteIndices.cpp index 2bbb617d..aec51725 100644 --- a/src/Cafe/HW/Latte/Core/LatteIndices.cpp +++ b/src/Cafe/HW/Latte/Core/LatteIndices.cpp @@ -6,8 +6,6 @@ #if defined(ARCH_X86_64) && defined(__GNUC__) #include -#elif defined(__aarch64__) -#include #endif struct @@ -504,114 +502,6 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat indexMax = std::max(indexMax, _maxIndex); indexMin = std::min(indexMin, _minIndex); } -#elif defined(__aarch64__) - -void LatteIndices_fastConvertU16_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) -{ - const uint16* indicesU16BE = (const uint16*)indexDataInput; - uint16* indexOutput = (uint16*)indexDataOutput; - sint32 count8 = count >> 3; - sint32 countRemaining = count & 7; - - if (count8) - { - uint16x8_t mMin = vdupq_n_u16(0xFFFF); - uint16x8_t mMax = vdupq_n_u16(0x0000); - uint16x8_t mTemp; - uint16x8_t* mRawIndices = (uint16x8_t*) indicesU16BE; - indicesU16BE += count8 * 8; - uint16x8_t* mOutputIndices = (uint16x8_t*) indexOutput; - indexOutput += count8 * 8; - - while (count8--) - { - mTemp = vld1q_u16((uint16*)mRawIndices); - mRawIndices++; - mTemp = vrev16q_u8(mTemp); - mMin = vminq_u16(mMin, mTemp); - mMax = vmaxq_u16(mMax, mTemp); - vst1q_u16((uint16*)mOutputIndices, mTemp); - mOutputIndices++; - } - - uint16* mMaxU16 = (uint16*)&mMax; - uint16* mMinU16 = (uint16*)&mMin; - - for (int i = 0; i < 8; ++i) { - indexMax = std::max(indexMax, (uint32)mMaxU16[i]); - indexMin = std::min(indexMin, (uint32)mMinU16[i]); - } - } - // process remaining indices - uint32 _minIndex = 0xFFFFFFFF; - uint32 _maxIndex = 0; - for (sint32 i = countRemaining; (--i) >= 0;) - { - uint16 idx = _swapEndianU16(*indicesU16BE); - *indexOutput = idx; - indexOutput++; - indicesU16BE++; - _maxIndex = std::max(_maxIndex, (uint32)idx); - _minIndex = std::min(_minIndex, (uint32)idx); - } - // update min/max - indexMax = std::max(indexMax, _maxIndex); - indexMin = std::min(indexMin, _minIndex); -} - -void LatteIndices_fastConvertU32_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) -{ - const uint32* indicesU32BE = (const uint32*)indexDataInput; - uint32* indexOutput = (uint32*)indexDataOutput; - sint32 count8 = count >> 2; - sint32 countRemaining = count & 3; - - if (count8) - { - uint32x4_t mMin = vdupq_n_u32(0xFFFFFFFF); - uint32x4_t mMax = vdupq_n_u32(0x00000000); - uint32x4_t mTemp; - uint32x4_t* mRawIndices = (uint32x4_t*) indicesU32BE; - indicesU32BE += count8 * 4; - uint32x4_t* mOutputIndices = (uint32x4_t*) indexOutput; - indexOutput += count8 * 4; - - while (count8--) - { - mTemp = vld1q_u32((uint32*)mRawIndices); - mRawIndices++; - mTemp = vrev32q_u8(mTemp); - mMin = vminq_u32(mMin, mTemp); - mMax = vmaxq_u32(mMax, mTemp); - vst1q_u32((uint32*)mOutputIndices, mTemp); - mOutputIndices++; - } - - uint32* mMaxU32 = (uint32*)&mMax; - uint32* mMinU32 = (uint32*)&mMin; - - for (int i = 0; i < 4; ++i) { - indexMax = std::max(indexMax, mMaxU32[i]); - indexMin = std::min(indexMin, mMinU32[i]); - } - } - // process remaining indices - uint32 _minIndex = 0xFFFFFFFF; - uint32 _maxIndex = 0; - for (sint32 i = countRemaining; (--i) >= 0;) - { - uint32 idx = _swapEndianU32(*indicesU32BE); - *indexOutput = idx; - indexOutput++; - indicesU32BE++; - _maxIndex = std::max(_maxIndex, idx); - _minIndex = std::min(_minIndex, idx); - } - // update min/max - indexMax = std::max(indexMax, _maxIndex); - indexMin = std::min(indexMin, _minIndex); -} - #endif template @@ -798,31 +688,27 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 { if (indexType == LatteIndexType::U16_BE) { -#if defined(ARCH_X86_64) + #if defined(ARCH_X86_64) if (g_CPUFeatures.x86.avx2) LatteIndices_fastConvertU16_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax); else if (g_CPUFeatures.x86.sse4_1 && g_CPUFeatures.x86.ssse3) LatteIndices_fastConvertU16_SSE41(indexData, indexOutputPtr, count, indexMin, indexMax); else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); -#elif defined(__aarch64__) - LatteIndices_fastConvertU16_NEON(indexData, indexOutputPtr, count, indexMin, indexMax); -#else + #else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); -#endif + #endif } else if (indexType == LatteIndexType::U32_BE) { -#if defined(ARCH_X86_64) + #if defined(ARCH_X86_64) if (g_CPUFeatures.x86.avx2) LatteIndices_fastConvertU32_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax); else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); -#elif defined(__aarch64__) - LatteIndices_fastConvertU32_NEON(indexData, indexOutputPtr, count, indexMin, indexMax); -#else + #else LatteIndices_convertBE(indexData, indexOutputPtr, count, indexMin, indexMax); -#endif + #endif } else if (indexType == LatteIndexType::U16_LE) { diff --git a/src/Cafe/IOSU/legacy/iosu_fpd.cpp b/src/Cafe/IOSU/legacy/iosu_fpd.cpp index a667e61c..28d248ae 100644 --- a/src/Cafe/IOSU/legacy/iosu_fpd.cpp +++ b/src/Cafe/IOSU/legacy/iosu_fpd.cpp @@ -132,7 +132,7 @@ namespace iosu void convertMultiByteStringToBigEndianWidechar(const char* input, uint16be* output, sint32 maxOutputLength) { - std::vector beStr = StringHelpers::FromUtf8(input); + std::basic_string beStr = StringHelpers::FromUtf8(input); if (beStr.size() >= maxOutputLength - 1) beStr.resize(maxOutputLength-1); for (size_t i = 0; i < beStr.size(); i++) @@ -723,7 +723,7 @@ namespace iosu { if(numVecIn != 0 || numVecOut != 1) return FPResult_InvalidIPCParam; - std::vector myComment; + std::basic_string myComment; if(g_fpd.nexFriendSession) { if(vecOut->size != MY_COMMENT_LENGTH * sizeof(uint16be)) @@ -735,8 +735,8 @@ namespace iosu g_fpd.nexFriendSession->getMyComment(myNexComment); myComment = StringHelpers::FromUtf8(myNexComment.commentString); } - myComment.insert(myComment.begin(), '\0'); - memcpy(vecOut->basePhys.GetPtr(), myComment.data(), MY_COMMENT_LENGTH * sizeof(uint16be)); + myComment.insert(0, 1, '\0'); + memcpy(vecOut->basePhys.GetPtr(), myComment.c_str(), MY_COMMENT_LENGTH * sizeof(uint16be)); return FPResult_Ok; } diff --git a/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp b/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp index 2eef929d..870d1850 100644 --- a/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp +++ b/src/Cafe/OS/libs/coreinit/coreinit_Thread.cpp @@ -25,11 +25,7 @@ void nnNfp_update(); namespace coreinit { -#ifdef __arm64__ - void __OSFiberThreadEntry(uint32, uint32); -#else void __OSFiberThreadEntry(void* thread); -#endif void __OSAddReadyThreadToRunQueue(OSThread_t* thread); void __OSRemoveThreadFromRunQueues(OSThread_t* thread); }; @@ -53,7 +49,7 @@ namespace coreinit struct OSHostThread { - OSHostThread(OSThread_t* thread) : m_thread(thread), m_fiber((void(*)(void*))__OSFiberThreadEntry, this, this) + OSHostThread(OSThread_t* thread) : m_thread(thread), m_fiber(__OSFiberThreadEntry, this, this) { } @@ -1308,14 +1304,8 @@ namespace coreinit __OSThreadStartTimeslice(hostThread->m_thread, &hostThread->ppcInstance); } -#ifdef __arm64__ - void __OSFiberThreadEntry(uint32 _high, uint32 _low) - { - uint64 _thread = (uint64) _high << 32 | _low; -#else void __OSFiberThreadEntry(void* _thread) { -#endif OSHostThread* hostThread = (OSHostThread*)_thread; #if defined(ARCH_X86_64) diff --git a/src/Cafe/OS/libs/nn_olv/nn_olv_DownloadCommunityTypes.cpp b/src/Cafe/OS/libs/nn_olv/nn_olv_DownloadCommunityTypes.cpp index 6e7632e9..db1885af 100644 --- a/src/Cafe/OS/libs/nn_olv/nn_olv_DownloadCommunityTypes.cpp +++ b/src/Cafe/OS/libs/nn_olv/nn_olv_DownloadCommunityTypes.cpp @@ -145,8 +145,7 @@ namespace nn if (name.size() != 0) { - auto name_utf16 = StringHelpers::FromUtf8(name); - name_utf16.resize(std::min(name_utf16.size(), 128)); + auto name_utf16 = StringHelpers::FromUtf8(name).substr(0, 128); if (name_utf16.size() != 0) { for (int i = 0; i < name_utf16.size(); i++) @@ -161,8 +160,7 @@ namespace nn if (description.size() != 0) { - auto description_utf16 = StringHelpers::FromUtf8(description); - description_utf16.resize(std::min(description_utf16.size(), 256)); + auto description_utf16 = StringHelpers::FromUtf8(description).substr(0, 256); if (description_utf16.size() != 0) { for (int i = 0; i < description_utf16.size(); i++) @@ -208,8 +206,7 @@ namespace nn if (screen_name.size() != 0) { - auto screen_name_utf16 = StringHelpers::FromUtf8(screen_name); - screen_name_utf16.resize(std::min(screen_name_utf16.size(), 32)); + auto screen_name_utf16 = StringHelpers::FromUtf8(screen_name).substr(0, 32); if (screen_name_utf16.size() != 0) { for (int i = 0; i < screen_name_utf16.size(); i++) diff --git a/src/Cafe/OS/libs/nn_olv/nn_olv_UploadCommunityTypes.cpp b/src/Cafe/OS/libs/nn_olv/nn_olv_UploadCommunityTypes.cpp index 21952ceb..6f3c43b9 100644 --- a/src/Cafe/OS/libs/nn_olv/nn_olv_UploadCommunityTypes.cpp +++ b/src/Cafe/OS/libs/nn_olv/nn_olv_UploadCommunityTypes.cpp @@ -250,8 +250,7 @@ namespace nn if (name.size() != 0) { - auto name_utf16 = StringHelpers::FromUtf8(name); - name_utf16.resize(std::min(name_utf16.size(), 128)); + auto name_utf16 = StringHelpers::FromUtf8(name).substr(0, 128); if (name_utf16.size() != 0) { for (int i = 0; i < name_utf16.size(); i++) @@ -266,8 +265,7 @@ namespace nn if (description.size() != 0) { - auto description_utf16 = StringHelpers::FromUtf8(description); - description_utf16.resize(std::min(description_utf16.size(), 256)); + auto description_utf16 = StringHelpers::FromUtf8(description).substr(0, 256); if (description_utf16.size() != 0) { for (int i = 0; i < description_utf16.size(); i++) diff --git a/src/Cafe/OS/libs/nn_olv/nn_olv_UploadFavoriteTypes.cpp b/src/Cafe/OS/libs/nn_olv/nn_olv_UploadFavoriteTypes.cpp index 912e7a11..1e2d40ab 100644 --- a/src/Cafe/OS/libs/nn_olv/nn_olv_UploadFavoriteTypes.cpp +++ b/src/Cafe/OS/libs/nn_olv/nn_olv_UploadFavoriteTypes.cpp @@ -1,6 +1,5 @@ #include "nn_olv_UploadFavoriteTypes.h" #include -#include namespace nn { @@ -116,8 +115,7 @@ namespace nn if (name.size() != 0) { - auto name_utf16 = StringHelpers::FromUtf8(name); - name_utf16.resize(std::min(name_utf16.size(), 128)); + auto name_utf16 = StringHelpers::FromUtf8(name).substr(0, 128); if (name_utf16.size() != 0) { for (int i = 0; i < name_utf16.size(); i++) @@ -132,8 +130,7 @@ namespace nn if (description.size() != 0) { - auto description_utf16 = StringHelpers::FromUtf8(description); - description_utf16.resize(std::min(description_utf16.size(), 256)); + auto description_utf16 = StringHelpers::FromUtf8(description).substr(0, 256); if (description_utf16.size() != 0) { for (int i = 0; i < description_utf16.size(); i++) diff --git a/src/Common/CafeString.h b/src/Common/CafeString.h index 57fc72da..d902d721 100644 --- a/src/Common/CafeString.h +++ b/src/Common/CafeString.h @@ -51,15 +51,15 @@ class CafeWideString // fixed buffer size, null-terminated, PPC wchar_t (16bit b bool assignFromUTF8(std::string_view sv) { - std::vector beStr = StringHelpers::FromUtf8(sv); - if(beStr.size() > N-1) + std::basic_string beStr = StringHelpers::FromUtf8(sv); + if(beStr.length() > N-1) { memcpy(data, beStr.data(), (N-1)*sizeof(uint16be)); data[N-1] = 0; return false; } - memcpy(data, beStr.data(), beStr.size()*sizeof(uint16be)); - data[beStr.size()] = '\0'; + memcpy(data, beStr.data(), beStr.length()*sizeof(uint16be)); + data[beStr.length()] = '\0'; return true; } diff --git a/src/Common/precompiled.h b/src/Common/precompiled.h index 26fdfd28..1185a34b 100644 --- a/src/Common/precompiled.h +++ b/src/Common/precompiled.h @@ -310,8 +310,7 @@ inline uint64 __rdtsc() inline void _mm_mfence() { - asm volatile("" ::: "memory"); - std::atomic_thread_fence(std::memory_order_seq_cst); + } inline unsigned char _addcarry_u64(unsigned char carry, unsigned long long a, unsigned long long b, unsigned long long *result) diff --git a/src/gui/MainWindow.cpp b/src/gui/MainWindow.cpp index c3d800e0..882c6eab 100644 --- a/src/gui/MainWindow.cpp +++ b/src/gui/MainWindow.cpp @@ -140,7 +140,6 @@ enum MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS, // debug->logging - MAINFRAME_MENU_ID_DEBUG_LOGGING_MESSAGE = 21499, MAINFRAME_MENU_ID_DEBUG_LOGGING0 = 21500, MAINFRAME_MENU_ID_DEBUG_ADVANCED_PPC_INFO = 21599, // debug->dump @@ -2235,7 +2234,7 @@ void MainWindow::RecreateMenu() debugLoggingMenu->AppendSeparator(); wxMenu* logCosModulesMenu = new wxMenu(); - logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING_MESSAGE, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false); + logCosModulesMenu->AppendCheckItem(0, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false); logCosModulesMenu->AppendSeparator(); logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitFile), _("coreinit File-Access API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitFile)); logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitThreadSync), _("coreinit Thread-Synchronization API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitThreadSync)); diff --git a/src/util/Fiber/FiberUnix.cpp b/src/util/Fiber/FiberUnix.cpp index 36430449..0d527069 100644 --- a/src/util/Fiber/FiberUnix.cpp +++ b/src/util/Fiber/FiberUnix.cpp @@ -15,12 +15,7 @@ Fiber::Fiber(void(*FiberEntryPoint)(void* userParam), void* userParam, void* pri ctx->uc_stack.ss_sp = m_stackPtr; ctx->uc_stack.ss_size = stackSize; ctx->uc_link = &ctx[0]; -#ifdef __arm64__ - // https://www.man7.org/linux/man-pages/man3/makecontext.3.html#NOTES - makecontext(ctx, (void(*)())FiberEntryPoint, 2, (uint64) userParam >> 32, userParam); -#else makecontext(ctx, (void(*)())FiberEntryPoint, 1, userParam); -#endif this->m_implData = (void*)ctx; } diff --git a/src/util/MemMapper/MemMapperUnix.cpp b/src/util/MemMapper/MemMapperUnix.cpp index 8e800e53..0ade291d 100644 --- a/src/util/MemMapper/MemMapperUnix.cpp +++ b/src/util/MemMapper/MemMapperUnix.cpp @@ -45,11 +45,7 @@ namespace MemMapper void* r; if(fromReservation) { - uint64 page_size = sysconf(_SC_PAGESIZE); - void* page = baseAddr; - if ( (uint64) baseAddr % page_size != 0 ) - page = (void*) ((uint64)baseAddr & ~(page_size - 1)); - if( mprotect(page, size, GetProt(permissionFlags)) == 0 ) + if( mprotect(baseAddr, size, GetProt(permissionFlags)) == 0 ) r = baseAddr; else r = nullptr; diff --git a/src/util/helpers/StringHelpers.h b/src/util/helpers/StringHelpers.h index fb858f4d..a98344d6 100644 --- a/src/util/helpers/StringHelpers.h +++ b/src/util/helpers/StringHelpers.h @@ -111,9 +111,9 @@ namespace StringHelpers } // convert utf8 string to Wii U big-endian wchar_t string - static std::vector FromUtf8(std::string_view str) + static std::basic_string FromUtf8(std::string_view str) { - std::vector tmpStr; + std::basic_string tmpStr; std::wstring w = boost::nowide::widen(str.data(), str.size()); for (auto& c : w) tmpStr.push_back((uint16)c); diff --git a/src/util/highresolutiontimer/HighResolutionTimer.cpp b/src/util/highresolutiontimer/HighResolutionTimer.cpp index bb4a40ab..67ffa349 100644 --- a/src/util/highresolutiontimer/HighResolutionTimer.cpp +++ b/src/util/highresolutiontimer/HighResolutionTimer.cpp @@ -27,8 +27,6 @@ uint64 HighResolutionTimer::m_freq = []() -> uint64 { LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); return (uint64)(freq.QuadPart); -#elif BOOST_OS_MACOS - return 1000000000; #else timespec pc; clock_getres(CLOCK_MONOTONIC_RAW, &pc);