mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-02 13:01:18 +12:00
General aarch64 improvements & Apple Silicon support (#1255)
This commit is contained in:
parent
c8ffff8f41
commit
00ff5549d9
18 changed files with 405 additions and 32 deletions
3
dependencies/ih264d/CMakeLists.txt
vendored
3
dependencies/ih264d/CMakeLists.txt
vendored
|
@ -183,6 +183,9 @@ target_sources(ih264d PRIVATE
|
|||
"decoder/arm/ih264d_function_selector.c"
|
||||
)
|
||||
target_compile_options(ih264d PRIVATE -DARMV8)
|
||||
if(APPLE)
|
||||
target_sources(ih264d PRIVATE "common/armv8/macos_arm_symbol_aliases.s")
|
||||
endif()
|
||||
else()
|
||||
message(FATAL_ERROR "ih264d unknown architecture: ${IH264D_ARCHITECTURE}")
|
||||
endif()
|
||||
|
|
|
@ -429,8 +429,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
|
|||
rev64 v7.4h, v2.4h
|
||||
ld1 {v3.2s}, [x10]
|
||||
sub x5, x3, #8
|
||||
#ifdef __APPLE__
|
||||
adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGE
|
||||
ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs1@GOTPAGEOFF]
|
||||
#else
|
||||
adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs1
|
||||
ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs1]
|
||||
#endif
|
||||
usubl v10.8h, v5.8b, v1.8b
|
||||
ld1 {v8.8b, v9.8b}, [x12] // Load multiplication factors 1 to 8 into D3
|
||||
mov v8.d[1], v9.d[0]
|
||||
|
@ -484,10 +489,13 @@ ih264_intra_pred_chroma_8x8_mode_plane_av8:
|
|||
zip1 v1.8h, v0.8h, v2.8h
|
||||
zip2 v2.8h, v0.8h, v2.8h
|
||||
mov v0.16b, v1.16b
|
||||
|
||||
#ifdef __APPLE__
|
||||
adrp x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGE
|
||||
ldr x12, [x12, _ih264_gai1_intrapred_chroma_plane_coeffs2@GOTPAGEOFF]
|
||||
#else
|
||||
adrp x12, :got:ih264_gai1_intrapred_chroma_plane_coeffs2
|
||||
ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_chroma_plane_coeffs2]
|
||||
|
||||
#endif
|
||||
ld1 {v8.2s, v9.2s}, [x12]
|
||||
mov v8.d[1], v9.d[0]
|
||||
mov v10.16b, v8.16b
|
||||
|
|
|
@ -431,10 +431,13 @@ ih264_intra_pred_luma_16x16_mode_plane_av8:
|
|||
mov x10, x1 //top_left
|
||||
mov x4, #-1
|
||||
ld1 {v2.2s}, [x1], x8
|
||||
|
||||
#ifdef __APPLE__
|
||||
adrp x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGE
|
||||
ldr x7, [x7, _ih264_gai1_intrapred_luma_plane_coeffs@GOTPAGEOFF]
|
||||
#else
|
||||
adrp x7, :got:ih264_gai1_intrapred_luma_plane_coeffs
|
||||
ldr x7, [x7, #:got_lo12:ih264_gai1_intrapred_luma_plane_coeffs]
|
||||
|
||||
#endif
|
||||
ld1 {v0.2s}, [x1]
|
||||
rev64 v2.8b, v2.8b
|
||||
ld1 {v6.2s, v7.2s}, [x7]
|
||||
|
|
|
@ -1029,9 +1029,13 @@ ih264_intra_pred_luma_8x8_mode_horz_u_av8:
|
|||
mov v3.d[0], v2.d[1]
|
||||
ext v4.16b, v2.16b , v2.16b , #1
|
||||
mov v5.d[0], v4.d[1]
|
||||
|
||||
#ifdef __APPLE__
|
||||
adrp x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGE
|
||||
ldr x12, [x12, _ih264_gai1_intrapred_luma_8x8_horz_u@GOTPAGEOFF]
|
||||
#else
|
||||
adrp x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u
|
||||
ldr x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u]
|
||||
#endif
|
||||
uaddl v20.8h, v0.8b, v2.8b
|
||||
uaddl v22.8h, v1.8b, v3.8b
|
||||
uaddl v24.8h, v2.8b, v4.8b
|
||||
|
|
|
@ -142,14 +142,22 @@ ih264_weighted_bi_pred_luma_av8:
|
|||
sxtw x4, w4
|
||||
sxtw x5, w5
|
||||
stp x19, x20, [sp, #-16]!
|
||||
#ifndef __APPLE__
|
||||
ldr w8, [sp, #80] //Load wt2 in w8
|
||||
ldr w9, [sp, #88] //Load ofst1 in w9
|
||||
add w6, w6, #1 //w6 = log_WD + 1
|
||||
neg w10, w6 //w10 = -(log_WD + 1)
|
||||
dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit)
|
||||
ldr w10, [sp, #96] //Load ofst2 in w10
|
||||
ldr w11, [sp, #104] //Load ht in w11
|
||||
ldr w12, [sp, #112] //Load wd in w12
|
||||
#else
|
||||
ldr w8, [sp, #80] //Load wt2 in w8
|
||||
ldr w9, [sp, #84] //Load ofst1 in w9
|
||||
ldr w10, [sp, #88] //Load ofst2 in w10
|
||||
ldr w11, [sp, #92] //Load ht in w11
|
||||
ldr w12, [sp, #96] //Load wd in w12
|
||||
#endif
|
||||
add w6, w6, #1 //w6 = log_WD + 1
|
||||
neg w10, w6 //w10 = -(log_WD + 1)
|
||||
dup v0.8h, w10 //Q0 = -(log_WD + 1) (32-bit)
|
||||
add w9, w9, #1 //w9 = ofst1 + 1
|
||||
add w9, w9, w10 //w9 = ofst1 + ofst2 + 1
|
||||
mov v2.s[0], w7
|
||||
|
@ -424,17 +432,24 @@ ih264_weighted_bi_pred_chroma_av8:
|
|||
sxtw x5, w5
|
||||
stp x19, x20, [sp, #-16]!
|
||||
|
||||
|
||||
#ifndef __APPLE__
|
||||
ldr w8, [sp, #80] //Load wt2 in w8
|
||||
ldr w9, [sp, #88] //Load ofst1 in w9
|
||||
ldr w10, [sp, #96] //Load ofst2 in w10
|
||||
ldr w11, [sp, #104] //Load ht in w11
|
||||
ldr w12, [sp, #112] //Load wd in w12
|
||||
#else
|
||||
ldr w8, [sp, #80] //Load wt2 in w8
|
||||
ldr w9, [sp, #84] //Load ofst1 in w9
|
||||
ldr w10, [sp, #88] //Load ofst2 in w10
|
||||
ldr w11, [sp, #92] //Load ht in w11
|
||||
ldr w12, [sp, #96] //Load wd in w12
|
||||
#endif
|
||||
dup v4.4s, w8 //Q2 = (wt2_u, wt2_v) (32-bit)
|
||||
dup v2.4s, w7 //Q1 = (wt1_u, wt1_v) (32-bit)
|
||||
add w6, w6, #1 //w6 = log_WD + 1
|
||||
ldr w9, [sp, #88] //Load ofst1 in w9
|
||||
ldr w10, [sp, #96] //Load ofst2 in w10
|
||||
neg w20, w6 //w20 = -(log_WD + 1)
|
||||
dup v0.8h, w20 //Q0 = -(log_WD + 1) (16-bit)
|
||||
ldr w11, [sp, #104] //Load ht in x11
|
||||
ldr w12, [sp, #112] //Load wd in x12
|
||||
dup v20.8h, w9 //0ffset1
|
||||
dup v21.8h, w10 //0ffset2
|
||||
srhadd v6.8b, v20.8b, v21.8b
|
||||
|
|
185
dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s
vendored
Normal file
185
dependencies/ih264d/common/armv8/macos_arm_symbol_aliases.s
vendored
Normal file
|
@ -0,0 +1,185 @@
|
|||
// On macOS, clang prefixes C symbol names with an underscore; the aliases below prevent
|
||||
// mismatches between the assembly function names and the C functions declared in the header.
|
||||
|
||||
.global _ih264_deblk_chroma_horz_bs4_av8
|
||||
_ih264_deblk_chroma_horz_bs4_av8 = ih264_deblk_chroma_horz_bs4_av8
|
||||
|
||||
.global _ih264_deblk_chroma_horz_bslt4_av8
|
||||
_ih264_deblk_chroma_horz_bslt4_av8 = ih264_deblk_chroma_horz_bslt4_av8
|
||||
|
||||
.global _ih264_deblk_chroma_vert_bs4_av8
|
||||
_ih264_deblk_chroma_vert_bs4_av8 = ih264_deblk_chroma_vert_bs4_av8
|
||||
|
||||
.global _ih264_deblk_chroma_vert_bslt4_av8
|
||||
_ih264_deblk_chroma_vert_bslt4_av8 = ih264_deblk_chroma_vert_bslt4_av8
|
||||
|
||||
.global _ih264_deblk_luma_horz_bs4_av8
|
||||
_ih264_deblk_luma_horz_bs4_av8 = ih264_deblk_luma_horz_bs4_av8
|
||||
|
||||
.global _ih264_deblk_luma_horz_bslt4_av8
|
||||
_ih264_deblk_luma_horz_bslt4_av8 = ih264_deblk_luma_horz_bslt4_av8
|
||||
|
||||
.global _ih264_deblk_luma_vert_bs4_av8
|
||||
_ih264_deblk_luma_vert_bs4_av8 = ih264_deblk_luma_vert_bs4_av8
|
||||
|
||||
.global _ih264_deblk_luma_vert_bslt4_av8
|
||||
_ih264_deblk_luma_vert_bslt4_av8 = ih264_deblk_luma_vert_bslt4_av8
|
||||
|
||||
.global _ih264_default_weighted_pred_chroma_av8
|
||||
_ih264_default_weighted_pred_chroma_av8 = ih264_default_weighted_pred_chroma_av8
|
||||
|
||||
.global _ih264_default_weighted_pred_luma_av8
|
||||
_ih264_default_weighted_pred_luma_av8 = ih264_default_weighted_pred_luma_av8
|
||||
|
||||
.global _ih264_ihadamard_scaling_4x4_av8
|
||||
_ih264_ihadamard_scaling_4x4_av8 = ih264_ihadamard_scaling_4x4_av8
|
||||
|
||||
.global _ih264_inter_pred_chroma_av8
|
||||
_ih264_inter_pred_chroma_av8 = ih264_inter_pred_chroma_av8
|
||||
|
||||
.global _ih264_inter_pred_luma_copy_av8
|
||||
_ih264_inter_pred_luma_copy_av8 = ih264_inter_pred_luma_copy_av8
|
||||
|
||||
.global _ih264_inter_pred_luma_horz_av8
|
||||
_ih264_inter_pred_luma_horz_av8 = ih264_inter_pred_luma_horz_av8
|
||||
|
||||
.global _ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
|
||||
_ih264_inter_pred_luma_horz_hpel_vert_hpel_av8 = ih264_inter_pred_luma_horz_hpel_vert_hpel_av8
|
||||
|
||||
.global _ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
|
||||
_ih264_inter_pred_luma_horz_hpel_vert_qpel_av8 = ih264_inter_pred_luma_horz_hpel_vert_qpel_av8
|
||||
|
||||
.global _ih264_inter_pred_luma_horz_qpel_av8
|
||||
_ih264_inter_pred_luma_horz_qpel_av8 = ih264_inter_pred_luma_horz_qpel_av8
|
||||
|
||||
.global _ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
|
||||
_ih264_inter_pred_luma_horz_qpel_vert_hpel_av8 = ih264_inter_pred_luma_horz_qpel_vert_hpel_av8
|
||||
|
||||
.global _ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
|
||||
_ih264_inter_pred_luma_horz_qpel_vert_qpel_av8 = ih264_inter_pred_luma_horz_qpel_vert_qpel_av8
|
||||
|
||||
.global _ih264_inter_pred_luma_vert_av8
|
||||
_ih264_inter_pred_luma_vert_av8 = ih264_inter_pred_luma_vert_av8
|
||||
|
||||
.global _ih264_inter_pred_luma_vert_qpel_av8
|
||||
_ih264_inter_pred_luma_vert_qpel_av8 = ih264_inter_pred_luma_vert_qpel_av8
|
||||
|
||||
.global _ih264_intra_pred_chroma_8x8_mode_horz_av8
|
||||
_ih264_intra_pred_chroma_8x8_mode_horz_av8 = ih264_intra_pred_chroma_8x8_mode_horz_av8
|
||||
|
||||
.global _ih264_intra_pred_chroma_8x8_mode_plane_av8
|
||||
_ih264_intra_pred_chroma_8x8_mode_plane_av8 = ih264_intra_pred_chroma_8x8_mode_plane_av8
|
||||
|
||||
.global _ih264_intra_pred_chroma_8x8_mode_vert_av8
|
||||
_ih264_intra_pred_chroma_8x8_mode_vert_av8 = ih264_intra_pred_chroma_8x8_mode_vert_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_16x16_mode_dc_av8
|
||||
_ih264_intra_pred_luma_16x16_mode_dc_av8 = ih264_intra_pred_luma_16x16_mode_dc_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_16x16_mode_horz_av8
|
||||
_ih264_intra_pred_luma_16x16_mode_horz_av8 = ih264_intra_pred_luma_16x16_mode_horz_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_16x16_mode_plane_av8
|
||||
_ih264_intra_pred_luma_16x16_mode_plane_av8 = ih264_intra_pred_luma_16x16_mode_plane_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_16x16_mode_vert_av8
|
||||
_ih264_intra_pred_luma_16x16_mode_vert_av8 = ih264_intra_pred_luma_16x16_mode_vert_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_4x4_mode_dc_av8
|
||||
_ih264_intra_pred_luma_4x4_mode_dc_av8 = ih264_intra_pred_luma_4x4_mode_dc_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_4x4_mode_diag_dl_av8
|
||||
_ih264_intra_pred_luma_4x4_mode_diag_dl_av8 = ih264_intra_pred_luma_4x4_mode_diag_dl_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_4x4_mode_diag_dr_av8
|
||||
_ih264_intra_pred_luma_4x4_mode_diag_dr_av8 = ih264_intra_pred_luma_4x4_mode_diag_dr_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_4x4_mode_horz_av8
|
||||
_ih264_intra_pred_luma_4x4_mode_horz_av8 = ih264_intra_pred_luma_4x4_mode_horz_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_4x4_mode_horz_d_av8
|
||||
_ih264_intra_pred_luma_4x4_mode_horz_d_av8 = ih264_intra_pred_luma_4x4_mode_horz_d_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_4x4_mode_horz_u_av8
|
||||
_ih264_intra_pred_luma_4x4_mode_horz_u_av8 = ih264_intra_pred_luma_4x4_mode_horz_u_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_4x4_mode_vert_av8
|
||||
_ih264_intra_pred_luma_4x4_mode_vert_av8 = ih264_intra_pred_luma_4x4_mode_vert_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_4x4_mode_vert_l_av8
|
||||
_ih264_intra_pred_luma_4x4_mode_vert_l_av8 = ih264_intra_pred_luma_4x4_mode_vert_l_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_4x4_mode_vert_r_av8
|
||||
_ih264_intra_pred_luma_4x4_mode_vert_r_av8 = ih264_intra_pred_luma_4x4_mode_vert_r_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_8x8_mode_dc_av8
|
||||
_ih264_intra_pred_luma_8x8_mode_dc_av8 = ih264_intra_pred_luma_8x8_mode_dc_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_8x8_mode_diag_dl_av8
|
||||
_ih264_intra_pred_luma_8x8_mode_diag_dl_av8 = ih264_intra_pred_luma_8x8_mode_diag_dl_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_8x8_mode_diag_dr_av8
|
||||
_ih264_intra_pred_luma_8x8_mode_diag_dr_av8 = ih264_intra_pred_luma_8x8_mode_diag_dr_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_8x8_mode_horz_av8
|
||||
_ih264_intra_pred_luma_8x8_mode_horz_av8 = ih264_intra_pred_luma_8x8_mode_horz_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_8x8_mode_horz_d_av8
|
||||
_ih264_intra_pred_luma_8x8_mode_horz_d_av8 = ih264_intra_pred_luma_8x8_mode_horz_d_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_8x8_mode_horz_u_av8
|
||||
_ih264_intra_pred_luma_8x8_mode_horz_u_av8 = ih264_intra_pred_luma_8x8_mode_horz_u_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_8x8_mode_vert_av8
|
||||
_ih264_intra_pred_luma_8x8_mode_vert_av8 = ih264_intra_pred_luma_8x8_mode_vert_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_8x8_mode_vert_l_av8
|
||||
_ih264_intra_pred_luma_8x8_mode_vert_l_av8 = ih264_intra_pred_luma_8x8_mode_vert_l_av8
|
||||
|
||||
.global _ih264_intra_pred_luma_8x8_mode_vert_r_av8
|
||||
_ih264_intra_pred_luma_8x8_mode_vert_r_av8 = ih264_intra_pred_luma_8x8_mode_vert_r_av8
|
||||
|
||||
.global _ih264_iquant_itrans_recon_4x4_av8
|
||||
_ih264_iquant_itrans_recon_4x4_av8 = ih264_iquant_itrans_recon_4x4_av8
|
||||
|
||||
.global _ih264_iquant_itrans_recon_4x4_dc_av8
|
||||
_ih264_iquant_itrans_recon_4x4_dc_av8 = ih264_iquant_itrans_recon_4x4_dc_av8
|
||||
|
||||
.global _ih264_iquant_itrans_recon_8x8_av8
|
||||
_ih264_iquant_itrans_recon_8x8_av8 = ih264_iquant_itrans_recon_8x8_av8
|
||||
|
||||
.global _ih264_iquant_itrans_recon_8x8_dc_av8
|
||||
_ih264_iquant_itrans_recon_8x8_dc_av8 = ih264_iquant_itrans_recon_8x8_dc_av8
|
||||
|
||||
.global _ih264_iquant_itrans_recon_chroma_4x4_av8
|
||||
_ih264_iquant_itrans_recon_chroma_4x4_av8 = ih264_iquant_itrans_recon_chroma_4x4_av8
|
||||
|
||||
.global _ih264_iquant_itrans_recon_chroma_4x4_dc_av8
|
||||
_ih264_iquant_itrans_recon_chroma_4x4_dc_av8 = ih264_iquant_itrans_recon_chroma_4x4_dc_av8
|
||||
|
||||
.global _ih264_pad_left_chroma_av8
|
||||
_ih264_pad_left_chroma_av8 = ih264_pad_left_chroma_av8
|
||||
|
||||
.global _ih264_pad_left_luma_av8
|
||||
_ih264_pad_left_luma_av8 = ih264_pad_left_luma_av8
|
||||
|
||||
.global _ih264_pad_right_chroma_av8
|
||||
_ih264_pad_right_chroma_av8 = ih264_pad_right_chroma_av8
|
||||
|
||||
.global _ih264_pad_right_luma_av8
|
||||
_ih264_pad_right_luma_av8 = ih264_pad_right_luma_av8
|
||||
|
||||
.global _ih264_pad_top_av8
|
||||
_ih264_pad_top_av8 = ih264_pad_top_av8
|
||||
|
||||
.global _ih264_weighted_bi_pred_chroma_av8
|
||||
_ih264_weighted_bi_pred_chroma_av8 = ih264_weighted_bi_pred_chroma_av8
|
||||
|
||||
.global _ih264_weighted_bi_pred_luma_av8
|
||||
_ih264_weighted_bi_pred_luma_av8 = ih264_weighted_bi_pred_luma_av8
|
||||
|
||||
.global _ih264_weighted_pred_chroma_av8
|
||||
_ih264_weighted_pred_chroma_av8 = ih264_weighted_pred_chroma_av8
|
||||
|
||||
.global _ih264_weighted_pred_luma_av8
|
||||
_ih264_weighted_pred_luma_av8 = ih264_weighted_pred_luma_av8
|
Loading…
Add table
Add a link
Reference in a new issue