From e2df71d87c8015a0f19467cb196ec50ed11860bc Mon Sep 17 00:00:00 2001
From: Vestral <16190165+Vestrel@users.noreply.github.com>
Date: Sat, 15 Mar 2025 10:02:46 +0900
Subject: [PATCH] Enable ASLR

---
 CMakeLists.txt                           |  1 +
 Utilities/JITASM.cpp                     | 10 +----
 buildfiles/cmake/ConfigureCompiler.cmake | 19 +-------
 rpcs3/Emu/Cell/PPUFunction.cpp           |  5 ++-
 rpcs3/Emu/Cell/PPUThread.cpp             | 38 +++++++++-------
 rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp   | 14 ++++--
 rpcs3/Emu/Cell/SPUThread.cpp             | 56 +++++++++++++++++-------
 rpcs3/rpcs3.vcxproj                      | 10 ++---
 8 files changed, 84 insertions(+), 69 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 40f48a6d5d..ea1a194aec 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.28)
 
 project(rpcs3 LANGUAGES C CXX)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
 	if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 11)
diff --git a/Utilities/JITASM.cpp b/Utilities/JITASM.cpp
index 63628f448b..acb5f40b04 100644
--- a/Utilities/JITASM.cpp
+++ b/Utilities/JITASM.cpp
@@ -344,15 +344,7 @@ jit_runtime_base& asmjit::get_global_runtime()
 {
 	custom_runtime() noexcept
 	{
-		// Search starting in first 2 GiB of memory
-		for (u64 addr = size;; addr += size)
-		{
-			if (auto ptr = utils::memory_reserve(size, reinterpret_cast(addr)))
-			{
-				m_pos.raw() = static_cast(ptr);
-				break;
-			}
-		}
+		ensure(m_pos.raw() = static_cast(utils::memory_reserve(size)));
 
 		// Initialize "end" pointer
 		m_max = m_pos + size;
diff --git a/buildfiles/cmake/ConfigureCompiler.cmake b/buildfiles/cmake/ConfigureCompiler.cmake
index d58baab29b..c1a30d4f93 100644
--- a/buildfiles/cmake/ConfigureCompiler.cmake
+++ b/buildfiles/cmake/ConfigureCompiler.cmake
@@ -5,13 +5,12 @@ if(MSVC)
 	add_compile_definitions(
 		_CRT_SECURE_NO_DEPRECATE=1 _CRT_NON_CONFORMING_SWPRINTFS=1 _SCL_SECURE_NO_WARNINGS=1
 		NOMINMAX _ENABLE_EXTENDED_ALIGNED_STORAGE=1 _HAS_EXCEPTIONS=0)
-	add_link_options(/DYNAMICBASE:NO /BASE:0x10000 /FIXED)
+	add_link_options(/DYNAMICBASE:YES)
 
 	#TODO: Some of these could be cleaned up
 	add_compile_options(/wd4805) # Comparing boolean and int
 	add_compile_options(/wd4804) # Using integer operators with booleans
 	add_compile_options(/wd4200) # Zero-sized array in struct/union
-	add_link_options(/ignore:4281) # Undesirable base address 0x10000
 
 	# MSVC 2017 uses iterator as base class internally, causing a lot of warning spam
 	add_compile_definitions(_SILENCE_CXX17_ITERATOR_BASE_CLASS_DEPRECATION_WARNING=1)
@@ -19,8 +18,6 @@ if(MSVC)
 	# Increase stack limit to 8 MB
 	add_link_options(/STACK:8388608,1048576)
 else()
-	# Some distros have the compilers set to use PIE by default, but RPCS3 doesn't work with PIE, so we need to disable it.
-	check_cxx_compiler_flag("-no-pie" HAS_NO_PIE)
 	check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
 	check_cxx_compiler_flag("-msse -msse2 -mcx16" COMPILER_X86)
 	if (APPLE)
@@ -99,15 +96,6 @@ else()
 	if(NOT APPLE AND NOT WIN32)
 		# This hides our LLVM from mesa's LLVM, otherwise we get some unresolvable conflicts.
 		add_link_options(-Wl,--exclude-libs,ALL)
-
-		if(HAS_NO_PIE)
-			add_link_options(-no-pie)
-		endif()
-	elseif(APPLE)
-		if (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
-			add_link_options(-Wl,-image_base,0x10000 -Wl,-pagezero_size,0x10000)
-			add_link_options(-Wl,-no_pie)
-		endif()
 	elseif(WIN32)
 		add_compile_definitions(__STDC_FORMAT_MACROS=1)
@@ -116,11 +104,6 @@ else()
 
 		# Increase stack limit to 8 MB
 		add_link_options(-Wl,--stack -Wl,8388608)
-
-		# For arm64 windows, the image base cannot be below 4GB or the OS rejects the binary without much explanation.
-		if(COMPILER_X86)
-			add_link_options(-Wl,--image-base,0x10000)
-		endif()
 	endif()
 
 	# Specify C++ library to use as standard C++ when using clang (not required on linux due to GNU)
diff --git a/rpcs3/Emu/Cell/PPUFunction.cpp b/rpcs3/Emu/Cell/PPUFunction.cpp
index 5ac681d6af..1cc15440c7 100644
--- a/rpcs3/Emu/Cell/PPUFunction.cpp
+++ b/rpcs3/Emu/Cell/PPUFunction.cpp
@@ -1902,8 +1902,9 @@ auto gen_ghc_cpp_trampoline(ppu_intrp_func_t fn_target)
 		// Take second ghc arg
 		c.mov(args[0], x86::rbp);
 		c.mov(args[2].r32(), x86::dword_ptr(args[0], ::offset32(&ppu_thread::cia)));
-		c.add(args[2], x86::qword_ptr(reinterpret_cast(&vm::g_base_addr)));
-		c.jmp(fn_target);
+		c.movabs(args[1], reinterpret_cast(&vm::g_base_addr));
+		c.add(args[2], x86::qword_ptr(args[1]));
+		c.jmp(Imm(fn_target));
 	};
 }
diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp
index 04268f3ca1..9045bd3fe3 100644
--- a/rpcs3/Emu/Cell/PPUThread.cpp
+++ b/rpcs3/Emu/Cell/PPUThread.cpp
@@ -220,7 +220,8 @@ const auto ppu_gateway = build_function_asm("ppu_gateway",
 	c.mov(x86::qword_ptr(args[0], ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs)), x86::rsp);
 
 	// Initialize args
-	c.mov(x86::r13, x86::qword_ptr(reinterpret_cast(&vm::g_exec_addr)));
+	c.movabs(x86::r13, reinterpret_cast(&vm::g_exec_addr));
+	c.mov(x86::r13, x86::qword_ptr(x86::r13));
 	c.mov(x86::rbp, args[0]);
 	c.mov(x86::edx, x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia))); // Load PC
@@ -232,7 +233,8 @@ const auto ppu_gateway = build_function_asm("ppu_gateway",
 	c.shl(x86::edx, 13);
 	c.mov(x86::r12d, x86::edx); // Load relocation base
-	c.mov(x86::rbx, x86::qword_ptr(reinterpret_cast(&vm::g_base_addr)));
+	c.movabs(x86::rbx, reinterpret_cast(&vm::g_base_addr));
+	c.mov(x86::rbx, x86::qword_ptr(x86::rbx));
 	c.mov(x86::r14, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 0))); // Load some registers
 	c.mov(x86::rsi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 1)));
 	c.mov(x86::rdi, x86::qword_ptr(x86::rbp, ::offset32(&ppu_thread::gpr, 2)));
@@ -3164,8 +3166,9 @@ const auto ppu_stcx_accurate_tx = build_function_asm
-	c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast(&vm::g_sudo_addr)));
+	c.movabs(x86::rbp, reinterpret_cast(&vm::g_sudo_addr));
+	c.mov(x86::rbp, x86::qword_ptr(x86::rbp));
 	c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
 	c.and_(x86::rbp, -128);
 	c.prefetchw(x86::byte_ptr(x86::rbp, 0));
 	c.prefetchw(x86::byte_ptr(x86::rbp, 64));
 	c.movzx(args[0].r32(), args[0].r16());
 	c.shr(args[0].r32(), 1);
-	c.lea(x86::r11, x86::qword_ptr(reinterpret_cast(+vm::g_reservations), args[0]));
+	c.movabs(x86::r11, reinterpret_cast(+vm::g_reservations));
+	c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
 	c.and_(x86::r11, -128 / 2);
 	c.and_(args[0].r32(), 63);
@@ -3217,7 +3222,8 @@ const auto ppu_stcx_accurate_tx = build_function_asm
-	c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast(&g_rtm_tx_limit2)));
+	c.movabs(x86::r13, reinterpret_cast(&g_rtm_tx_limit2));
+	c.cmp(x86::rax, x86::qword_ptr(x86::r13));
 	c.jae(fall);
 });
@@ -3342,8 +3348,9 @@ const auto ppu_stcx_accurate_tx = build_function_asm
& dir_queue, std::vector 14GB
 	// The growth in memory requirements of LLVM is not linear with file size of course
 	// But these estimates should hopefully protect RPCS3 in the coming years
-	// Especially when thread count is on the rise with each CPU generation
+	// Especially when thread count is on the rise with each CPU generation
 	atomic_t file_size_limit = static_cast(std::clamp(utils::aligned_div(utils::get_total_memory(), 2000), 65536, u32{umax}));
 
 	const u32 software_thread_limit = std::min(g_cfg.core.llvm_threads ? g_cfg.core.llvm_threads : u32{umax}, ::size32(file_queue));
@@ -4301,8 +4308,8 @@ extern void ppu_precompile(std::vector& dir_queue, std::vector
 		(&Emu.klic[0]));
-
-		if (src)
+
+		if (src)
 		{
 			ppu_log.error("Possible missed KLIC for precompilation of '%s', please report to developers.", path);
@@ -4333,7 +4340,7 @@ extern void ppu_precompile(std::vector& dir_queue, std::vector
 			(utils::sub_saturate(value, file_size));
 			restore_mem = value - new_val;
 			value = new_val;
@@ -4506,8 +4513,8 @@ extern void ppu_precompile(std::vector& dir_queue, std::vector
 		(&Emu.klic[0]));
-
-		if (src)
+
+		if (src)
 		{
 			ppu_log.error("Possible missed KLIC for precompilation of '%s', please report to developers.", path);
 		}
@@ -5079,7 +5086,8 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_s
 		code_size_until_jump = buf_end - buf_start;
 
 		c.add(x86::edx, seg0);
-		c.mov(x86::rax, x86::qword_ptr(reinterpret_cast(&vm::g_exec_addr)));
+		c.movabs(x86::rax, reinterpret_cast(&vm::g_exec_addr));
+		c.mov(x86::rax, x86::qword_ptr(x86::rax));
 		c.mov(x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia)), x86::edx);
 		c.mov(x86::rax, x86::qword_ptr(x86::rax, x86::rdx, 1, 0)); // Load call target
@@ -5340,7 +5348,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only, u64 file_s
 			sha1_update(&ctx, reinterpret_cast(addrs.data()), addrs.size() * sizeof(be_t));
 		}
 
-		part.jit_bounds = std::move(local_jit_bounds);
+		part.jit_bounds = std::move(local_jit_bounds);
 		local_jit_bounds = std::make_shared>(u32{umax}, 0);
 	}
diff --git a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
index 3eb75082a2..6b4b456f3d 100644
--- a/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
+++ b/rpcs3/Emu/Cell/SPUASMJITRecompiler.cpp
@@ -2770,14 +2770,17 @@ void spu_recompiler::FREST(spu_opcode_t op)
 	const u64 fraction_lut_addr = reinterpret_cast(spu_frest_fraction_lut);
 	const u64 exponent_lut_addr = reinterpret_cast(spu_frest_exponent_lut);
 
+	c->movabs(*arg0, fraction_lut_addr);
+	c->movabs(*arg1, exponent_lut_addr);
+
 	for (u32 index = 0; index < 4; index++)
 	{
 		c->pextrd(*qw0, v_fraction, index);
-		c->mov(*qw1, asmjit::x86::dword_ptr(fraction_lut_addr, *qw0, 2));
+		c->mov(*qw1, asmjit::x86::dword_ptr(*arg0, *qw0, 2));
 		c->pinsrd(v_fraction, *qw1, index);
 
 		c->pextrd(*qw0, v_exponent, index);
-		c->mov(*qw1, asmjit::x86::dword_ptr(exponent_lut_addr, *qw0, 2));
+		c->mov(*qw1, asmjit::x86::dword_ptr(*arg1, *qw0, 2));
 		c->pinsrd(v_exponent, *qw1, index);
 	}
@@ -2810,14 +2813,17 @@ void spu_recompiler::FRSQEST(spu_opcode_t op)
 	const u64 fraction_lut_addr = reinterpret_cast(spu_frsqest_fraction_lut);
 	const u64 exponent_lut_addr = reinterpret_cast(spu_frsqest_exponent_lut);
 
+	c->movabs(*arg0, fraction_lut_addr);
+	c->movabs(*arg1, exponent_lut_addr);
+
 	for (u32 index = 0; index < 4; index++)
 	{
 		c->pextrd(*qw0, v_fraction, index);
-		c->mov(*qw1, asmjit::x86::dword_ptr(fraction_lut_addr, *qw0, 2));
+		c->mov(*qw1, asmjit::x86::dword_ptr(*arg0, *qw0, 2));
 		c->pinsrd(v_fraction, *qw1, index);
 		c->pextrd(*qw0, v_exponent, index);
-		c->mov(*qw1, asmjit::x86::dword_ptr(exponent_lut_addr, *qw0, 2));
+		c->mov(*qw1, asmjit::x86::dword_ptr(*arg1, *qw0, 2));
 		c->pinsrd(v_exponent, *qw1, index);
 	}
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 589961eb92..94b715a01b 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -628,6 +628,8 @@ const auto spu_putllc_tx = build_function_asm
-	c.mov(args[1], x86::qword_ptr(reinterpret_cast(&vm::g_sudo_addr)));
+	c.movabs(args[1], reinterpret_cast(&vm::g_sudo_addr));
+	c.mov(args[1], x86::qword_ptr(args[1]));
 	c.lea(args[1], x86::qword_ptr(args[1], args[0]));
 	c.prefetchw(x86::byte_ptr(args[1], 0));
 	c.prefetchw(x86::byte_ptr(args[1], 64));
 	c.and_(args[0].r32(), 0xff80);
 	c.shr(args[0].r32(), 1);
-	c.lea(x86::r11, x86::qword_ptr(reinterpret_cast(+vm::g_reservations), args[0]));
+	c.movabs(x86::r11, reinterpret_cast(+vm::g_reservations));
+	c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
 
 	// Prepare data
 	if (s_tsx_avx)
@@ -703,7 +709,8 @@ const auto spu_putllc_tx = build_function_asm
-	c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast(&g_rtm_tx_limit2)));
+	c.movabs(x86::rbx, reinterpret_cast(&g_rtm_tx_limit2));
+	c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
 	c.jae(fall);
 });
@@ -853,8 +860,13 @@ const auto spu_putllc_tx = build_function_asm
-	c.mov(x86::r11, x86::qword_ptr(reinterpret_cast(&vm::g_sudo_addr)));
+	c.movabs(x86::r11, reinterpret_cast(&vm::g_sudo_addr));
+	c.mov(x86::r11, x86::qword_ptr(x86::r11));
 	c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
 	c.prefetchw(x86::byte_ptr(x86::r11, 0));
 	c.prefetchw(x86::byte_ptr(x86::r11, 64));
@@ -921,7 +936,8 @@ const auto spu_putlluc_tx = build_function_asm
-	c.lea(args[1], x86::qword_ptr(reinterpret_cast(+vm::g_reservations), args[0]));
+	c.movabs(args[1], reinterpret_cast(+vm::g_reservations));
+	c.lea(args[1], x86::qword_ptr(args[1], args[0]));
 
 	// Alloc args[0] to stamp0
 	const auto stamp0 = args[0];
@@ -933,7 +949,8 @@ const auto spu_putlluc_tx = build_function_asm
-	c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast(&g_rtm_tx_limit2)));
+	c.movabs(x86::rbx, reinterpret_cast(&g_rtm_tx_limit2));
+	c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
 	c.jae(fall);
 });
@@ -986,6 +1003,10 @@ const auto spu_putlluc_tx = build_function_asm
-	c.mov(x86::rbp, x86::qword_ptr(reinterpret_cast(&vm::g_sudo_addr)));
+	c.movabs(x86::rbp, reinterpret_cast(&vm::g_sudo_addr));
+	c.mov(x86::rbp, x86::qword_ptr(x86::rbp));
 	c.lea(x86::rbp, x86::qword_ptr(x86::rbp, args[0]));
 	c.and_(args[0].r32(), 0xff80);
 	c.shr(args[0].r32(), 1);
-	c.lea(x86::r11, x86::qword_ptr(reinterpret_cast(+vm::g_reservations), args[0]));
+	c.movabs(x86::r11, reinterpret_cast(+vm::g_reservations));
+	c.lea(x86::r11, x86::qword_ptr(x86::r11, args[0]));
 
 	// Alloc args[0] to stamp0
 	const auto stamp0 = args[0];
@@ -1039,7 +1062,8 @@ const auto spu_getllar_tx = build_function_asm
-	c.cmp(x86::rax, x86::qword_ptr(reinterpret_cast(&g_rtm_tx_limit1)));
+	c.movabs(x86::rbx, reinterpret_cast(&g_rtm_tx_limit1));
+	c.cmp(x86::rax, x86::qword_ptr(x86::rbx));
 	c.jae(fall);
 });
@@ -4443,7 +4467,7 @@ bool spu_thread::is_exec_code(u32 addr, std::span ls_ptr, u32 base_add
 	// Detect "invalid" relative branches
 	// Branch offsets that, although are the only way to get X code address using relative address
 	// Rely on overflow/underflow of SPU memory bounds
-	// Thus they would behave differently if SPU LS memory size was to increase (evolving the CELL architecture was the original plan)
+	// Thus they would behave differently if SPU LS memory size was to increase (evolving the CELL architecture was the original plan)
 	// Making them highly unlikely to be valid code
 
 	if (rel < 0)
@@ -4664,7 +4688,7 @@ bool spu_thread::process_mfc_cmd()
 		// Add to chance if previous wait was long enough
 		const u32 add_count = zero_count == 3 && total_wait >= 40 ? (total_wait - 39) * 40
-			: zero_count == 2 && total_wait >= 11 ? (total_wait - 10) * 40
+			: zero_count == 2 && total_wait >= 11 ? (total_wait - 10) * 40
 			: zero_count == 1 && total_wait >= 8 ? (total_wait - 7) * 40
 			: zero_count == 0 && total_wait >= 6 ? (total_wait - 5) * 40
 			: 0;
@@ -5002,7 +5026,7 @@ bool spu_thread::process_mfc_cmd()
 			if (group->spurs_running == max_run - 1)
 			{
-				// Try to let another thread slip in and take over execution
+				// Try to let another thread slip in and take over execution
 				thread_ctrl::wait_for(300);
 
 				// Update value
@@ -5027,7 +5051,7 @@ bool spu_thread::process_mfc_cmd()
 			if (spurs_last_task_timestamp)
 			{
 				const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
-				spurs_average_task_duration -= avg_entry;
+				spurs_average_task_duration -= avg_entry;
 				spurs_average_task_duration += std::min(45'000, current - spurs_last_task_timestamp);
 				spu_log.trace("duration: %d, avg=%d", current - spurs_last_task_timestamp, spurs_average_task_duration / spurs_task_count_to_calculate);
 				spurs_last_task_timestamp = 0;
@@ -5048,7 +5072,7 @@ bool spu_thread::process_mfc_cmd()
 			}
 
 			max_run = group->max_run;
-
+
 			prev_running = group->spurs_running.fetch_op([max_run](u32& x)
 			{
 				if (x < max_run)
@@ -5113,7 +5137,7 @@ bool spu_thread::process_mfc_cmd()
 			if (spurs_last_task_timestamp)
 			{
 				const u64 avg_entry = spurs_average_task_duration / spurs_task_count_to_calculate;
-				spurs_average_task_duration -= avg_entry;
+				spurs_average_task_duration -= avg_entry;
 				spurs_average_task_duration += std::min(45'000, current - spurs_last_task_timestamp);
 				spu_log.trace("duration: %d, avg=%d", current - spurs_last_task_timestamp, spurs_average_task_duration / spurs_task_count_to_calculate);
 				spurs_last_task_timestamp = 0;
diff --git a/rpcs3/rpcs3.vcxproj b/rpcs3/rpcs3.vcxproj
index 025c3d0a15..6540926d9c 100644
--- a/rpcs3/rpcs3.vcxproj
+++ b/rpcs3/rpcs3.vcxproj
@@ -96,10 +96,9 @@
       true
       false
       $(OutDir)\rpcs3.exe
-      false
+      true
       Windows
       true
-      0x10000
       mainCRTStartup
@@ -156,10 +155,11 @@
       Debug
       true
       $(OutDir)\rpcs3d.exe
-      false
+      true
       Windows
       true
-      0x10000
+
+
       mainCRTStartup
@@ -2173,4 +2173,4 @@
-
\ No newline at end of file
+
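
Note on the recurring JIT change: most x86-64 instructions can only encode a 32-bit displacement for an absolute memory operand, so operands like x86::qword_ptr(reinterpret_cast<u64>(&some_global)) were only encodable while the executable was linked at the fixed low base 0x10000. With /DYNAMICBASE and PIE enabled, globals may sit anywhere in the 64-bit address space, so each such access becomes a movabs of the full 64-bit address into a scratch register followed by a register-indirect access. The sketch below is illustrative only, assuming asmjit's x86::Assembler and a hypothetical global g_example (not actual RPCS3 code); it mirrors the pattern used in the hunks above.

    #include <asmjit/x86.h>
    #include <cstdint>

    // Hypothetical global used only for illustration.
    static std::uint64_t g_example = 0;

    // Emit "rax = g_example" in a way that still encodes when &g_example
    // does not fit in a 32-bit displacement (i.e. under ASLR).
    static void emit_load_example(asmjit::x86::Assembler& c)
    {
        using namespace asmjit;

        // Old style, valid only with a fixed low image base:
        //   c.mov(x86::rax, x86::qword_ptr(reinterpret_cast<std::uint64_t>(&g_example)));

        // New style: materialize the full 64-bit address, then dereference it.
        c.movabs(x86::rax, reinterpret_cast<std::uint64_t>(&g_example));
        c.mov(x86::rax, x86::qword_ptr(x86::rax));
    }

The same idea covers the indexed cases (the reservation table and the FREST/FRSQEST lookup tables): the base address is first materialized with movabs into a spare register, and the index/scale is then applied via lea or a base+index memory operand on that register.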