diff --git a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp index c7489a0b68..040c9ab832 100644 --- a/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp +++ b/rpcs3/Emu/Cell/Modules/cellGcmSys.cpp @@ -12,6 +12,7 @@ #include "sysPrxForUser.h" #include +#include LOG_CHANNEL(cellGcmSys); @@ -40,10 +41,11 @@ struct CellGcmSysConfig { }; u64 system_mode = 0; -u32 reserved_size = 0; u32 local_size = 0; u32 local_addr = 0; +atomic_t reserved_size = 0; + // Auxiliary functions /* @@ -72,7 +74,7 @@ u32 gcmGetLocalMemorySize(u32 sdk_version) } CellGcmOffsetTable offsetTable; -u16 IoMapTable[0xC00]; +atomic_t IoMapTable[0xC00]{}; void InitOffsetTable() { @@ -1013,8 +1015,6 @@ s32 gcmMapEaIoAddress(u32 ea, u32 io, u32 size, bool is_strict) ea >>= 20, io >>= 20, size >>= 20; - IoMapTable[ea] = size; - // Fill the offset table for (u32 i = 0; i < size; i++) { @@ -1022,6 +1022,7 @@ s32 gcmMapEaIoAddress(u32 ea, u32 io, u32 size, bool is_strict) offsetTable.eaAddress[io + i] = ea + i; } + IoMapTable[ea] = size; return CELL_OK; } @@ -1079,8 +1080,6 @@ s32 cellGcmMapMainMemory(u32 ea, u32 size, vm::ptr offset) ea >>= 20, size >>= 20; - IoMapTable[ea] = size; - // Fill the offset table for (u32 i = 0; i < size; i++) { @@ -1088,6 +1087,8 @@ s32 cellGcmMapMainMemory(u32 ea, u32 size, vm::ptr offset) offsetTable.eaAddress[io + i] = ea + i; } + IoMapTable[ea] = size; + *offset = io << 20; return CELL_OK; } @@ -1127,15 +1128,17 @@ s32 cellGcmUnmapEaIoAddress(u32 ea) { cellGcmSys.trace("cellGcmUnmapEaIoAddress(ea=0x%x)", ea); - if (const u32 size = std::exchange(IoMapTable[ea >>= 20], 0)) + if (const u32 size = IoMapTable[ea >>= 20].exchange(0)) { const u32 io = offsetTable.ioAddress[ea]; for (u32 i = 0; i < size; i++) { - RSXIOMem.io[ea + i].release(offsetTable.ioAddress[ea + i] = 0xFFFF); - RSXIOMem.ea[io + i].release(offsetTable.eaAddress[io + i] = 0xFFFF); + RSXIOMem.io[ea + i].raw() = offsetTable.ioAddress[ea + i] = 0xFFFF; + RSXIOMem.ea[io + i].raw() = 
offsetTable.eaAddress[io + i] = 0xFFFF; } + + std::atomic_thread_fence(std::memory_order_seq_cst); } else { @@ -1150,15 +1153,17 @@ s32 cellGcmUnmapIoAddress(u32 io) { cellGcmSys.trace("cellGcmUnmapIoAddress(io=0x%x)", io); - if (u32 size = std::exchange(IoMapTable[RSXIOMem.ea[io >>= 20]], 0)) + if (u32 size = IoMapTable[RSXIOMem.ea[io >>= 20]].exchange(0)) { const u32 ea = offsetTable.eaAddress[io]; for (u32 i = 0; i < size; i++) { - RSXIOMem.io[ea + i].release(offsetTable.ioAddress[ea + i] = 0xFFFF); - RSXIOMem.ea[io + i].release(offsetTable.eaAddress[io + i] = 0xFFFF); + RSXIOMem.io[ea + i].raw() = offsetTable.ioAddress[ea + i] = 0xFFFF; + RSXIOMem.ea[io + i].raw() = offsetTable.eaAddress[io + i] = 0xFFFF; } + + std::atomic_thread_fence(std::memory_order_seq_cst); } else { diff --git a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp index 9b8292db27..369e49f784 100644 --- a/rpcs3/Emu/Cell/Modules/cellSpurs.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSpurs.cpp @@ -15,6 +15,8 @@ #include "sysPrxForUser.h" #include "cellSpurs.h" +#include + LOG_CHANNEL(cellSpurs); error_code sys_spu_image_close(vm::ptr img); @@ -2575,7 +2577,7 @@ s32 _cellSpursWorkloadFlagReceiver(vm::ptr spurs, u32 wid, u32 is_set return CELL_SPURS_POLICY_MODULE_ERROR_STAT; } - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_seq_cst); if (s32 res = spurs->wklFlag.flag.atomic_op([spurs, wid, is_set](be_t& flag) -> s32 { diff --git a/rpcs3/Emu/Cell/Modules/cellSync.cpp b/rpcs3/Emu/Cell/Modules/cellSync.cpp index 82aa1eb4f3..4dd5f38b4e 100644 --- a/rpcs3/Emu/Cell/Modules/cellSync.cpp +++ b/rpcs3/Emu/Cell/Modules/cellSync.cpp @@ -6,6 +6,8 @@ #include "Emu/Cell/lv2/sys_process.h" #include "cellSync.h" +#include + LOG_CHANNEL(cellSync); template<> @@ -85,8 +87,7 @@ error_code cellSyncMutexLock(ppu_thread& ppu, vm::ptr mutex) } } - _mm_mfence(); - + std::atomic_thread_fence(std::memory_order_release); return CELL_OK; } @@ -195,7 +196,7 @@ error_code 
cellSyncBarrierTryNotify(vm::ptr barrier) return CELL_SYNC_ERROR_ALIGN; } - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_release); if (!barrier->ctrl.atomic_op<&CellSyncBarrier::try_notify>()) { @@ -219,7 +220,7 @@ error_code cellSyncBarrierWait(ppu_thread& ppu, vm::ptr barrier return CELL_SYNC_ERROR_ALIGN; } - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_release); while (!barrier->ctrl.atomic_op<&CellSyncBarrier::try_wait>()) { @@ -246,7 +247,7 @@ error_code cellSyncBarrierTryWait(vm::ptr barrier) return CELL_SYNC_ERROR_ALIGN; } - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_release); if (!barrier->ctrl.atomic_op<&CellSyncBarrier::try_wait>()) { @@ -280,7 +281,7 @@ error_code cellSyncRwmInitialize(vm::ptr rwm, vm::ptr buffer, rwm->size = buffer_size; rwm->buffer = buffer; - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_release); return CELL_OK; } @@ -452,7 +453,7 @@ error_code cellSyncQueueInitialize(vm::ptr queue, vm::ptr buf queue->depth = depth; queue->buffer = buffer; - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_release); return CELL_OK; } @@ -865,7 +866,7 @@ error_code cellSyncLFQueueInitialize(vm::ptr queue, vm::cptrpush1.load(); _mm_lfence(); + const auto old = queue->push1.load(); auto push = old; if (var1) @@ -989,9 +990,10 @@ error_code _cellSyncLFQueueCompletePushPointer(ppu_thread& ppu, vm::ptrpush2.load(); _mm_lfence(); + const auto old = queue->push2.load(); auto push2 = old; + // Loads must be in this order const auto old2 = queue->push3.load(); auto push3 = old2; @@ -1192,7 +1194,7 @@ error_code _cellSyncLFQueueGetPopPointer(ppu_thread& ppu, vm::ptrpop1.load(); _mm_lfence(); + const auto old = queue->pop1.load(); auto pop = old; if (var1) @@ -1288,9 +1290,10 @@ error_code _cellSyncLFQueueCompletePopPointer(ppu_thread& ppu, vm::ptrpop2.load(); _mm_lfence(); + const auto old = queue->pop2.load(); auto pop2 = old; + // Loads must be in this order const auto old2 = 
queue->pop3.load(); auto pop3 = old2; @@ -1489,9 +1492,10 @@ error_code cellSyncLFQueueClear(vm::ptr queue) while (true) { - const auto old = queue->pop1.load(); _mm_lfence(); + const auto old = queue->pop1.load(); auto pop = old; + // Loads must be in this order const auto push = queue->push1.load(); s32 var1, var2; @@ -1540,8 +1544,9 @@ error_code cellSyncLFQueueSize(vm::ptr queue, vm::ptr size while (true) { - const auto old = queue->pop3.load(); _mm_lfence(); + const auto old = queue->pop3.load(); + // Loads must be in this order u32 var1 = (u16)queue->pop1.load().m_h1; u32 var2 = (u16)queue->push1.load().m_h5; diff --git a/rpcs3/Emu/Cell/Modules/sys_lwmutex_.cpp b/rpcs3/Emu/Cell/Modules/sys_lwmutex_.cpp index e8feda368c..f3f1811897 100644 --- a/rpcs3/Emu/Cell/Modules/sys_lwmutex_.cpp +++ b/rpcs3/Emu/Cell/Modules/sys_lwmutex_.cpp @@ -8,6 +8,8 @@ #include "Emu/Cell/lv2/sys_mutex.h" #include "sysPrxForUser.h" +#include + extern logs::channel sysPrxForUser; error_code sys_lwmutex_create(ppu_thread& ppu, vm::ptr lwmutex, vm::ptr attr) @@ -128,7 +130,7 @@ error_code sys_lwmutex_lock(ppu_thread& ppu, vm::ptr lwmutex, u64 // recursive locking succeeded lwmutex->recursive_count++; - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_release); return CELL_OK; } @@ -288,7 +290,7 @@ error_code sys_lwmutex_trylock(ppu_thread& ppu, vm::ptr lwmutex) // recursive locking succeeded lwmutex->recursive_count++; - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_release); return CELL_OK; } diff --git a/rpcs3/Emu/Cell/PPUInterpreter.cpp b/rpcs3/Emu/Cell/PPUInterpreter.cpp index 007a5d93be..98428b20ea 100644 --- a/rpcs3/Emu/Cell/PPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/PPUInterpreter.cpp @@ -8,6 +8,7 @@ #include "Emu/Cell/Common.h" #include +#include #if !defined(_MSC_VER) && !defined(__SSSE3__) #define _mm_shuffle_epi8(opa, opb) opb @@ -2966,7 +2967,7 @@ bool ppu_interpreter::CRANDC(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::ISYNC(ppu_thread& ppu, 
ppu_opcode_t op) { - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_acquire); return true; } @@ -4046,7 +4047,7 @@ bool ppu_interpreter::LFSUX(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::SYNC(ppu_thread& ppu, ppu_opcode_t op) { - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_seq_cst); return true; } @@ -4280,7 +4281,7 @@ bool ppu_interpreter::SRADI(ppu_thread& ppu, ppu_opcode_t op) bool ppu_interpreter::EIEIO(ppu_thread& ppu, ppu_opcode_t op) { - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_seq_cst); return true; } diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 8d8f1d5ba5..9fefa7bd17 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -1848,7 +1848,7 @@ void PPUTranslator::CRANDC(ppu_opcode_t op) void PPUTranslator::ISYNC(ppu_opcode_t op) { - m_ir->CreateFence(AtomicOrdering::SequentiallyConsistent); + m_ir->CreateFence(AtomicOrdering::Acquire); } void PPUTranslator::CRXOR(ppu_opcode_t op) @@ -3105,7 +3105,9 @@ void PPUTranslator::LFSUX(ppu_opcode_t op) void PPUTranslator::SYNC(ppu_opcode_t op) { - m_ir->CreateFence(AtomicOrdering::SequentiallyConsistent); + // sync: Full seq cst barrier + // lwsync: Acquire-release barrier (orders everything except store->load; Release alone would allow load->load reordering) + m_ir->CreateFence(op.l10 ? 
AtomicOrdering::AcquireRelease : AtomicOrdering::SequentiallyConsistent); } void PPUTranslator::LFDX(ppu_opcode_t op) diff --git a/rpcs3/Emu/Cell/RawSPUThread.cpp b/rpcs3/Emu/Cell/RawSPUThread.cpp index 20a3c7df82..7c9e03dee5 100644 --- a/rpcs3/Emu/Cell/RawSPUThread.cpp +++ b/rpcs3/Emu/Cell/RawSPUThread.cpp @@ -6,6 +6,8 @@ #include "Emu/Cell/RawSPUThread.h" +#include + // Originally, SPU MFC registers are accessed externally in a concurrent manner (don't mix with channels, SPU MFC channels are isolated) thread_local spu_mfc_cmd g_tls_mfc[8] = {}; @@ -173,7 +175,7 @@ bool spu_thread::write_reg(const u32 addr, const u32 value) case MFC_SYNC_CMD: { g_tls_mfc[index] = {}; - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_seq_cst); return true; } } diff --git a/rpcs3/Emu/Cell/SPUInterpreter.cpp b/rpcs3/Emu/Cell/SPUInterpreter.cpp index a4b7080610..65f7a34138 100644 --- a/rpcs3/Emu/Cell/SPUInterpreter.cpp +++ b/rpcs3/Emu/Cell/SPUInterpreter.cpp @@ -140,14 +140,14 @@ bool spu_interpreter::LNOP(spu_thread& spu, spu_opcode_t op) // This instruction must be used following a store instruction that modifies the instruction stream. bool spu_interpreter::SYNC(spu_thread& spu, spu_opcode_t op) { - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_seq_cst); return true; } // This instruction forces all earlier load, store, and channel instructions to complete before proceeding. 
bool spu_interpreter::DSYNC(spu_thread& spu, spu_opcode_t op) { - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_seq_cst); return true; } diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index 5b1c794568..110e976434 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -1662,7 +1662,7 @@ void spu_thread::do_mfc(bool wait) if (&args - mfc_queue <= removed) { // Remove barrier-class command if it's the first in the queue - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_seq_cst); removed++; return true; } @@ -2086,7 +2086,7 @@ bool spu_thread::process_mfc_cmd() { if (mfc_size == 0) { - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_seq_cst); } else { @@ -3025,12 +3025,13 @@ bool spu_thread::stop_and_signal(u32 code) case 0x100: { + // SPU thread group yield (TODO) if (ch_out_mbox.get_count()) { fmt::throw_exception("STOP code 0x100: Out_MBox is not empty" HERE); } - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_seq_cst); return true; } diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h index 5dbc4ba032..6ea72f6836 100644 --- a/rpcs3/Emu/Cell/SPUThread.h +++ b/rpcs3/Emu/Cell/SPUThread.h @@ -284,7 +284,7 @@ public: // push unconditionally (overwriting latest value), returns true if needs signaling void push(cpu_thread& spu, u32 value) { - value3 = value; _mm_sfence(); + value3.store(value); if (values.atomic_op([=](sync_var_t& data) -> bool { @@ -325,7 +325,6 @@ public: data.value0 = data.value1; data.value1 = data.value2; - _mm_lfence(); data.value2 = this->value3; } else diff --git a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp index 01110ad1ef..b218bf6164 100644 --- a/rpcs3/Emu/Cell/lv2/sys_rsx.cpp +++ b/rpcs3/Emu/Cell/lv2/sys_rsx.cpp @@ -1,6 +1,7 @@ #include "stdafx.h" #include "sys_rsx.h" +#include #include "Emu/System.h" #include "Emu/Cell/PPUModule.h" #include "Emu/RSX/GSRender.h" @@ -175,7 +176,7 @@ error_code 
sys_rsx_context_iomap(u32 context_id, u32 io, u32 ea, u32 size, u64 f { sys_rsx.warning("sys_rsx_context_iomap(context_id=0x%x, io=0x%x, ea=0x%x, size=0x%x, flags=0x%llx)", context_id, io, ea, size, flags); - if (!size || io & 0xFFFFF || ea + u64{size} >= rsx::constants::local_mem_base || ea & 0xFFFFF || size & 0xFFFFF || + if (!size || io & 0xFFFFF || ea + u64{size} > rsx::constants::local_mem_base || ea & 0xFFFFF || size & 0xFFFFF || rsx::get_current_renderer()->main_mem_size < io + u64{size}) { return CELL_EINVAL; @@ -195,8 +196,8 @@ error_code sys_rsx_context_iomap(u32 context_id, u32 io, u32 ea, u32 size, u64 f for (u32 i = 0; i < size; i++) { - RSXIOMem.io[ea + i].release(io + i); - RSXIOMem.ea[io + i].release(ea + i); + RSXIOMem.io[ea + i].raw() = io + i; + RSXIOMem.ea[io + i].raw() = ea + i; } return CELL_OK; @@ -220,10 +221,11 @@ error_code sys_rsx_context_iounmap(u32 context_id, u32 io, u32 size) const u32 end = (io >>= 20) + (size >>= 20); for (u32 ea = RSXIOMem.ea[io]; io < end;) { - RSXIOMem.io[ea++].release(0xFFFF); - RSXIOMem.ea[io++].release(0xFFFF); + RSXIOMem.io[ea++].raw() = 0xFFFF; + RSXIOMem.ea[io++].raw() = 0xFFFF; } + std::atomic_thread_fence(std::memory_order_seq_cst); return CELL_OK; } diff --git a/rpcs3/Emu/Memory/vm.cpp b/rpcs3/Emu/Memory/vm.cpp index cafabaa7f9..029a9f20f9 100644 --- a/rpcs3/Emu/Memory/vm.cpp +++ b/rpcs3/Emu/Memory/vm.cpp @@ -750,7 +750,7 @@ namespace vm const u32 size = ::align(orig_size, min_page_size); // return if addr or size is invalid - if (!size || addr < this->addr || addr + u64{size} > this->addr + this->size || flags & 0x10) + if (!size || addr < this->addr || addr + u64{size} > this->addr + u64{this->size} || flags & 0x10) { return 0; } @@ -823,7 +823,7 @@ namespace vm std::pair> block_t::get(u32 addr, u32 size) { - if (addr < this->addr || addr + u64{size} > this->addr + this->size) + if (addr < this->addr || addr + u64{size} > this->addr + u64{this->size}) { return {addr, nullptr}; } @@ -852,7 +852,7 @@ 
namespace vm } // Range check - if (std::max(size, addr - found->first + size) > found->second.second->size()) + if (addr + u64{size} > found->first + u64{found->second.second->size()}) { return {addr, nullptr}; } diff --git a/rpcs3/Emu/RSX/Capture/rsx_replay.cpp b/rpcs3/Emu/RSX/Capture/rsx_replay.cpp index c7ea301d22..15585b6be8 100644 --- a/rpcs3/Emu/RSX/Capture/rsx_replay.cpp +++ b/rpcs3/Emu/RSX/Capture/rsx_replay.cpp @@ -8,6 +8,7 @@ #include "Emu/RSX/GSRender.h" #include +#include #include namespace rsx @@ -179,7 +180,7 @@ namespace rsx { // Load registers while the RSX is still idle method_registers = frame->reg_state; - _mm_mfence(); + std::atomic_thread_fence(std::memory_order_seq_cst); // start up fifo buffer by dumping the put ptr to first stop sys_rsx_context_attribute(context_id, 0x001, 0x10000000, fifo_stops[0], 0, 0); diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index 7c715160f0..1556e0254f 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -2279,8 +2279,8 @@ namespace rsx image_resource_type vram_texture = 0; image_resource_type dest_texture = 0; - const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0)); - u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0)); + const u32 dst_address = vm::get_addr(dst.pixels); + u32 src_address = vm::get_addr(src.pixels); const f32 scale_x = fabsf(dst.scale_x); const f32 scale_y = fabsf(dst.scale_y); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 4bb90cd6fc..6997e5a833 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -46,7 +46,6 @@ struct work_item { while (!processed) { - _mm_lfence(); std::this_thread::yield(); } diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index 7cfa9109ff..13ad45a224 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -491,7 +491,7 @@ void 
GLGSRender::read_buffers() continue; rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); - u32 texaddr = (u32)((u64)color_buffer.ptr - (u64)vm::base(0)); + u32 texaddr = vm::get_addr(color_buffer.ptr); const utils::address_range range = utils::address_range::start_length(texaddr, pitch * height); bool success = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_render_targets[i])); diff --git a/rpcs3/Emu/RSX/RSXOffload.cpp b/rpcs3/Emu/RSX/RSXOffload.cpp index 66347510b3..952555fff1 100644 --- a/rpcs3/Emu/RSX/RSXOffload.cpp +++ b/rpcs3/Emu/RSX/RSXOffload.cpp @@ -123,7 +123,7 @@ namespace rsx } while (m_enqueued_count.load() != m_processed_count) - _mm_lfence(); + _mm_pause(); } void dma_manager::join() diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index a221bbb20e..80c962b6cc 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "Emu/Memory/vm.h" #include "Emu/System.h" #include "Emu/IdManager.h" diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index c0955c022e..606a264499 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -275,7 +275,6 @@ struct flush_request_task { while (num_waiters.load() != 0) { - _mm_lfence(); _mm_pause(); } } @@ -284,7 +283,6 @@ struct flush_request_task { while (pending_state.load()) { - _mm_lfence(); std::this_thread::yield(); } } diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index 1296a49922..f260fbb9fd 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -1,4 +1,4 @@ -#include "stdafx.h" +#include "stdafx.h" #include "rsx_methods.h" #include "RSXThread.h" #include "Emu/Memory/vm_reservation.h" @@ -10,6 +10,7 @@ #include "Capture/rsx_capture.h" #include +#include template <> void fmt_class_string::format(std::string& out, u64 arg) @@ -66,13 +67,13 @@ namespace rsx // 
Get raw BE value arg = be_t{arg}.raw(); - const auto& sema = vm::_ref>(addr); + const auto& sema = vm::_ref>>(addr); // TODO: Remove vblank semaphore hack - if (sema == arg || addr == rsx->ctxt_addr + 0x30) return; + if (sema.load() == arg || addr == rsx->ctxt_addr + 0x30) return; u64 start = get_system_time(); - while (sema != arg) + while (sema.load() != arg) { if (Emu.IsStopped()) return; @@ -107,7 +108,7 @@ namespace rsx rsx->performance_counters.idle_time += (get_system_time() - start); } - void semaphore_release(thread* rsx, u32 _reg, u32 arg) + void semaphore_release(thread* rsx, u32 /*_reg*/, u32 arg) { rsx->sync(); rsx->sync_point_request = true; @@ -115,7 +116,7 @@ namespace rsx if (LIKELY(g_use_rtm)) { - vm::write32(addr, arg); + vm::_ref>(addr) = arg; } else {