diff --git a/rpcs3/Emu/RSX/RSXOffload.cpp b/rpcs3/Emu/RSX/RSXOffload.cpp new file mode 100644 index 0000000000..244701e8e5 --- /dev/null +++ b/rpcs3/Emu/RSX/RSXOffload.cpp @@ -0,0 +1,129 @@ +#include "stdafx.h" + +#include "Common/BufferUtils.h" +#include "Emu/System.h" +#include "RSXOffload.h" + +namespace rsx +{ + // initialization + /* Sets m_worker_state to created and starts m_worker_thread. The thread returns at once when g_cfg.video.multithreaded_rsx is off; otherwise it loops until m_worker_state is thread_state::finished, draining m_work_queue whenever m_jobs_count is non-zero. */ + void dma_manager::init() + { + m_worker_state = thread_state::created; + m_worker_thread = std::thread([this]() + { + if (!g_cfg.video.multithreaded_rsx) + { + // Abort + return; + } + + if (g_cfg.core.thread_scheduler_enabled) + { + thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx)); + } + + /* idle records that the native priority was lowered, so it is raised back to 0 only once when work reappears. */ + bool idle = false; + while (m_worker_state != thread_state::finished) + { + if (m_jobs_count) + { + if (idle) + { + thread_ctrl::set_native_priority(0); + idle = false; + } + + for (auto slice = m_work_queue.pop_all(); slice; slice.pop_front()) + { + /* NOTE(review): this takes a copy of the packet, opt_storage included, before running it - presumably a reference would do; confirm against the lf_queue slice semantics. */ + auto task = *slice; + switch (task.type) + { + case raw_copy: + memcpy(task.dst, task.src, task.length); + break; + case vector_copy: + memcpy(task.dst, task.opt_storage.data(), task.length); + break; + case index_emulate: + write_index_array_for_non_indexed_non_native_primitive_to_buffer( + reinterpret_cast(task.dst), + static_cast(task.aux_param0), + task.length); + break; + /* op declares only the three cases handled above. */ + default: + ASSUME(0); + fmt::throw_exception("Unreachable" HERE); + } + + /* Retire the job; sync() spins until this counter reaches zero. */ + m_jobs_count--; + } + } + else + { + /* Nothing pending: lower the native priority and yield rather than block. */ + idle = true; + thread_ctrl::set_native_priority(-1); + std::this_thread::yield(); + } + } + }); + } + + // General transport + /* Copies length bytes from src.data() to dst. The copy runs inline via std::memcpy when length <= max_immediate_transfer_size or multithreaded_rsx is off; otherwise the job is counted in m_jobs_count and pushed to m_work_queue. NOTE(review): the matching transport_packet constructor does std::move(_src), so src is presumably left moved-from after a deferred copy - confirm how push() forwards its arguments. */ + void dma_manager::copy(void *dst, std::vector& src, u32 length) + { + if (length <= max_immediate_transfer_size || !g_cfg.video.multithreaded_rsx) + { + std::memcpy(dst, src.data(), length); + } + else + { + ++m_jobs_count; + m_work_queue.push(dst, src, length); + } + } + + /* Raw-pointer overload with the same inline-or-deferred split as the vector overload. NOTE(review): on the deferred path the worker reads src later, so src presumably has to stay valid until sync() returns - confirm with callers. */ + void dma_manager::copy(void *dst, void *src, u32 length) + { + if (length <= max_immediate_transfer_size || !g_cfg.video.multithreaded_rsx) + { + std::memcpy(dst, src, length); + } + 
else + { + ++m_jobs_count; + m_work_queue.push(dst, src, length); + } + } + + // Vertex utilities + /* Fills dst through write_index_array_for_non_indexed_non_native_primitive_to_buffer for the given primitive and count: inline when multithreaded_rsx is off, otherwise as a queued job. Unlike copy(), no size threshold is applied here. */ + void dma_manager::emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count) + { + if (!g_cfg.video.multithreaded_rsx) + { + write_index_array_for_non_indexed_non_native_primitive_to_buffer( + reinterpret_cast(dst), primitive, count); + } + else + { + ++m_jobs_count; + m_work_queue.push(dst, primitive, count); + } + } + + // Synchronization + /* Spins, issuing _mm_lfence() each iteration, until m_jobs_count reads zero; does nothing when multithreaded_rsx is off. */ + void dma_manager::sync() + { + if (g_cfg.video.multithreaded_rsx) + { + while (m_jobs_count) + _mm_lfence(); + } + } + + /* Sets m_worker_state to finished so the worker loop exits, then joins the thread. NOTE(review): m_worker_state is declared as a plain thread_state yet is written here and polled by the worker, and join() is called without a joinable() check - confirm init() always runs first and that the unsynchronized flag is intentional. */ + void dma_manager::join() + { + m_worker_state = thread_state::finished; + m_worker_thread.join(); + } +} diff --git a/rpcs3/Emu/RSX/RSXOffload.h b/rpcs3/Emu/RSX/RSXOffload.h new file mode 100644 index 0000000000..3c8315a564 --- /dev/null +++ b/rpcs3/Emu/RSX/RSXOffload.h @@ -0,0 +1,72 @@ +#pragma once + +#include "Utilities/types.h" +#include "Utilities/lockless.h" +#include "Utilities/Thread.h" +#include "gcm_enums.h" + +#include +#include + +namespace rsx +{ + /* Hands memory copies and index-array generation to one worker thread through m_work_queue; callers wait for completion with sync(). */ + class dma_manager + { + enum op + { + raw_copy = 0, + vector_copy = 1, + index_emulate = 2 + }; + + /* One queued job. raw_copy sets src, dst and length; vector_copy sets opt_storage, dst and length; index_emulate sets dst, aux_param0 and length. Scalar members a constructor does not list are left uninitialized, and aux_param1 is set by none of them. */ + struct transport_packet + { + op type; + std::vector opt_storage; + void *src; + void *dst; + u32 length; + u32 aux_param0; + u32 aux_param1; + + transport_packet(void *_dst, void *_src, u32 len) + : src(_src), dst(_dst), length(len), type(op::raw_copy) + {} + + transport_packet(void *_dst, std::vector& _src, u32 len) + : dst(_dst), opt_storage(std::move(_src)), length(len), type(op::vector_copy) + {} + + transport_packet(void *_dst, rsx::primitive_type prim, u32 len) + : dst(_dst), aux_param0(static_cast(prim)), length(len), type(op::index_emulate) + {} + }; + + lf_queue m_work_queue; + /* Jobs pushed but not yet finished by the worker: incremented before each push, decremented after each job runs. */ + atomic_t m_jobs_count; + std::thread m_worker_thread; + thread_state m_worker_state; + + // TODO: Improved benchmarks here; value determined by profiling on a Ryzen CPU, rounded to the nearest 512 bytes + const u32 max_immediate_transfer_size = 3584; + + 
public: + dma_manager() = default; + + // initialization + void init(); + + // General transport + void copy(void *dst, std::vector& src, u32 length); + void copy(void *dst, void *src, u32 length); + + // Vertex utilities + void emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count); + + // Synchronization + void sync(); + void join(); + }; + + extern dma_manager g_dma_manager; +} diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index caaed33974..a6c6befa07 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -251,126 +251,6 @@ namespace rsx } } - // initialization - void dma_manager::init() - { - m_worker_state = thread_state::created; - m_worker_thread = std::thread([this]() - { - if (!g_cfg.video.multithreaded_rsx) - { - // Abort - return; - } - - if (g_cfg.core.thread_scheduler_enabled) - { - thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx)); - } - - bool idle = false; - while (m_worker_state != thread_state::finished) - { - if (!m_work_queue.empty()) - { - m_queue_mutex.lock(); - auto task = std::move(m_work_queue.front()); - m_work_queue.pop_front(); - m_queue_mutex.unlock(); - - if (idle) - { - thread_ctrl::set_native_priority(0); - idle = false; - } - - switch (task.type) - { - case raw_copy: - memcpy(task.dst, task.src, task.length); - break; - case vector_copy: - memcpy(task.dst, task.opt_storage.data(), task.length); - break; - case index_emulate: - write_index_array_for_non_indexed_non_native_primitive_to_buffer( - reinterpret_cast(task.dst), - static_cast(task.aux_param0), - task.length); - break; - default: - ASSUME(0); - fmt::throw_exception("Unreachable" HERE); - } - } - else - { - idle = true; - thread_ctrl::set_native_priority(-1); - std::this_thread::yield(); - } - } - }); - } - - // General tranport - void dma_manager::copy(void *dst, std::vector& src, u32 length) - { - if (!g_cfg.video.multithreaded_rsx) - { - std::memcpy(dst, src.data(), 
length); - } - else - { - std::lock_guard lock(m_queue_mutex); - m_work_queue.emplace_back(dst, src, length); - } - } - - void dma_manager::copy(void *dst, void *src, u32 length) - { - if (!g_cfg.video.multithreaded_rsx) - { - std::memcpy(dst, src, length); - } - else - { - std::lock_guard lock(m_queue_mutex); - m_work_queue.emplace_back(dst, src, length); - } - } - - // Vertex utilities - void dma_manager::emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count) - { - if (!g_cfg.video.multithreaded_rsx) - { - write_index_array_for_non_indexed_non_native_primitive_to_buffer( - reinterpret_cast(dst), primitive, count); - } - else - { - std::lock_guard lock(m_queue_mutex); - m_work_queue.emplace_back(dst, primitive, count); - } - } - - // Synchronization - void dma_manager::sync() - { - if (g_cfg.video.multithreaded_rsx) - { - while (!m_work_queue.empty()) - _mm_lfence(); - } - } - - void dma_manager::join() - { - m_worker_state = thread_state::finished; - m_worker_thread.join(); - } - thread::thread() { g_current_renderer = this; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 260f66ea56..045520cf9e 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -6,6 +6,7 @@ #include "rsx_cache.h" #include "RSXFIFO.h" #include "RSXTexture.h" +#include "RSXOffload.h" #include "RSXVertexProgram.h" #include "RSXFragmentProgram.h" #include "rsx_methods.h" @@ -297,62 +298,6 @@ namespace rsx } }; - class dma_manager - { - enum op - { - raw_copy = 0, - vector_copy = 1, - index_emulate = 2 - }; - - struct transport_packet - { - op type; - std::vector opt_storage; - void *src; - void *dst; - u32 length; - u32 aux_param0; - u32 aux_param1; - - transport_packet(void *_dst, void *_src, u32 len) - : src(_src), dst(_dst), length(len), type(op::raw_copy) - {} - - transport_packet(void *_dst, std::vector& _src, u32 len) - : dst(_dst), opt_storage(std::move(_src)), length(len), type(op::vector_copy) - {} - - transport_packet(void 
*_dst, rsx::primitive_type prim, u32 len) - : dst(_dst), aux_param0(static_cast(prim)), length(len), type(op::index_emulate) - {} - }; - - std::deque m_work_queue; - std::thread m_worker_thread; - std::mutex m_queue_mutex; - thread_state m_worker_state; - - public: - dma_manager() = default; - - // initialization - void init(); - - // General tranport - void copy(void *dst, std::vector& src, u32 length); - void copy(void *dst, void *src, u32 length); - - // Vertex utilities - void emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count); - - // Synchronization - void sync(); - void join(); - }; - extern dma_manager g_dma_manager; - struct framebuffer_layout { u16 width; diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index eba0b0ce61..6e2db7cfc0 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -315,6 +315,7 @@ + @@ -558,6 +559,7 @@ + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index e1541288ee..a78eb8d563 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -806,6 +806,9 @@ Emu\GPU\RSX\Overlays + + Emu\GPU\RSX + @@ -1522,5 +1525,8 @@ Emu\GPU\RSX\Common + + Emu\GPU\RSX + \ No newline at end of file