mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-14 18:58:36 +12:00
rsx: Rework RSX offloading
- Use a lockless queue - Do not enqueue small transfers
This commit is contained in:
parent
c32c1b0a62
commit
b893a75002
6 changed files with 210 additions and 176 deletions
129
rpcs3/Emu/RSX/RSXOffload.cpp
Normal file
129
rpcs3/Emu/RSX/RSXOffload.cpp
Normal file
|
@ -0,0 +1,129 @@
|
||||||
|
#include "stdafx.h"
|
||||||
|
|
||||||
|
#include "Common/BufferUtils.h"
|
||||||
|
#include "Emu/System.h"
|
||||||
|
#include "RSXOffload.h"
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
// initialization
|
||||||
|
void dma_manager::init()
|
||||||
|
{
|
||||||
|
m_worker_state = thread_state::created;
|
||||||
|
m_worker_thread = std::thread([this]()
|
||||||
|
{
|
||||||
|
if (!g_cfg.video.multithreaded_rsx)
|
||||||
|
{
|
||||||
|
// Abort
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (g_cfg.core.thread_scheduler_enabled)
|
||||||
|
{
|
||||||
|
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool idle = false;
|
||||||
|
while (m_worker_state != thread_state::finished)
|
||||||
|
{
|
||||||
|
if (m_jobs_count)
|
||||||
|
{
|
||||||
|
if (idle)
|
||||||
|
{
|
||||||
|
thread_ctrl::set_native_priority(0);
|
||||||
|
idle = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto slice = m_work_queue.pop_all(); slice; slice.pop_front())
|
||||||
|
{
|
||||||
|
auto task = *slice;
|
||||||
|
switch (task.type)
|
||||||
|
{
|
||||||
|
case raw_copy:
|
||||||
|
memcpy(task.dst, task.src, task.length);
|
||||||
|
break;
|
||||||
|
case vector_copy:
|
||||||
|
memcpy(task.dst, task.opt_storage.data(), task.length);
|
||||||
|
break;
|
||||||
|
case index_emulate:
|
||||||
|
write_index_array_for_non_indexed_non_native_primitive_to_buffer(
|
||||||
|
reinterpret_cast<char*>(task.dst),
|
||||||
|
static_cast<rsx::primitive_type>(task.aux_param0),
|
||||||
|
task.length);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ASSUME(0);
|
||||||
|
fmt::throw_exception("Unreachable" HERE);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_jobs_count--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
idle = true;
|
||||||
|
thread_ctrl::set_native_priority(-1);
|
||||||
|
std::this_thread::yield();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// General transport
|
||||||
|
void dma_manager::copy(void *dst, std::vector<u8>& src, u32 length)
|
||||||
|
{
|
||||||
|
if (length <= max_immediate_transfer_size || !g_cfg.video.multithreaded_rsx)
|
||||||
|
{
|
||||||
|
std::memcpy(dst, src.data(), length);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
++m_jobs_count;
|
||||||
|
m_work_queue.push(dst, src, length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void dma_manager::copy(void *dst, void *src, u32 length)
|
||||||
|
{
|
||||||
|
if (length <= max_immediate_transfer_size || !g_cfg.video.multithreaded_rsx)
|
||||||
|
{
|
||||||
|
std::memcpy(dst, src, length);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
++m_jobs_count;
|
||||||
|
m_work_queue.push(dst, src, length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Vertex utilities
|
||||||
|
void dma_manager::emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count)
|
||||||
|
{
|
||||||
|
if (!g_cfg.video.multithreaded_rsx)
|
||||||
|
{
|
||||||
|
write_index_array_for_non_indexed_non_native_primitive_to_buffer(
|
||||||
|
reinterpret_cast<char*>(dst), primitive, count);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
++m_jobs_count;
|
||||||
|
m_work_queue.push(dst, primitive, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Synchronization
|
||||||
|
void dma_manager::sync()
|
||||||
|
{
|
||||||
|
if (g_cfg.video.multithreaded_rsx)
|
||||||
|
{
|
||||||
|
while (m_jobs_count)
|
||||||
|
_mm_lfence();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void dma_manager::join()
|
||||||
|
{
|
||||||
|
m_worker_state = thread_state::finished;
|
||||||
|
m_worker_thread.join();
|
||||||
|
}
|
||||||
|
}
|
72
rpcs3/Emu/RSX/RSXOffload.h
Normal file
72
rpcs3/Emu/RSX/RSXOffload.h
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Utilities/types.h"
|
||||||
|
#include "Utilities/lockless.h"
|
||||||
|
#include "Utilities/Thread.h"
|
||||||
|
#include "gcm_enums.h"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
class dma_manager
|
||||||
|
{
|
||||||
|
enum op
|
||||||
|
{
|
||||||
|
raw_copy = 0,
|
||||||
|
vector_copy = 1,
|
||||||
|
index_emulate = 2
|
||||||
|
};
|
||||||
|
|
||||||
|
struct transport_packet
|
||||||
|
{
|
||||||
|
op type;
|
||||||
|
std::vector<u8> opt_storage;
|
||||||
|
void *src;
|
||||||
|
void *dst;
|
||||||
|
u32 length;
|
||||||
|
u32 aux_param0;
|
||||||
|
u32 aux_param1;
|
||||||
|
|
||||||
|
transport_packet(void *_dst, void *_src, u32 len)
|
||||||
|
: src(_src), dst(_dst), length(len), type(op::raw_copy)
|
||||||
|
{}
|
||||||
|
|
||||||
|
transport_packet(void *_dst, std::vector<u8>& _src, u32 len)
|
||||||
|
: dst(_dst), opt_storage(std::move(_src)), length(len), type(op::vector_copy)
|
||||||
|
{}
|
||||||
|
|
||||||
|
transport_packet(void *_dst, rsx::primitive_type prim, u32 len)
|
||||||
|
: dst(_dst), aux_param0(static_cast<u8>(prim)), length(len), type(op::index_emulate)
|
||||||
|
{}
|
||||||
|
};
|
||||||
|
|
||||||
|
lf_queue<transport_packet> m_work_queue;
|
||||||
|
atomic_t<int> m_jobs_count;
|
||||||
|
std::thread m_worker_thread;
|
||||||
|
thread_state m_worker_state;
|
||||||
|
|
||||||
|
// TODO: Improved benchmarks here; value determined by profiling on a Ryzen CPU, rounded to the nearest 512 bytes
|
||||||
|
const u32 max_immediate_transfer_size = 3584;
|
||||||
|
|
||||||
|
public:
|
||||||
|
dma_manager() = default;
|
||||||
|
|
||||||
|
// initialization
|
||||||
|
void init();
|
||||||
|
|
||||||
|
// General tranport
|
||||||
|
void copy(void *dst, std::vector<u8>& src, u32 length);
|
||||||
|
void copy(void *dst, void *src, u32 length);
|
||||||
|
|
||||||
|
// Vertex utilities
|
||||||
|
void emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count);
|
||||||
|
|
||||||
|
// Synchronization
|
||||||
|
void sync();
|
||||||
|
void join();
|
||||||
|
};
|
||||||
|
|
||||||
|
// Shared dma_manager instance; only declared here, the definition lives in another translation unit
extern dma_manager g_dma_manager;
|
||||||
|
}
|
|
@ -251,126 +251,6 @@ namespace rsx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// initialization
|
|
||||||
void dma_manager::init()
|
|
||||||
{
|
|
||||||
m_worker_state = thread_state::created;
|
|
||||||
m_worker_thread = std::thread([this]()
|
|
||||||
{
|
|
||||||
if (!g_cfg.video.multithreaded_rsx)
|
|
||||||
{
|
|
||||||
// Abort
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (g_cfg.core.thread_scheduler_enabled)
|
|
||||||
{
|
|
||||||
thread_ctrl::set_thread_affinity_mask(thread_ctrl::get_affinity_mask(thread_class::rsx));
|
|
||||||
}
|
|
||||||
|
|
||||||
bool idle = false;
|
|
||||||
while (m_worker_state != thread_state::finished)
|
|
||||||
{
|
|
||||||
if (!m_work_queue.empty())
|
|
||||||
{
|
|
||||||
m_queue_mutex.lock();
|
|
||||||
auto task = std::move(m_work_queue.front());
|
|
||||||
m_work_queue.pop_front();
|
|
||||||
m_queue_mutex.unlock();
|
|
||||||
|
|
||||||
if (idle)
|
|
||||||
{
|
|
||||||
thread_ctrl::set_native_priority(0);
|
|
||||||
idle = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (task.type)
|
|
||||||
{
|
|
||||||
case raw_copy:
|
|
||||||
memcpy(task.dst, task.src, task.length);
|
|
||||||
break;
|
|
||||||
case vector_copy:
|
|
||||||
memcpy(task.dst, task.opt_storage.data(), task.length);
|
|
||||||
break;
|
|
||||||
case index_emulate:
|
|
||||||
write_index_array_for_non_indexed_non_native_primitive_to_buffer(
|
|
||||||
reinterpret_cast<char*>(task.dst),
|
|
||||||
static_cast<rsx::primitive_type>(task.aux_param0),
|
|
||||||
task.length);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
ASSUME(0);
|
|
||||||
fmt::throw_exception("Unreachable" HERE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
idle = true;
|
|
||||||
thread_ctrl::set_native_priority(-1);
|
|
||||||
std::this_thread::yield();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// General transport
|
|
||||||
void dma_manager::copy(void *dst, std::vector<u8>& src, u32 length)
|
|
||||||
{
|
|
||||||
if (!g_cfg.video.multithreaded_rsx)
|
|
||||||
{
|
|
||||||
std::memcpy(dst, src.data(), length);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::lock_guard lock(m_queue_mutex);
|
|
||||||
m_work_queue.emplace_back(dst, src, length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void dma_manager::copy(void *dst, void *src, u32 length)
|
|
||||||
{
|
|
||||||
if (!g_cfg.video.multithreaded_rsx)
|
|
||||||
{
|
|
||||||
std::memcpy(dst, src, length);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::lock_guard lock(m_queue_mutex);
|
|
||||||
m_work_queue.emplace_back(dst, src, length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Vertex utilities
|
|
||||||
void dma_manager::emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count)
|
|
||||||
{
|
|
||||||
if (!g_cfg.video.multithreaded_rsx)
|
|
||||||
{
|
|
||||||
write_index_array_for_non_indexed_non_native_primitive_to_buffer(
|
|
||||||
reinterpret_cast<char*>(dst), primitive, count);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::lock_guard lock(m_queue_mutex);
|
|
||||||
m_work_queue.emplace_back(dst, primitive, count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Synchronization
|
|
||||||
void dma_manager::sync()
|
|
||||||
{
|
|
||||||
if (g_cfg.video.multithreaded_rsx)
|
|
||||||
{
|
|
||||||
while (!m_work_queue.empty())
|
|
||||||
_mm_lfence();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void dma_manager::join()
|
|
||||||
{
|
|
||||||
m_worker_state = thread_state::finished;
|
|
||||||
m_worker_thread.join();
|
|
||||||
}
|
|
||||||
|
|
||||||
thread::thread()
|
thread::thread()
|
||||||
{
|
{
|
||||||
g_current_renderer = this;
|
g_current_renderer = this;
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include "rsx_cache.h"
|
#include "rsx_cache.h"
|
||||||
#include "RSXFIFO.h"
|
#include "RSXFIFO.h"
|
||||||
#include "RSXTexture.h"
|
#include "RSXTexture.h"
|
||||||
|
#include "RSXOffload.h"
|
||||||
#include "RSXVertexProgram.h"
|
#include "RSXVertexProgram.h"
|
||||||
#include "RSXFragmentProgram.h"
|
#include "RSXFragmentProgram.h"
|
||||||
#include "rsx_methods.h"
|
#include "rsx_methods.h"
|
||||||
|
@ -297,62 +298,6 @@ namespace rsx
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class dma_manager
|
|
||||||
{
|
|
||||||
enum op
|
|
||||||
{
|
|
||||||
raw_copy = 0,
|
|
||||||
vector_copy = 1,
|
|
||||||
index_emulate = 2
|
|
||||||
};
|
|
||||||
|
|
||||||
struct transport_packet
|
|
||||||
{
|
|
||||||
op type;
|
|
||||||
std::vector<u8> opt_storage;
|
|
||||||
void *src;
|
|
||||||
void *dst;
|
|
||||||
u32 length;
|
|
||||||
u32 aux_param0;
|
|
||||||
u32 aux_param1;
|
|
||||||
|
|
||||||
transport_packet(void *_dst, void *_src, u32 len)
|
|
||||||
: src(_src), dst(_dst), length(len), type(op::raw_copy)
|
|
||||||
{}
|
|
||||||
|
|
||||||
transport_packet(void *_dst, std::vector<u8>& _src, u32 len)
|
|
||||||
: dst(_dst), opt_storage(std::move(_src)), length(len), type(op::vector_copy)
|
|
||||||
{}
|
|
||||||
|
|
||||||
transport_packet(void *_dst, rsx::primitive_type prim, u32 len)
|
|
||||||
: dst(_dst), aux_param0(static_cast<u8>(prim)), length(len), type(op::index_emulate)
|
|
||||||
{}
|
|
||||||
};
|
|
||||||
|
|
||||||
std::deque<transport_packet> m_work_queue;
|
|
||||||
std::thread m_worker_thread;
|
|
||||||
std::mutex m_queue_mutex;
|
|
||||||
thread_state m_worker_state;
|
|
||||||
|
|
||||||
public:
|
|
||||||
dma_manager() = default;
|
|
||||||
|
|
||||||
// initialization
|
|
||||||
void init();
|
|
||||||
|
|
||||||
// General tranport
|
|
||||||
void copy(void *dst, std::vector<u8>& src, u32 length);
|
|
||||||
void copy(void *dst, void *src, u32 length);
|
|
||||||
|
|
||||||
// Vertex utilities
|
|
||||||
void emulate_as_indexed(void *dst, rsx::primitive_type primitive, u32 count);
|
|
||||||
|
|
||||||
// Synchronization
|
|
||||||
void sync();
|
|
||||||
void join();
|
|
||||||
};
|
|
||||||
// Shared dma_manager instance; only declared here, the definition lives in another translation unit
extern dma_manager g_dma_manager;
|
|
||||||
|
|
||||||
struct framebuffer_layout
|
struct framebuffer_layout
|
||||||
{
|
{
|
||||||
u16 width;
|
u16 width;
|
||||||
|
|
|
@ -315,6 +315,7 @@
|
||||||
<ClCompile Include="Emu\RSX\Overlays\overlay_shader_compile_notification.cpp" />
|
<ClCompile Include="Emu\RSX\Overlays\overlay_shader_compile_notification.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\Overlays\overlay_trophy_notification.cpp" />
|
<ClCompile Include="Emu\RSX\Overlays\overlay_trophy_notification.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\RSXFIFO.cpp" />
|
<ClCompile Include="Emu\RSX\RSXFIFO.cpp" />
|
||||||
|
<ClCompile Include="Emu\RSX\RSXOffload.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\rsx_methods.cpp" />
|
<ClCompile Include="Emu\RSX\rsx_methods.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\rsx_utils.cpp" />
|
<ClCompile Include="Emu\RSX\rsx_utils.cpp" />
|
||||||
<ClCompile Include="Crypto\aes.cpp">
|
<ClCompile Include="Crypto\aes.cpp">
|
||||||
|
@ -558,6 +559,7 @@
|
||||||
<ClInclude Include="Emu\RSX\Overlays\overlays.h" />
|
<ClInclude Include="Emu\RSX\Overlays\overlays.h" />
|
||||||
<ClInclude Include="Emu\RSX\Overlays\overlay_controls.h" />
|
<ClInclude Include="Emu\RSX\Overlays\overlay_controls.h" />
|
||||||
<ClInclude Include="Emu\RSX\RSXFIFO.h" />
|
<ClInclude Include="Emu\RSX\RSXFIFO.h" />
|
||||||
|
<ClInclude Include="Emu\RSX\RSXOffload.h" />
|
||||||
<ClInclude Include="Emu\RSX\rsx_cache.h" />
|
<ClInclude Include="Emu\RSX\rsx_cache.h" />
|
||||||
<ClInclude Include="Emu\RSX\rsx_decode.h" />
|
<ClInclude Include="Emu\RSX\rsx_decode.h" />
|
||||||
<ClInclude Include="Emu\RSX\rsx_vertex_data.h" />
|
<ClInclude Include="Emu\RSX\rsx_vertex_data.h" />
|
||||||
|
|
|
@ -806,6 +806,9 @@
|
||||||
<ClCompile Include="Emu\RSX\Overlays\overlay_progress_bar.cpp">
|
<ClCompile Include="Emu\RSX\Overlays\overlay_progress_bar.cpp">
|
||||||
<Filter>Emu\GPU\RSX\Overlays</Filter>
|
<Filter>Emu\GPU\RSX\Overlays</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="Emu\RSX\RSXOffload.cpp">
|
||||||
|
<Filter>Emu\GPU\RSX</Filter>
|
||||||
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="Crypto\aes.h">
|
<ClInclude Include="Crypto\aes.h">
|
||||||
|
@ -1522,5 +1525,8 @@
|
||||||
<ClInclude Include="Emu\RSX\Common\surface_utils.h">
|
<ClInclude Include="Emu\RSX\Common\surface_utils.h">
|
||||||
<Filter>Emu\GPU\RSX\Common</Filter>
|
<Filter>Emu\GPU\RSX\Common</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
<ClInclude Include="Emu\RSX\RSXOffload.h">
|
||||||
|
<Filter>Emu\GPU\RSX</Filter>
|
||||||
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
</Project>
|
</Project>
|
Loading…
Add table
Add a link
Reference in a new issue