rsx: Fixups

- Also fix visual corruption when using disjoint indexed draws

- Refactor draw call emit again (vk)

- Improve execution barrier resolve
  - Allow vertex/index rebase inside begin/end pair
  - Add ALPHA_TEST to list of excluded methods [TODO: defer raster state]

- gl bringup

- Simplify
  - using the simple_array gets back a few more fps :)
This commit is contained in:
kd-11 2018-10-01 23:05:51 +03:00 committed by kd-11
parent e01d2f08c9
commit 677b16f5c6
19 changed files with 2242 additions and 565 deletions

View file

@@ -175,20 +175,23 @@ namespace rsx
const u32 vertSize = get_vertex_type_size_on_host(info.type(), info.size()); const u32 vertSize = get_vertex_type_size_on_host(info.type(), info.size());
const u32 vertStride = info.stride(); const u32 vertStride = info.stride();
for (const auto& range : method_registers.current_draw_clause.draw_command_ranges) method_registers.current_draw_clause.begin();
do
{ {
const u32 vertCount = range.count; const auto& range = method_registers.current_draw_clause.get_range();
const u32 vertCount = range.count;
const size_t bufferSize = vertCount * vertStride + vertSize; const size_t bufferSize = vertCount * vertStride + vertSize;
frame_capture_data::memory_block block; frame_capture_data::memory_block block;
block.ioOffset = base_address; block.ioOffset = base_address;
block.location = memory_location; block.location = memory_location;
block.offset = (range.first * vertStride); block.offset = (range.first * vertStride);
frame_capture_data::memory_block_data block_data; frame_capture_data::memory_block_data block_data;
block_data.data.resize(bufferSize); block_data.data.resize(bufferSize);
std::memcpy(block_data.data.data(), vm::base(addr + block.offset), bufferSize); std::memcpy(block_data.data.data(), vm::base(addr + block.offset), bufferSize);
insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data)); insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data));
} }
while (method_registers.current_draw_clause.next());
} }
} }
// save index buffer if used // save index buffer if used
@@ -211,8 +214,10 @@ namespace rsx
const bool is_primitive_restart_enabled = method_registers.restart_index_enabled(); const bool is_primitive_restart_enabled = method_registers.restart_index_enabled();
const u32 primitive_restart_index = method_registers.restart_index(); const u32 primitive_restart_index = method_registers.restart_index();
for (const auto& range : method_registers.current_draw_clause.draw_command_ranges) method_registers.current_draw_clause.begin();
do
{ {
const auto& range = method_registers.current_draw_clause.get_range();
const u32 idxFirst = range.first; const u32 idxFirst = range.first;
const u32 idxCount = range.count; const u32 idxCount = range.count;
const u32 idxAddr = base_addr + (idxFirst * type_size); const u32 idxAddr = base_addr + (idxFirst * type_size);
@@ -261,6 +266,7 @@ namespace rsx
} }
} }
} }
while (method_registers.current_draw_clause.next());
if (min_index > max_index) if (min_index > max_index)
{ {

View file

@@ -435,14 +435,11 @@ namespace
} }
} }
void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::span<const gsl::byte> src_ptr, const std::vector<rsx::draw_range_t>& first_count_commands, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness) void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::span<const gsl::byte> src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness)
{ {
verify(HERE), (vector_element_count > 0); verify(HERE), (vector_element_count > 0);
const u32 src_read_stride = rsx::get_vertex_type_size_on_host(type, vector_element_count); const u32 src_read_stride = rsx::get_vertex_type_size_on_host(type, vector_element_count);
// HACK! This is a legacy routine only used by D3D12
const u32 count = first_count_commands.front().count;
bool use_stream_no_stride = false; bool use_stream_no_stride = false;
bool use_stream_with_stride = false; bool use_stream_with_stride = false;
@@ -799,7 +796,7 @@ namespace
template<typename T> template<typename T>
std::tuple<u32, u32, u32> write_index_array_data_to_buffer_impl(gsl::span<u32> dst, std::tuple<u32, u32, u32> write_index_array_data_to_buffer_impl(gsl::span<u32> dst,
gsl::span<const be_t<T>> src, gsl::span<const be_t<T>> src,
rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index, const rsx::draw_range_t &range, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
u32 base_index, std::function<bool(rsx::primitive_type)> expands) u32 base_index, std::function<bool(rsx::primitive_type)> expands)
{ {
if (!expands(draw_mode)) return upload_untouched<T>(src, dst, restart_index_enabled, restart_index, base_index); if (!expands(draw_mode)) return upload_untouched<T>(src, dst, restart_index_enabled, restart_index, base_index);
@@ -809,7 +806,8 @@ namespace
case rsx::primitive_type::line_loop: case rsx::primitive_type::line_loop:
{ {
const auto &returnvalue = upload_untouched<T>(src, dst, restart_index_enabled, restart_index, base_index); const auto &returnvalue = upload_untouched<T>(src, dst, restart_index_enabled, restart_index, base_index);
dst[range.count] = src[0]; const auto index_count = dst.size_bytes() / sizeof(T);
dst[index_count] = src[0];
return returnvalue; return returnvalue;
} }
case rsx::primitive_type::polygon: case rsx::primitive_type::polygon:
@@ -826,51 +824,23 @@ namespace
std::tuple<u32, u32, u32> write_index_array_data_to_buffer(gsl::span<gsl::byte> dst_ptr, std::tuple<u32, u32, u32> write_index_array_data_to_buffer(gsl::span<gsl::byte> dst_ptr,
gsl::span<const gsl::byte> src_ptr, gsl::span<const gsl::byte> src_ptr,
rsx::index_array_type type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index, rsx::index_array_type type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
const std::vector<rsx::draw_range_t> &first_count_arguments,
u32 base_index, std::function<bool(rsx::primitive_type)> expands) u32 base_index, std::function<bool(rsx::primitive_type)> expands)
{ {
u32 read = 0;
u32 written = 0;
u32 min_index = -1u;
u32 max_index = 0;
const u32 type_size = get_index_type_size(type);
for (const auto &range : first_count_arguments)
{
auto src = src_ptr.subspan(range.command_data_offset, range.count * type_size);
auto dst = dst_ptr.subspan(written * type_size);
switch (type) switch (type)
{ {
case rsx::index_array_type::u16: case rsx::index_array_type::u16:
{ {
auto ret = write_index_array_data_to_buffer_impl<u16>(as_span_workaround<u32>(dst), return write_index_array_data_to_buffer_impl<u16>(as_span_workaround<u32>(dst_ptr),
as_const_span<const be_t<u16>>(src), draw_mode, restart_index_enabled, restart_index, range, base_index, expands); as_const_span<const be_t<u16>>(src_ptr), draw_mode, restart_index_enabled, restart_index, base_index, expands);
min_index = std::min<u32>(std::get<0>(ret), min_index);
max_index = std::min<u32>(std::get<1>(ret), max_index);
written += std::get<2>(ret);
break;
} }
case rsx::index_array_type::u32: case rsx::index_array_type::u32:
{ {
auto ret = write_index_array_data_to_buffer_impl<u32>(as_span_workaround<u32>(dst), return write_index_array_data_to_buffer_impl<u32>(as_span_workaround<u32>(dst_ptr),
as_const_span<const be_t<u32>>(src), draw_mode, restart_index_enabled, restart_index, range, base_index, expands); as_const_span<const be_t<u32>>(src_ptr), draw_mode, restart_index_enabled, restart_index, base_index, expands);
min_index = std::min<u32>(std::get<0>(ret), min_index);
max_index = std::min<u32>(std::get<1>(ret), max_index);
written += std::get<2>(ret);
break;
} }
default: default:
fmt::throw_exception("Unreachable" HERE); fmt::throw_exception("Unreachable" HERE);
} }
read += range.count;
}
return std::make_tuple(min_index, max_index, written);
} }
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w) void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w)

View file

@@ -10,7 +10,7 @@
* Write count vertex attributes from src_ptr. * Write count vertex attributes from src_ptr.
* src_ptr array layout is deduced from the type, vector element count and src_stride arguments. * src_ptr array layout is deduced from the type, vector element count and src_stride arguments.
*/ */
void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::span<const gsl::byte> src_ptr, const std::vector<rsx::draw_range_t>& first_count_commands, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness); void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::span<const gsl::byte> src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness);
/* /*
* If primitive mode is not supported and need to be emulated (using an index buffer) returns false. * If primitive mode is not supported and need to be emulated (using an index buffer) returns false.
@@ -33,7 +33,7 @@ u32 get_index_type_size(rsx::index_array_type type);
* The function expands index buffer for non native primitive type if expands(draw_mode) return true. * The function expands index buffer for non native primitive type if expands(draw_mode) return true.
*/ */
std::tuple<u32, u32, u32> write_index_array_data_to_buffer(gsl::span<gsl::byte> dst, gsl::span<const gsl::byte> src, std::tuple<u32, u32, u32> write_index_array_data_to_buffer(gsl::span<gsl::byte> dst, gsl::span<const gsl::byte> src,
rsx::index_array_type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index, const std::vector<rsx::draw_range_t> &first_count_arguments, rsx::index_array_type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
u32 base_index, std::function<bool(rsx::primitive_type)> expands); u32 base_index, std::function<bool(rsx::primitive_type)> expands);
/** /**

View file

@@ -158,7 +158,7 @@ namespace
m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
gsl::span<gsl::byte> mapped_buffer_span = { gsl::span<gsl::byte> mapped_buffer_span = {
(gsl::byte*)mapped_buffer, gsl::narrow_cast<int>(buffer_size)}; (gsl::byte*)mapped_buffer, gsl::narrow_cast<int>(buffer_size)};
write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, rsx::method_registers.current_draw_clause.draw_command_ranges, write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, vertex_count,
vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size, vertex_array.is_be); vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size, vertex_array.is_be);
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
@@ -211,12 +211,9 @@ namespace
}; };
std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> generate_index_buffer_for_emulated_primitives_array( std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> generate_index_buffer_for_emulated_primitives_array(
const std::vector<rsx::draw_range_t> & vertex_ranges, d3d12_data_heap& m_buffer_data) u32 vertex_count, d3d12_data_heap& m_buffer_data)
{ {
size_t index_count = std::accumulate( size_t index_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, vertex_count);
vertex_ranges.begin(), vertex_ranges.end(), 0ll, [](size_t acc, const auto& pair) {
return acc + get_index_count(rsx::method_registers.current_draw_clause.primitive, pair.count);
});
// Alloc // Alloc
size_t buffer_size = align(index_count * sizeof(u16), 64); size_t buffer_size = align(index_count * sizeof(u16), 64);
@@ -226,10 +223,6 @@ namespace
void* mapped_buffer = void* mapped_buffer =
m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
u32 vertex_count = 0;
for (const auto& pair : vertex_ranges)
vertex_count += pair.count;
write_index_array_for_non_indexed_non_native_primitive_to_buffer((char *)mapped_buffer, rsx::method_registers.current_draw_clause.primitive, vertex_count); write_index_array_for_non_indexed_non_native_primitive_to_buffer((char *)mapped_buffer, rsx::method_registers.current_draw_clause.primitive, vertex_count);
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
@@ -249,9 +242,8 @@ namespace
* range, and whose second element is the number of vertex in this range. * range, and whose second element is the number of vertex in this range.
*/ */
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> upload_vertex_attributes( std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> upload_vertex_attributes(
std::vector<rsx::draw_range_t> vertex_ranges, u32 vertex_count,
std::function<attribute_storage(std::vector<rsx::draw_range_t>)> std::function<attribute_storage()> get_vertex_buffers,
get_vertex_buffers,
ID3D12Resource* m_vertex_buffer_data, d3d12_data_heap& m_buffer_data, ID3D12Resource* m_vertex_buffer_data, d3d12_data_heap& m_buffer_data,
ID3D12GraphicsCommandList* command_list) ID3D12GraphicsCommandList* command_list)
{ {
@@ -259,13 +251,9 @@ namespace
&CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data, &CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data,
D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_COPY_DEST)); D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_COPY_DEST));
u32 vertex_count = 0;
for (const auto &range : vertex_ranges)
vertex_count += range.count;
vertex_buffer_visitor visitor( vertex_buffer_visitor visitor(
vertex_count, command_list, m_vertex_buffer_data, m_buffer_data); vertex_count, command_list, m_vertex_buffer_data, m_buffer_data);
const auto& vertex_buffers = get_vertex_buffers(vertex_ranges); const auto& vertex_buffers = get_vertex_buffers();
for (const auto& vbo : vertex_buffers) std::visit(visitor, vbo); for (const auto& vbo : vertex_buffers) std::visit(visitor, vbo);
@@ -348,7 +336,7 @@ namespace
{ {
draw_command_visitor(ID3D12GraphicsCommandList* cmd_list, d3d12_data_heap& buffer_data, draw_command_visitor(ID3D12GraphicsCommandList* cmd_list, d3d12_data_heap& buffer_data,
ID3D12Resource* vertex_buffer_data, ID3D12Resource* vertex_buffer_data,
std::function<attribute_storage(const std::vector<rsx::draw_range_t>&)> get_vertex_info_lambda) std::function<attribute_storage()> get_vertex_info_lambda)
: command_list(cmd_list), m_buffer_data(buffer_data), : command_list(cmd_list), m_buffer_data(buffer_data),
m_vertex_buffer_data(vertex_buffer_data), get_vertex_buffers(get_vertex_info_lambda) m_vertex_buffer_data(vertex_buffer_data), get_vertex_buffers(get_vertex_info_lambda)
{ {
@@ -357,10 +345,10 @@ namespace
std::tuple<bool, size_t, std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>> operator()( std::tuple<bool, size_t, std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>> operator()(
const rsx::draw_array_command& command) const rsx::draw_array_command& command)
{ {
const auto vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) { if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) {
size_t vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
return std::make_tuple(false, vertex_count, return std::make_tuple(false, vertex_count,
upload_vertex_attributes(rsx::method_registers.current_draw_clause.draw_command_ranges, upload_vertex_attributes(vertex_count,
get_vertex_buffers, get_vertex_buffers,
m_vertex_buffer_data, m_buffer_data, command_list)); m_vertex_buffer_data, m_buffer_data, command_list));
} }
@@ -369,10 +357,10 @@ namespace
size_t index_count; size_t index_count;
std::tie(index_buffer_view, index_count) = std::tie(index_buffer_view, index_count) =
generate_index_buffer_for_emulated_primitives_array( generate_index_buffer_for_emulated_primitives_array(
rsx::method_registers.current_draw_clause.draw_command_ranges, m_buffer_data); vertex_count, m_buffer_data);
command_list->IASetIndexBuffer(&index_buffer_view); command_list->IASetIndexBuffer(&index_buffer_view);
return std::make_tuple(true, index_count, return std::make_tuple(true, index_count,
upload_vertex_attributes(rsx::method_registers.current_draw_clause.draw_command_ranges, upload_vertex_attributes(vertex_count,
get_vertex_buffers, get_vertex_buffers,
m_vertex_buffer_data, m_buffer_data, command_list)); m_vertex_buffer_data, m_buffer_data, command_list));
} }
@@ -406,7 +394,7 @@ namespace
write_index_array_data_to_buffer(dst, command.raw_index_buffer, indexed_type, write_index_array_data_to_buffer(dst, command.raw_index_buffer, indexed_type,
rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.current_draw_clause.primitive,
rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index_enabled(),
rsx::method_registers.restart_index(), rsx::method_registers.current_draw_clause.draw_command_ranges, rsx::method_registers.restart_index(),
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !is_primitive_native(prim); }); rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !is_primitive_native(prim); });
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
@@ -417,7 +405,7 @@ namespace
command_list->IASetIndexBuffer(&index_buffer_view); command_list->IASetIndexBuffer(&index_buffer_view);
return std::make_tuple(true, index_count, return std::make_tuple(true, index_count,
upload_vertex_attributes({ {0, max_index + 1} }, get_vertex_buffers, upload_vertex_attributes(max_index + 1, get_vertex_buffers,
m_vertex_buffer_data, m_buffer_data, command_list)); m_vertex_buffer_data, m_buffer_data, command_list));
} }
@@ -439,7 +427,7 @@ namespace
size_t index_count; size_t index_count;
std::tie(index_buffer_view, index_count) = std::tie(index_buffer_view, index_count) =
generate_index_buffer_for_emulated_primitives_array( generate_index_buffer_for_emulated_primitives_array(
{{0, (u32)vertex_count}}, m_buffer_data); vertex_count, m_buffer_data);
command_list->IASetIndexBuffer(&index_buffer_view); command_list->IASetIndexBuffer(&index_buffer_view);
return std::make_tuple(true, index_count, vertex_buffer_view); return std::make_tuple(true, index_count, vertex_buffer_view);
} }
@@ -447,7 +435,7 @@ namespace
private: private:
ID3D12GraphicsCommandList* command_list; ID3D12GraphicsCommandList* command_list;
d3d12_data_heap& m_buffer_data; d3d12_data_heap& m_buffer_data;
std::function<attribute_storage(const std::vector<rsx::draw_range_t>&)> get_vertex_buffers; std::function<attribute_storage()> get_vertex_buffers;
ID3D12Resource* m_vertex_buffer_data; ID3D12Resource* m_vertex_buffer_data;
}; };
} // End anonymous namespace } // End anonymous namespace
@@ -457,7 +445,7 @@ D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList* comma
{ {
return std::visit( return std::visit(
draw_command_visitor(command_list, m_buffer_data, m_vertex_buffer_data.Get(), draw_command_visitor(command_list, m_buffer_data, m_vertex_buffer_data.Get(),
[this](const auto& list) { return get_vertex_buffers(rsx::method_registers, list, 0); }), [this]() { return get_vertex_buffers(rsx::method_registers, 0); }),
get_draw_command(rsx::method_registers)); get_draw_command(rsx::method_registers));
} }

View file

@@ -195,17 +195,6 @@ void GLGSRender::end()
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now(); std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count(); m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
if (manually_flush_ring_buffers)
{
//Use approximations to reserve space. This path is mostly for debug purposes anyway
u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
u32 approx_working_buffer_size = approx_vertex_count * 256;
//Allocate 256K heap if we have no approximation at this time (inlined array)
m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
}
const auto do_heap_cleanup = [this]() const auto do_heap_cleanup = [this]()
{ {
if (manually_flush_ring_buffers) if (manually_flush_ring_buffers)
@@ -220,17 +209,6 @@ void GLGSRender::end()
} }
}; };
//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
auto upload_info = set_vertex_buffer();
if (upload_info.vertex_draw_count == 0)
{
// Malformed vertex setup; abort
do_heap_cleanup();
rsx::thread::end();
return;
}
//Check if depth buffer is bound and valid //Check if depth buffer is bound and valid
//If ds is not initialized clear it; it seems new depth textures should have depth cleared //If ds is not initialized clear it; it seems new depth textures should have depth cleared
auto copy_rtt_contents = [this](gl::render_target *surface, bool is_depth) auto copy_rtt_contents = [this](gl::render_target *surface, bool is_depth)
@@ -407,15 +385,11 @@ void GLGSRender::end()
if (!load_program()) if (!load_program())
{ {
// Program is not ready, skip drawing this // Program is not ready, skip drawing this
do_heap_cleanup();
std::this_thread::yield(); std::this_thread::yield();
rsx::thread::end(); rsx::thread::end();
return; return;
} }
// Load program here since it is dependent on vertex state
load_program_env(upload_info);
std::chrono::time_point<steady_clock> program_stop = steady_clock::now(); std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count(); m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
@@ -490,117 +464,161 @@ void GLGSRender::end()
std::chrono::time_point<steady_clock> draw_start = steady_clock::now(); std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
do_heap_cleanup();
if (g_cfg.video.debug_output) if (g_cfg.video.debug_output)
{ {
m_program->validate(); m_program->validate();
} }
const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive); const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive);
const bool allow_multidraw = supports_multidraw && !g_cfg.video.disable_FIFO_reordering; rsx::method_registers.current_draw_clause.begin();
const bool single_draw = (!allow_multidraw || int subdraw = 0;
rsx::method_registers.current_draw_clause.draw_command_ranges.size() <= 1 || do
rsx::method_registers.current_draw_clause.is_disjoint_primitive);
if (upload_info.index_info)
{ {
const GLenum index_type = std::get<0>(*upload_info.index_info); if (!subdraw)
const u32 index_offset = std::get<1>(*upload_info.index_info);
const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;
if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
{ {
glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff); m_vertex_layout = analyse_inputs_interleaved();
} if (!m_vertex_layout.validate())
{
m_index_ring_buffer->bind(); break;
}
if (single_draw)
{
glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
} }
else else
{ {
const auto draw_count = rsx::method_registers.current_draw_clause.draw_command_ranges.size(); if (rsx::method_registers.current_draw_clause.execute_pipeline_dependencies() & rsx::vertex_base_changed)
const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
uintptr_t index_ptr = index_offset;
m_scratch_buffer.resize(draw_count * 16);
GLsizei *counts = (GLsizei*)m_scratch_buffer.data();
const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
int dst_index = 0;
for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges)
{ {
const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count); // Rebase vertex bases instead of
counts[dst_index] = index_size; for (auto &info : m_vertex_layout.interleaved_blocks)
offsets[dst_index++] = (const GLvoid*)index_ptr;
index_ptr += (index_size << type_scale);
}
glMultiDrawElements(draw_mode, counts, index_type, offsets, (GLsizei)draw_count);
}
}
else
{
if (single_draw)
{
glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
}
else
{
const u32 base_index = rsx::method_registers.current_draw_clause.draw_command_ranges.front().first;
bool use_draw_arrays_fallback = false;
const auto draw_count = rsx::method_registers.current_draw_clause.draw_command_ranges.size();
const auto driver_caps = gl::get_driver_caps();
m_scratch_buffer.resize(draw_count * 24);
GLint* firsts = (GLint*)m_scratch_buffer.data();
GLsizei* counts = (GLsizei*)(firsts + draw_count);
const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
int dst_index = 0;
for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges)
{
const GLint first = range.first - base_index;
const GLsizei count = range.count;
firsts[dst_index] = first;
counts[dst_index] = count;
offsets[dst_index++] = (const GLvoid*)(first << 2);
if (driver_caps.vendor_AMD && (first + count) > (0x100000 >> 2))
{ {
//Unlikely, but added here in case the identity buffer is not large enough somehow const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
use_draw_arrays_fallback = true; info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
break;
} }
} }
}
if (use_draw_arrays_fallback) ++subdraw;
if (manually_flush_ring_buffers)
{
//Use approximations to reserve space. This path is mostly for debug purposes anyway
u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
u32 approx_working_buffer_size = approx_vertex_count * 256;
//Allocate 256K heap if we have no approximation at this time (inlined array)
m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
}
//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
auto upload_info = set_vertex_buffer();
do_heap_cleanup();
if (upload_info.vertex_draw_count == 0)
{
// Malformed vertex setup; abort
continue;
}
load_program_env(upload_info);
if (!upload_info.index_info)
{
if (rsx::method_registers.current_draw_clause.is_single_draw())
{ {
//MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges)
{
glDrawArrays(draw_mode, range.first - base_index, range.count);
}
}
else if (driver_caps.vendor_AMD)
{
//Use identity index buffer to fix broken vertexID on AMD
m_identity_index_buffer->bind();
glMultiDrawElements(draw_mode, counts, GL_UNSIGNED_INT, offsets, (GLsizei)draw_count);
} }
else else
{ {
//Normal render const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
glMultiDrawArrays(draw_mode, firsts, counts, (GLsizei)draw_count); const auto draw_count = subranges.size();
const auto driver_caps = gl::get_driver_caps();
bool use_draw_arrays_fallback = false;
m_scratch_buffer.resize(draw_count * 24);
GLint* firsts = (GLint*)m_scratch_buffer.data();
GLsizei* counts = (GLsizei*)(firsts + draw_count);
const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
u32 first = 0;
u32 dst_index = 0;
for (const auto &range : subranges)
{
firsts[dst_index] = first;
counts[dst_index] = range.count;
offsets[dst_index++] = (const GLvoid*)(first << 2);
if (driver_caps.vendor_AMD && (first + range.count) > (0x100000 >> 2))
{
//Unlikely, but added here in case the identity buffer is not large enough somehow
use_draw_arrays_fallback = true;
break;
}
first += range.count;
}
if (use_draw_arrays_fallback)
{
//MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more
for (int n = 0; n < draw_count; ++n)
{
glDrawArrays(draw_mode, firsts[n], counts[n]);
}
}
else if (driver_caps.vendor_AMD)
{
//Use identity index buffer to fix broken vertexID on AMD
m_identity_index_buffer->bind();
glMultiDrawElements(draw_mode, counts, GL_UNSIGNED_INT, offsets, (GLsizei)draw_count);
}
else
{
//Normal render
glMultiDrawArrays(draw_mode, firsts, counts, (GLsizei)draw_count);
}
} }
} }
} else
{
const GLenum index_type = std::get<0>(*upload_info.index_info);
const u32 index_offset = std::get<1>(*upload_info.index_info);
const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;
if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
{
glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT) ? 0xffff : 0xffffffff);
}
m_index_ring_buffer->bind();
if (rsx::method_registers.current_draw_clause.is_single_draw())
{
glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
}
else
{
const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
const auto draw_count = subranges.size();
const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
uintptr_t index_ptr = index_offset;
m_scratch_buffer.resize(draw_count * 16);
GLsizei *counts = (GLsizei*)m_scratch_buffer.data();
const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
int dst_index = 0;
for (const auto &range : subranges)
{
const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count);
counts[dst_index] = index_size;
offsets[dst_index++] = (const GLvoid*)index_ptr;
index_ptr += (index_size << type_scale);
}
glMultiDrawElements(draw_mode, counts, index_type, offsets, (GLsizei)draw_count);
}
}
} while (rsx::method_registers.current_draw_clause.next());
m_rtts.on_write(); m_rtts.on_write();

View file

@@ -20,19 +20,12 @@ namespace
namespace namespace
{ {
// return vertex count if primitive type is not native (empty array otherwise) // return vertex count if primitive type is not native (empty array otherwise)
std::tuple<u32, u32> get_index_array_for_emulated_non_indexed_draw(const std::vector<rsx::draw_range_t> &first_count_commands, rsx::primitive_type primitive_mode, gl::ring_buffer &dst) std::tuple<u32, u32> get_index_array_for_emulated_non_indexed_draw(rsx::primitive_type primitive_mode, gl::ring_buffer &dst, u32 vertex_count)
{ {
//This is an emulated buffer, so our indices only range from 0->original_vertex_array_length // This is an emulated buffer, so our indices only range from 0->original_vertex_array_length
u32 vertex_count = 0; const auto element_count = get_index_count(primitive_mode, vertex_count);
u32 element_count = 0;
verify(HERE), !gl::is_primitive_native(primitive_mode); verify(HERE), !gl::is_primitive_native(primitive_mode);
for (const auto &range : first_count_commands)
{
element_count += (u32)get_index_count(primitive_mode, range.count);
vertex_count += range.count;
}
auto mapping = dst.alloc_from_heap(element_count * sizeof(u16), 256); auto mapping = dst.alloc_from_heap(element_count * sizeof(u16), 256);
char *mapped_buffer = (char *)mapping.first; char *mapped_buffer = (char *)mapping.first;
@ -40,7 +33,7 @@ namespace
return std::make_tuple(element_count, mapping.second); return std::make_tuple(element_count, mapping.second);
} }
std::tuple<u32, u32, u32> upload_index_buffer(gsl::span<const gsl::byte> raw_index_buffer, void *ptr, rsx::index_array_type type, rsx::primitive_type draw_mode, const std::vector<rsx::draw_range_t>& first_count_commands, u32 initial_vertex_count) std::tuple<u32, u32, u32> upload_index_buffer(gsl::span<const gsl::byte> raw_index_buffer, void *ptr, rsx::index_array_type type, rsx::primitive_type draw_mode, u32 initial_vertex_count)
{ {
u32 min_index, max_index, vertex_draw_count = initial_vertex_count; u32 min_index, max_index, vertex_draw_count = initial_vertex_count;
@ -51,7 +44,7 @@ namespace
gsl::span<gsl::byte> dst{ reinterpret_cast<gsl::byte*>(ptr), ::narrow<u32>(block_sz) }; gsl::span<gsl::byte> dst{ reinterpret_cast<gsl::byte*>(ptr), ::narrow<u32>(block_sz) };
std::tie(min_index, max_index, vertex_draw_count) = write_index_array_data_to_buffer(dst, raw_index_buffer, std::tie(min_index, max_index, vertex_draw_count) = write_index_array_data_to_buffer(dst, raw_index_buffer,
type, draw_mode, rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index(), first_count_commands, type, draw_mode, rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index(),
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !gl::is_primitive_native(prim); }); rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !gl::is_primitive_native(prim); });
return std::make_tuple(min_index, max_index, vertex_draw_count); return std::make_tuple(min_index, max_index, vertex_draw_count);
@ -99,8 +92,8 @@ namespace
u32 index_count; u32 index_count;
u32 offset_in_index_buffer; u32 offset_in_index_buffer;
std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw( std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(
rsx::method_registers.current_draw_clause.draw_command_ranges, rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer,
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer); rsx::method_registers.current_draw_clause.get_elements_count());
return{ index_count, vertex_count, min_index, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) }; return{ index_count, vertex_count, min_index, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) };
} }
@ -128,8 +121,7 @@ namespace
u32 offset_in_index_buffer = mapping.second; u32 offset_in_index_buffer = mapping.second;
std::tie(min_index, max_index, index_count) = upload_index_buffer( std::tie(min_index, max_index, index_count) = upload_index_buffer(
command.raw_index_buffer, ptr, type, rsx::method_registers.current_draw_clause.primitive, command.raw_index_buffer, ptr, type, rsx::method_registers.current_draw_clause.primitive, vertex_count);
rsx::method_registers.current_draw_clause.draw_command_ranges, vertex_count);
if (min_index >= max_index) if (min_index >= max_index)
{ {
@ -163,8 +155,7 @@ namespace
u32 offset_in_index_buffer; u32 offset_in_index_buffer;
u32 index_count; u32 index_count;
std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw( std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(
{ { 0, 0, vertex_count } }, rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer, vertex_count);
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer);
return{ index_count, vertex_count, 0, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) }; return{ index_count, vertex_count, 0, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) };
} }
@ -182,11 +173,6 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
{ {
std::chrono::time_point<steady_clock> then = steady_clock::now(); std::chrono::time_point<steady_clock> then = steady_clock::now();
m_vertex_layout = analyse_inputs_interleaved();
if (!m_vertex_layout.validate())
return {};
//Write index buffers and count verts //Write index buffers and count verts
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers)); auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers));
@ -214,6 +200,8 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base; storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base;
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first)) if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
{ {
verify(HERE), cached->local_address == storage_address;
in_cache = true; in_cache = true;
upload_info.persistent_mapping_offset = cached->offset_in_heap; upload_info.persistent_mapping_offset = cached->offset_in_heap;
} }

File diff suppressed because it is too large Load diff

View file

@ -2,8 +2,19 @@
#include <Utilities/types.h> #include <Utilities/types.h>
#include <Utilities/Atomic.h> #include <Utilities/Atomic.h>
#include <Utilities/mutex.h>
#include <Utilities/thread.h>
#include "rsx_utils.h"
#include <vector> #include <vector>
#include <string>
#include <memory>
#include <unordered_map>
#ifndef __unused
#define __unused(expression) do { (void)(expression); } while(0)
#endif
struct RsxDmaControl; struct RsxDmaControl;
@ -17,8 +28,10 @@ namespace rsx
{ {
NOP = 0, NOP = 0,
FIFO_EMPTY = 0xDEADF1F0, FIFO_EMPTY = 0xDEADF1F0,
FIFO_BUSY = 0xBABEF1F0,
FIFO_PACKET_BEGIN = 0xF1F0, FIFO_PACKET_BEGIN = 0xF1F0,
FIFO_DISABLED_COMMAND = 0xF1F4, FIFO_DISABLED_COMMAND = 0xF1F4,
FIFO_DRAW_BARRIER = 0xF1F8,
}; };
struct register_pair struct register_pair
@ -26,21 +39,149 @@ namespace rsx
u32 reg; u32 reg;
u32 value; u32 value;
u32 loc; u32 loc;
u32 reserved;
};
struct fifo_buffer_info_t
{
u32 start_loc;
u32 length;
u32 num_draw_calls;
u32 draw_call_distance_weight;
};
struct branch_target_info_t
{
u32 branch_target;
u32 branch_origin;
s64 weight;
u64 checksum_16;
u64 reserved;
}; };
struct optimization_pass struct optimization_pass
{ {
virtual void optimize(std::vector<register_pair>& commands, const u32* registers) const = 0; virtual void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) = 0;
}; };
struct flattening_pass : public optimization_pass struct flattening_pass : public optimization_pass
{ {
void optimize(std::vector<register_pair>& commands, const u32* registers) const override; private:
std::array<bool, 0x10000 / 4> m_skippable_registers;
public:
flattening_pass();
void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) override;
}; };
struct reordering_pass : public optimization_pass struct reordering_pass : public optimization_pass
{ {
void optimize(std::vector<register_pair>& commands, const u32* registers) const override; private:
struct instruction_buffer_t
{
std::unordered_map<u32, u32> m_storage;
simple_array<u32> m_insertion_order;
instruction_buffer_t()
{
m_insertion_order.reserve(64);
}
void add_cmd(u32 reg, u32 value)
{
const auto is_new = std::get<1>(m_storage.insert_or_assign(reg, value));
if (!is_new)
{
for (auto &loc : m_insertion_order)
{
if (loc == reg)
{
loc |= 0x80000000;
break;
}
}
}
m_insertion_order.push_back(reg);
}
void clear()
{
m_storage.clear();
m_insertion_order.clear();
}
void swap(instruction_buffer_t& other)
{
m_storage.swap(other.m_storage);
m_insertion_order.swap(other.m_insertion_order);
}
auto size() const
{
return m_storage.size();
}
inline std::pair<u32, u32> get(int index) const
{
const auto key = m_insertion_order[index];
if (key & 0x80000000)
{
// Disabled by a later write to the same register
// TODO: Track command type registers and avoid this
return { FIFO_DISABLED_COMMAND, 0 };
}
const auto value = m_storage.at(key);
return { key, value };
}
bool operator == (const instruction_buffer_t& other) const
{
if (size() == other.size())
{
for (const auto &e : other.m_storage)
{
const auto found = m_storage.find(e.first);
if (found == m_storage.end())
return false;
if (found->second != e.second)
return false;
}
return true;
}
return false;
}
};
struct draw_call
{
instruction_buffer_t prologue;
std::vector<register_pair> draws;
bool write_prologue;
u32 primitive_type;
const register_pair* start_pos;
bool matches(const instruction_buffer_t setup, u32 prim) const
{
if (prim != primitive_type)
return false;
return prologue == setup;
}
};
instruction_buffer_t registers_changed;
std::vector<draw_call> bins;
std::unordered_multimap<u32, fifo_buffer_info_t> m_results_prediction_table;
public:
void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) override;
}; };
class FIFO_control class FIFO_control
@ -48,28 +189,58 @@ namespace rsx
RsxDmaControl* m_ctrl = nullptr; RsxDmaControl* m_ctrl = nullptr;
u32 m_internal_get = 0; u32 m_internal_get = 0;
std::shared_ptr<thread_base> m_prefetcher_thread;
u32 m_prefetch_get = 0;
atomic_t<bool> m_prefetcher_busy{ false };
atomic_t<bool> m_fifo_busy{ false };
fifo_buffer_info_t m_prefetcher_info;
bool m_prefetcher_speculating;
std::vector<std::unique_ptr<optimization_pass>> m_optimization_passes; std::vector<std::unique_ptr<optimization_pass>> m_optimization_passes;
std::vector<register_pair> m_queue; simple_array<register_pair> m_queue;
simple_array<register_pair> m_prefetched_queue;
atomic_t<u32> m_command_index{ 0 }; atomic_t<u32> m_command_index{ 0 };
bool is_blocking_cmd(u32 cmd); shared_mutex m_prefetch_mutex; // Guards prefetch queue
bool is_sync_cmd(u32 cmd); shared_mutex m_queue_mutex; // Guards primary queue
atomic_t<u64> m_ctrl_tag{ 0 }; // 'Guards' control registers
void read_ahead(); register_pair empty_cmd { FIFO_EMPTY };
void optimize(); register_pair busy_cmd { FIFO_BUSY };
u32 m_memwatch_addr = 0;
u32 m_memwatch_cmp = 0;
fifo_buffer_info_t m_fifo_info;
std::unordered_multimap<u32, branch_target_info_t> m_branch_prediction_table;
void read_ahead(fifo_buffer_info_t& info, simple_array<register_pair>& commands, u32& get_pointer);
void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands);
void clear_buffer(); void clear_buffer();
u32 get_likely_target(u32 source);
void report_branch_miss(u32 source, u32 target, u32 actual);
void report_branch_hit(u32 source, u32 target);
bool test_prefetcher_correctness(u32 actual_target);
public: public:
FIFO_control(rsx::thread* pctrl); FIFO_control(rsx::thread* pctrl);
~FIFO_control() {} ~FIFO_control() {}
void set_get(u32 get); void set_get(u32 get, bool spinning = false);
void set_put(u32 put); void set_put(u32 put);
register_pair read(); const register_pair& read();
inline const register_pair& read_unsafe();
void register_optimization_pass(optimization_pass* pass); void register_optimization_pass(optimization_pass* pass);
void finalize();
public:
static bool is_blocking_cmd(u32 cmd);
static bool is_sync_cmd(u32 cmd);
}; };
} }
} }

View file

@ -42,7 +42,135 @@ namespace rsx
std::function<bool(u32 addr, bool is_writing)> g_access_violation_handler; std::function<bool(u32 addr, bool is_writing)> g_access_violation_handler;
thread* g_current_renderer = nullptr; thread* g_current_renderer = nullptr;
//TODO: Restore a working shaders cache #pragma optimize("", off)
void run_tests()
{
#if 0
if (0)
{
auto _get_method_name = [](u32 reg) -> std::string
{
if (reg == FIFO::FIFO_DISABLED_COMMAND)
{
return "COMMAND DISABLED";
}
if (reg == FIFO::FIFO_PACKET_BEGIN)
{
return "PACKET BEGIN";
}
return rsx::get_method_name(reg >> 2);
};
auto _dump_commands = [&](const std::vector<FIFO::register_pair>& commands)
{
LOG_ERROR(RSX, "DUMP BEGINS--------------------------------");
for (const auto &cmd : commands)
{
LOG_ERROR(RSX, "%s (0x%x)", _get_method_name(cmd.reg), cmd.value);
}
LOG_ERROR(RSX, "DUMP ENDS--------------------------------");
};
// Test
std::vector<FIFO::register_pair> fake_commands =
{
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xdeadbeef },
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
{ NV4097_SET_BEGIN_END << 2, 5 },
{ NV4097_DRAW_ARRAYS << 2, 0xff000000 },
{ NV4097_SET_BEGIN_END << 2, 0},
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xcafebabe },
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
{ NV4097_SET_BEGIN_END << 2, 5 },
{ NV4097_DRAW_ARRAYS << 2, 0xff0000ff },
{ NV4097_SET_BEGIN_END << 2, 0},
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xdeadbeef },
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
{ NV4097_SET_BEGIN_END << 2, 5 },
{ NV4097_DRAW_ARRAYS << 2, 0xff0001fe },
{ NV4097_SET_BEGIN_END << 2, 0},
{ 0xffffffff, 0 },
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xcafebabe },
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
{ NV4097_SET_BEGIN_END << 2, 5 },
{ NV4097_DRAW_ARRAYS << 2, 0xff0002fd },
{ NV4097_SET_BEGIN_END << 2, 0},
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xdeadbeef },
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
{ NV4097_SET_BEGIN_END << 2, 5 },
{ NV4097_DRAW_ARRAYS << 2, 0xff0003fc },
{ NV4097_SET_BEGIN_END << 2, 0},
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xcafebabe },
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
{ NV4097_SET_BEGIN_END << 2, 5 },
{ NV4097_DRAW_ARRAYS << 2, 0xff0004fb },
{ NV4097_SET_BEGIN_END << 2, 0},
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xdeadbeef },
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
{ NV4097_SET_BEGIN_END << 2, 5 },
{ NV4097_DRAW_ARRAYS << 2, 0xff0005fa },
{ NV4097_SET_BEGIN_END << 2, 0},
{ 0xffffffff, 0xdead },
};
std::vector<u32> fake_registers(16384);
std::fill(fake_registers.begin(), fake_registers.end(), 0u);
FIFO::flattening_pass flattening_pass;
FIFO::reordering_pass reordering_pass;
FIFO::fifo_buffer_info_t info{ 0, fake_commands.size() * 4, /*7*/18, 0 };
flattening_pass.optimize(info, fake_commands, fake_registers.data());
_dump_commands(fake_commands);
reordering_pass.optimize(info, fake_commands, fake_registers.data());
_dump_commands(fake_commands);
LOG_ERROR(RSX, "FINISHED TEST");
}
#endif
}
#pragma optimize("", on)
u32 get_address(u32 offset, u32 location) u32 get_address(u32 offset, u32 location)
{ {
@ -97,8 +225,10 @@ namespace rsx
return get_current_renderer()->ctxt_addr + offset; return get_current_renderer()->ctxt_addr + offset;
default: default:
{
fmt::throw_exception("Invalid location (offset=0x%x, location=0x%x)" HERE, offset, location); fmt::throw_exception("Invalid location (offset=0x%x, location=0x%x)" HERE, offset, location);
} }
}
} }
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size) u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size)
@ -289,23 +419,7 @@ namespace rsx
conditional_render_test_address = 0; conditional_render_test_address = 0;
} }
rsx::method_registers.current_draw_clause.inline_vertex_array.resize(0);
in_begin_end = true; in_begin_end = true;
switch (rsx::method_registers.current_draw_clause.primitive)
{
case rsx::primitive_type::line_loop:
case rsx::primitive_type::line_strip:
case rsx::primitive_type::polygon:
case rsx::primitive_type::quad_strip:
case rsx::primitive_type::triangle_fan:
case rsx::primitive_type::triangle_strip:
// Adjacency matters for these types
rsx::method_registers.current_draw_clause.is_disjoint_primitive = false;
break;
default:
rsx::method_registers.current_draw_clause.is_disjoint_primitive = true;
}
} }
void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value) void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
@ -348,15 +462,15 @@ namespace rsx
m_graphics_state |= rsx::pipeline_state::framebuffer_reads_dirty; m_graphics_state |= rsx::pipeline_state::framebuffer_reads_dirty;
ROP_sync_timestamp = get_system_time(); ROP_sync_timestamp = get_system_time();
for (u8 index = 0; index < rsx::limits::vertex_count; ++index) for (auto & push_buf : vertex_push_buffers)
{ {
//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932 //Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
//rsx::method_registers.register_vertex_info[index].size = 0; //rsx::method_registers.register_vertex_info[index].size = 0;
vertex_push_buffers[index].clear(); push_buf.clear();
} }
element_push_buffer.resize(0); element_push_buffer.clear();
if (zcull_ctrl->active) if (zcull_ctrl->active)
zcull_ctrl->on_draw(); zcull_ctrl->on_draw();
@ -397,6 +511,7 @@ namespace rsx
void thread::on_task() void thread::on_task()
{ {
m_rsx_thread = std::this_thread::get_id(); m_rsx_thread = std::this_thread::get_id();
run_tests();
if (supports_native_ui) if (supports_native_ui)
{ {
@ -430,8 +545,8 @@ namespace rsx
fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this); fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass()); //fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
//fifo_ctrl->register_optimization_pass(new FIFO::reordering_pass()); //fifo_ctrl->register_optimization_pass(new FIFO::reordering_pass()); // R&C2 - Not working if flattening is also enabled!!!
//fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass()); //fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
last_flip_time = get_system_time() - 1000000; last_flip_time = get_system_time() - 1000000;
@ -539,6 +654,29 @@ namespace rsx
void thread::on_exit() void thread::on_exit()
{ {
m_rsx_thread_exiting = true; m_rsx_thread_exiting = true;
if (m_vblank_thread)
{
m_vblank_thread->join();
m_vblank_thread.reset();
}
if (m_decompiler_thread)
{
m_decompiler_thread->join();
m_decompiler_thread.reset();
}
if (fifo_ctrl)
{
fifo_ctrl->finalize();
}
}
std::string thread::get_name() const
{
return "rsx::thread";
>>>>>>> rsx: Fixups
} }
void thread::fill_scale_offset_data(void *buffer, bool flip_y) const void thread::fill_scale_offset_data(void *buffer, bool flip_y) const
@ -740,7 +878,7 @@ namespace rsx
return t + timestamp_subvalue; return t + timestamp_subvalue;
} }
gsl::span<const gsl::byte> thread::get_raw_index_array(const std::vector<draw_range_t>& draw_indexed_clause) const gsl::span<const gsl::byte> thread::get_raw_index_array(const draw_clause& draw_indexed_clause) const
{ {
if (element_push_buffer.size()) if (element_push_buffer.size())
{ {
@ -755,49 +893,29 @@ namespace rsx
bool is_primitive_restart_enabled = rsx::method_registers.restart_index_enabled(); bool is_primitive_restart_enabled = rsx::method_registers.restart_index_enabled();
u32 primitive_restart_index = rsx::method_registers.restart_index(); u32 primitive_restart_index = rsx::method_registers.restart_index();
u32 min_index = UINT32_MAX; const u32 first = draw_indexed_clause.min_index();
u32 max_index = 0; const u32 count = draw_indexed_clause.get_elements_count();
for (const auto &range : draw_indexed_clause)
{
const u32 root_index = (range.command_data_offset / type_size) + range.first;
min_index = std::min(root_index, min_index);
max_index = std::max(root_index + range.count, max_index);
}
const u32 first = min_index;
const u32 count = max_index - min_index;
const gsl::byte* ptr = static_cast<const gsl::byte*>(vm::base(address)); const gsl::byte* ptr = static_cast<const gsl::byte*>(vm::base(address));
return{ ptr + first * type_size, count * type_size }; return{ ptr + first * type_size, count * type_size };
} }
gsl::span<const gsl::byte> thread::get_raw_vertex_buffer(const rsx::data_array_format_info& vertex_array_info, u32 base_offset, const std::vector<draw_range_t>& vertex_ranges) const gsl::span<const gsl::byte> thread::get_raw_vertex_buffer(const rsx::data_array_format_info& vertex_array_info, u32 base_offset, const draw_clause& draw_array_clause) const
{ {
u32 offset = vertex_array_info.offset(); u32 offset = vertex_array_info.offset();
u32 address = rsx::get_address(rsx::get_vertex_offset_from_base(base_offset, offset & 0x7fffffff), offset >> 31); u32 address = rsx::get_address(rsx::get_vertex_offset_from_base(base_offset, offset & 0x7fffffff), offset >> 31);
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array_info.type(), vertex_array_info.size()); u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array_info.type(), vertex_array_info.size());
u32 min_index = UINT32_MAX; const u32 first = draw_array_clause.min_index();
u32 max_index = 0; const u32 count = draw_array_clause.get_elements_count();
for (const auto &range : vertex_ranges)
{
const auto root_index = (range.command_data_offset / vertex_array_info.stride()) + range.first;
min_index = std::min(root_index, min_index);
max_index = std::max(root_index + range.count, max_index);
}
const u32 first = min_index;
const u32 count = max_index - min_index;
const gsl::byte* ptr = gsl::narrow_cast<const gsl::byte*>(vm::base(address)); const gsl::byte* ptr = gsl::narrow_cast<const gsl::byte*>(vm::base(address));
return {ptr + first * vertex_array_info.stride(), count * vertex_array_info.stride() + element_size}; return {ptr + first * vertex_array_info.stride(), count * vertex_array_info.stride() + element_size};
} }
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>> std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector<draw_range_t>& vertex_ranges, const u64 consumed_attrib_mask) const thread::get_vertex_buffers(const rsx::rsx_state& state, const u64 consumed_attrib_mask) const
{ {
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>> result; std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>> result;
result.reserve(rsx::limits::vertex_count); result.reserve(rsx::limits::vertex_count);
@ -815,7 +933,7 @@ namespace rsx
{ {
const rsx::data_array_format_info& info = state.vertex_arrays_info[index]; const rsx::data_array_format_info& info = state.vertex_arrays_info[index];
result.push_back(vertex_array_buffer{info.type(), info.size(), info.stride(), result.push_back(vertex_array_buffer{info.type(), info.size(), info.stride(),
get_raw_vertex_buffer(info, state.vertex_data_base_offset(), vertex_ranges), index, true}); get_raw_vertex_buffer(info, state.vertex_data_base_offset(), state.current_draw_clause), index, true});
continue; continue;
} }
@ -854,7 +972,7 @@ namespace rsx
{ {
return draw_indexed_array_command return draw_indexed_array_command
{ {
get_raw_index_array( rsx::method_registers.current_draw_clause.draw_command_ranges) get_raw_index_array(state.current_draw_clause)
}; };
} }
@ -1301,7 +1419,6 @@ namespace rsx
if (state.current_draw_clause.command == rsx::draw_command::inlined_array) if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
{ {
vertex_input_layout result = {}; vertex_input_layout result = {};
result.interleaved_blocks.reserve(8);
interleaved_range_info info = {}; interleaved_range_info info = {};
info.interleaved = true; info.interleaved = true;
@ -1336,8 +1453,8 @@ namespace rsx
const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask(); const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask();
vertex_input_layout result = {}; vertex_input_layout result = {};
result.interleaved_blocks.reserve(8); result.interleaved_blocks.reserve(16);
result.referenced_registers.reserve(4); result.referenced_registers.reserve(16);
for (u8 index = 0; index < rsx::limits::vertex_count; ++index) for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
{ {
@ -1430,7 +1547,7 @@ namespace rsx
block.base_offset = base_address; block.base_offset = base_address;
block.attribute_stride = info.stride(); block.attribute_stride = info.stride();
block.memory_location = info.offset() >> 31; block.memory_location = info.offset() >> 31;
block.locations.reserve(8); block.locations.reserve(16);
block.locations.push_back(index); block.locations.push_back(index);
block.min_divisor = info.frequency(); block.min_divisor = info.frequency();
block.all_modulus = !!(frequency_divider_mask & (1 << index)); block.all_modulus = !!(frequency_divider_mask & (1 << index));

View file

@ -528,6 +528,8 @@ namespace rsx
virtual void on_decompiler_exit() {} virtual void on_decompiler_exit() {}
virtual bool on_decompiler_task() { return false; } virtual bool on_decompiler_task() { return false; }
virtual void emit_geometry(u32) {}
void run_FIFO(); void run_FIFO();
public: public:
@ -554,11 +556,11 @@ namespace rsx
void read_barrier(u32 memory_address, u32 memory_range); void read_barrier(u32 memory_address, u32 memory_range);
virtual void sync_hint(FIFO_hint hint) {} virtual void sync_hint(FIFO_hint hint) {}
gsl::span<const gsl::byte> get_raw_index_array(const std::vector<draw_range_t>& draw_indexed_clause) const; gsl::span<const gsl::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<draw_range_t>& vertex_ranges) const; gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const draw_clause& draw_array_clause) const;
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>> std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
get_vertex_buffers(const rsx::rsx_state& state, const std::vector<draw_range_t>& vertex_ranges, const u64 consumed_attrib_mask) const; get_vertex_buffers(const rsx::rsx_state& state, const u64 consumed_attrib_mask) const;
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array> std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
get_draw_command(const rsx::rsx_state& state) const; get_draw_command(const rsx::rsx_state& state) const;

View file

@ -603,7 +603,7 @@ VKGSRender::VKGSRender() : GSRender()
std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device); std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);
//Occlusion //Occlusion
m_occlusion_query_pool.create((*m_device), DESCRIPTOR_MAX_DRAW_CALLS); //Enough for 4k draw calls per pass m_occlusion_query_pool.create((*m_device), OCCLUSION_MAX_POOL_SIZE);
for (int n = 0; n < 128; ++n) for (int n = 0; n < 128; ++n)
m_occlusion_query_data[n].driver_handle = n; m_occlusion_query_data[n].driver_handle = n;
@ -619,7 +619,7 @@ VKGSRender::VKGSRender() : GSRender()
//VRAM allocation //VRAM allocation
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000); m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000);
m_uniform_buffer_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer"); m_uniform_buffer_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer");
m_transform_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer"); m_transform_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer");
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer"); m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000); m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000);
@ -849,11 +849,15 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
m_flush_requests.post(sync_timestamp == 0ull); m_flush_requests.post(sync_timestamp == 0ull);
has_queue_ref = true; has_queue_ref = true;
} }
else else if (!vk::is_uninterruptible())
{ {
//Flush primary cb queue to sync pending changes (e.g image transitions!) //Flush primary cb queue to sync pending changes (e.g image transitions!)
flush_command_queue(); flush_command_queue();
} }
else
{
LOG_ERROR(RSX, "Fault in uninterruptible code!");
}
if (sync_timestamp > 0) if (sync_timestamp > 0)
{ {
@ -1110,6 +1114,145 @@ void VKGSRender::close_render_pass()
render_pass_open = false; render_pass_open = false;
} }
// Emits one sub-range of the current draw clause. Invoked from VKGSRender::end()
// once per range (begin()/next() loop); sub_index 0 performs full program/layout
// setup, later indices only patch the vertex fetch environment.
void VKGSRender::emit_geometry(u32 sub_index)
{
	auto &draw_call = rsx::method_registers.current_draw_clause;

	//std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();

	if (sub_index == 0)
	{
		// Analyse the interleaved vertex input layout once for the whole clause
		m_vertex_layout = analyse_inputs_interleaved();
	}

	if (!m_vertex_layout.validate())
	{
		// No vertex inputs enabled
		// Fast-forward the clause to its last range so the caller's next() loop terminates
		draw_call.end();
		return;
	}

	if (sub_index > 0 && draw_call.execute_pipeline_dependencies() & rsx::vertex_base_changed)
	{
		// Rebase vertex bases instead of
		// re-analysing: an execution barrier changed the vertex base offset
		// mid-clause, so recompute the real addresses of the attribute blocks
		for (auto &info : m_vertex_layout.interleaved_blocks)
		{
			const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
			info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
		}
	}

	// Snapshot current attribute buffer views to detect rebinds caused by the upload below
	const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
	const auto old_volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;

	// Programs data is dependent on vertex state
	auto upload_info = upload_vertex_data();
	if (!upload_info.vertex_draw_count)
	{
		// Malformed vertex setup; abort
		return;
	}

	//std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
	//m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();

	auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
	auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
	bool update_descriptors = false;

	if (sub_index == 0)
	{
		// Load program execution environment
		load_program_env(upload_info);
		update_descriptors = true;
	}
	else
	{
		// Update vertex fetch environment
		update_vertex_env(upload_info);

		if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer)
		{
			// NOTE(review): descriptor-set cloning for mid-clause buffer rebinds is
			// stubbed out below, so a rebind currently does not refresh descriptors
			/* VkDescriptorSetAllocateInfo alloc_info = {};
			alloc_info.descriptorPool = m_current_frame->descriptor_pool;
			alloc_info.descriptorSetCount = 1;
			alloc_info.pSetLayouts = &descriptor_layouts;
			alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;

			VkDescriptorSet new_descriptor_set;
			CHECK_RESULT(vkAllocateDescriptorSets(*m_device, &alloc_info, &new_descriptor_set));

			VkCopyDescriptorSet copy = {};
			copy.sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET;
			copy

			m_current_frame->descriptor_set = new_descriptor_set;
			m_current_frame->used_descriptors++;
			update_descriptors = true;*/
		}
	}

	if (update_descriptors)
	{
		m_program->bind_uniform(persistent_buffer, "persistent_input_stream", m_current_frame->descriptor_set);
		m_program->bind_uniform(volatile_buffer, "volatile_input_stream", m_current_frame->descriptor_set);

		vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
	}

	//std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
	//m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_start - vertex_end).count();

	begin_render_pass();

	if (!upload_info.index_info)
	{
		// Non-indexed path
		if (draw_call.is_single_draw())
		{
			vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
		}
		else
		{
			// Primitive-restart barriers split this range; emit one draw per subrange
			u32 vertex_offset = 0;
			const auto subranges = draw_call.get_subranges();
			for (const auto &range : subranges)
			{
				vkCmdDraw(*m_current_command_buffer, range.count, 1, vertex_offset, 0);
				vertex_offset += range.count;
			}
		}
	}
	else
	{
		// Indexed path; index data was written to the index ring buffer by upload_vertex_data()
		const VkIndexType index_type = std::get<1>(*upload_info.index_info);
		const VkDeviceSize offset = std::get<0>(*upload_info.index_info);

		vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);

		if (rsx::method_registers.current_draw_clause.is_single_draw())
		{
			const u32 index_count = upload_info.vertex_draw_count;
			vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
		}
		else
		{
			u32 vertex_offset = 0;
			const auto subranges = draw_call.get_subranges();
			for (const auto &range : subranges)
			{
				// Emulated primitive types can expand the index count per subrange
				const auto count = get_index_count(draw_call.primitive, range.count);
				vkCmdDrawIndexed(*m_current_command_buffer, count, 1, vertex_offset, 0, 0);
				vertex_offset += count;
			}
		}
	}

	close_render_pass();

	//std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
	//m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
}
void VKGSRender::end() void VKGSRender::end()
{ {
if (skip_frame || !framebuffer_status_valid || renderer_unavailable || if (skip_frame || !framebuffer_status_valid || renderer_unavailable ||
@ -1363,31 +1506,6 @@ void VKGSRender::end()
std::chrono::time_point<steady_clock> program_end = steady_clock::now(); std::chrono::time_point<steady_clock> program_end = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count(); m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
// Programs data is dependent on vertex state
std::chrono::time_point<steady_clock> vertex_start = program_end;
auto upload_info = upload_vertex_data();
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
if (!upload_info.vertex_draw_count)
{
// Malformed vertex setup; abort
rsx::thread::end();
return;
}
// Load program execution environment
program_start = vertex_end;
load_program_env(upload_info);
VkBufferView persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
VkBufferView volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
m_program->bind_uniform(persistent_buffer, "persistent_input_stream", m_current_frame->descriptor_set);
m_program->bind_uniform(volatile_buffer, "volatile_input_stream", m_current_frame->descriptor_set);
program_end = steady_clock::now();
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
textures_start = program_end; textures_start = program_end;
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
@ -1453,10 +1571,6 @@ void VKGSRender::end()
textures_end = steady_clock::now(); textures_end = steady_clock::now();
m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count(); m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
//While vertex upload is an interruptible process, if we made it this far, there's no need to sync anything that occurs past this point
//Only textures are synchronized tightly with the GPU and they have been read back above
vk::enter_uninterruptible();
u32 occlusion_id = 0; u32 occlusion_id = 0;
if (m_occlusion_query_active) if (m_occlusion_query_active)
{ {
@ -1475,21 +1589,9 @@ void VKGSRender::end()
} }
} }
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
update_draw_state();
begin_render_pass();
bool primitive_emulated = false; bool primitive_emulated = false;
vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated); vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated);
const bool allow_multidraw = supports_multidraw && !g_cfg.video.disable_FIFO_reordering;
const bool single_draw = (!allow_multidraw ||
rsx::method_registers.current_draw_clause.draw_command_ranges.size() <= 1 ||
rsx::method_registers.current_draw_clause.is_disjoint_primitive);
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX)) if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
{ {
//Begin query //Begin query
@ -1500,45 +1602,22 @@ void VKGSRender::end()
m_current_command_buffer->flags |= cb_has_occlusion_task; m_current_command_buffer->flags |= cb_has_occlusion_task;
} }
if (!upload_info.index_info) // While vertex upload is an interruptible process, if we made it this far, there's no need to sync anything that occurs past this point
{ // Only textures are synchronized tightly with the GPU and they have been read back above
if (single_draw) vk::enter_uninterruptible();
{
vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
}
else
{
const auto base_vertex = rsx::method_registers.current_draw_clause.draw_command_ranges.front().first;
for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges)
{
vkCmdDraw(*m_current_command_buffer, range.count, 1, range.first - base_vertex, 0);
}
}
}
else
{
VkIndexType index_type;
const u32 index_count = upload_info.vertex_draw_count;
VkDeviceSize offset;
std::tie(offset, index_type) = *upload_info.index_info; vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type); update_draw_state();
if (single_draw) u32 sub_index = 0;
{ rsx::method_registers.current_draw_clause.begin();
vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0); do
} {
else emit_geometry(sub_index++);
{
u32 first_vertex = 0;
for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges)
{
const auto verts = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count);
vkCmdDrawIndexed(*m_current_command_buffer, verts, 1, first_vertex, 0, 0);
first_vertex += verts;
}
}
} }
while (rsx::method_registers.current_draw_clause.next());
vk::leave_uninterruptible();
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX)) if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
{ {
@ -1546,15 +1625,9 @@ void VKGSRender::end()
m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id); m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
} }
close_render_pass();
vk::leave_uninterruptible();
m_current_command_buffer->num_draws++; m_current_command_buffer->num_draws++;
m_rtts.on_write(); m_rtts.on_write();
std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - textures_end).count();
m_draw_calls++; m_draw_calls++;
rsx::thread::end(); rsx::thread::end();
@ -2479,29 +2552,38 @@ void VKGSRender::load_program_env(const vk::vertex_upload_info& vertex_info)
m_graphics_state &= ~handled_flags; m_graphics_state &= ~handled_flags;
} }
static const u32 mr_color_offset[rsx::limits::color_buffers_count] = void VKGSRender::update_vertex_env(const vk::vertex_upload_info& vertex_info)
{ {
NV4097_SET_SURFACE_COLOR_AOFFSET, // Vertex base index = vertex_offset + 132
NV4097_SET_SURFACE_COLOR_BOFFSET, // Vertex layout = vertex_offset + 160
NV4097_SET_SURFACE_COLOR_COFFSET,
NV4097_SET_SURFACE_COLOR_DOFFSET
};
static const u32 mr_color_dma[rsx::limits::color_buffers_count] = std::array<s32, 16 * 4> vertex_layout;
{ fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, vertex_layout.data(),
NV4097_SET_CONTEXT_DMA_COLOR_A, vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
NV4097_SET_CONTEXT_DMA_COLOR_B,
NV4097_SET_CONTEXT_DMA_COLOR_C,
NV4097_SET_CONTEXT_DMA_COLOR_D
};
static const u32 mr_color_pitch[rsx::limits::color_buffers_count] = vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset, 512,
{ VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
NV4097_SET_SURFACE_PITCH_A,
NV4097_SET_SURFACE_PITCH_B, vkCmdUpdateBuffer(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset + 132, 4, &vertex_info.vertex_index_base);
NV4097_SET_SURFACE_PITCH_C,
NV4097_SET_SURFACE_PITCH_D u32 write_offset = m_vertex_state_buffer_info.offset + 160;
}; s32 *src_ptr = vertex_layout.data();
for (const auto& placement : m_vertex_layout.attribute_placement)
{
constexpr u32 data_len = 4 * sizeof(s32);
if (placement != rsx::attribute_buffer_placement::none)
{
vkCmdUpdateBuffer(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, write_offset, data_len, src_ptr);
}
write_offset += data_len;
src_ptr += 4;
}
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset, 512,
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT);
}
void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool skip_reading) void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool skip_reading)
{ {
@ -3048,7 +3130,27 @@ void VKGSRender::flip(int buffer)
if (!image_to_flip) if (!image_to_flip)
{ {
//Read from cell // Read from cell
const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
const auto overlap = m_texture_cache.find_texture_from_range(range);
bool flush_queue = false;
for (const auto & section : overlap)
{
if (section->get_protection() == utils::protection::no)
{
section->copy_texture(false, *m_current_command_buffer, m_swapchain->get_graphics_queue());
flush_queue = true;
}
}
if (flush_queue)
{
// Submit for processing to lower hard fault penalty
flush_command_queue();
}
m_texture_cache.invalidate_range(range, rsx::invalidation_cause::read, *m_current_command_buffer, m_swapchain->get_graphics_queue());
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height); image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height);
} }
} }

View file

@ -1,4 +1,4 @@
#pragma once #pragma once
#include "Emu/RSX/GSRender.h" #include "Emu/RSX/GSRender.h"
#include "VKHelpers.h" #include "VKHelpers.h"
#include "VKTextureCache.h" #include "VKTextureCache.h"
@ -403,9 +403,11 @@ private:
vk::vertex_upload_info upload_vertex_data(); vk::vertex_upload_info upload_vertex_data();
public:
bool load_program(); bool load_program();
void load_program_env(const vk::vertex_upload_info& vertex_info); void load_program_env(const vk::vertex_upload_info& upload_info);
void update_vertex_env(const vk::vertex_upload_info& upload_info);
public:
void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false); void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false);
void read_buffers(); void read_buffers();
void write_buffers(); void write_buffers();
@ -422,6 +424,7 @@ public:
protected: protected:
void begin() override; void begin() override;
void end() override; void end() override;
void emit_geometry(u32 sub_index) override;
void on_init_thread() override; void on_init_thread() override;
void on_exit() override; void on_exit() override;

View file

@ -32,6 +32,7 @@
#endif #endif
#define DESCRIPTOR_MAX_DRAW_CALLS 4096 #define DESCRIPTOR_MAX_DRAW_CALLS 4096
#define OCCLUSION_MAX_POOL_SIZE 8192
#define VERTEX_BUFFERS_FIRST_BIND_SLOT 3 #define VERTEX_BUFFERS_FIRST_BIND_SLOT 3
#define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 2 #define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 2
@ -652,7 +653,7 @@ namespace vk
VkImageTiling tiling, VkImageTiling tiling,
VkImageUsageFlags usage, VkImageUsageFlags usage,
VkImageCreateFlags image_flags) VkImageCreateFlags image_flags)
: m_device(dev) : m_device(dev), current_layout(initial_layout)
{ {
info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
info.imageType = image_type; info.imageType = image_type;
@ -1195,6 +1196,11 @@ namespace vk
return commands; return commands;
} }
// Returns true while this command buffer is between begin() and end()
// (i.e. commands may currently be recorded into it)
bool is_recording() const
{
	return is_open;
}
void begin() void begin()
{ {
if (m_submit_fence && is_pending) if (m_submit_fence && is_pending)
@ -2413,8 +2419,8 @@ public:
VkQueryPool query_pool = VK_NULL_HANDLE; VkQueryPool query_pool = VK_NULL_HANDLE;
vk::render_device* owner = nullptr; vk::render_device* owner = nullptr;
std::deque<u32> available_slots;
std::vector<bool> query_active_status; std::vector<bool> query_active_status;
public: public:
void create(vk::render_device &dev, u32 num_entries) void create(vk::render_device &dev, u32 num_entries)
@ -2428,6 +2434,12 @@ public:
owner = &dev; owner = &dev;
query_active_status.resize(num_entries, false); query_active_status.resize(num_entries, false);
available_slots.resize(num_entries);
for (u32 n = 0; n < num_entries; ++n)
{
available_slots[n] = n;
}
} }
void destroy() void destroy()
@ -2484,8 +2496,13 @@ public:
void reset_query(vk::command_buffer &cmd, u32 index) void reset_query(vk::command_buffer &cmd, u32 index)
{ {
vkCmdResetQueryPool(cmd, query_pool, index, 1); if (query_active_status[index])
query_active_status[index] = false; {
vkCmdResetQueryPool(cmd, query_pool, index, 1);
query_active_status[index] = false;
available_slots.push_back(index);
}
} }
void reset_queries(vk::command_buffer &cmd, std::vector<u32> &list) void reset_queries(vk::command_buffer &cmd, std::vector<u32> &list)
@ -2505,13 +2522,16 @@ public:
u32 find_free_slot() u32 find_free_slot()
{ {
for (u32 n = 0; n < query_active_status.size(); n++) if (available_slots.empty())
{ {
if (query_active_status[n] == false) return -1u;
return n;
} }
return UINT32_MAX; u32 result = available_slots.front();
available_slots.pop_front();
verify(HERE), !query_active_status[result];
return result;
} }
}; };

View file

@ -106,6 +106,12 @@ namespace
const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count(); const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
const u32 min_index = rsx::method_registers.current_draw_clause.min_index(); const u32 min_index = rsx::method_registers.current_draw_clause.min_index();
//if (rsx::method_registers.current_draw_clause.draw_command_ranges.size() > 1)
//{
// TODO
//LOG_ERROR(RSX, "REEEEEEEEEEEEEEEEEEEEEEE (prims_emulated=%d)", primitives_emulated);
//}
if (primitives_emulated) if (primitives_emulated)
{ {
u32 index_count; u32 index_count;
@ -165,7 +171,7 @@ namespace
command.raw_index_buffer, index_type, command.raw_index_buffer, index_type,
rsx::method_registers.current_draw_clause.primitive, rsx::method_registers.current_draw_clause.primitive,
rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index_enabled(),
rsx::method_registers.restart_index(), rsx::method_registers.current_draw_clause.draw_command_ranges, rsx::method_registers.restart_index(),
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !vk::is_primitive_native(prim); }); rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !vk::is_primitive_native(prim); });
if (min_index >= max_index) if (min_index >= max_index)
@ -227,11 +233,6 @@ namespace
vk::vertex_upload_info VKGSRender::upload_vertex_data() vk::vertex_upload_info VKGSRender::upload_vertex_data()
{ {
m_vertex_layout = analyse_inputs_interleaved();
if (!m_vertex_layout.validate())
return {};
draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout); draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout);
auto result = std::visit(visitor, get_draw_command(rsx::method_registers)); auto result = std::visit(visitor, get_draw_command(rsx::method_registers));
@ -258,6 +259,8 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base; storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base;
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first)) if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
{ {
verify(HERE), cached->local_address == storage_address;
in_cache = true; in_cache = true;
persistent_range_base = cached->offset_in_heap; persistent_range_base = cached->offset_in_heap;
} }

View file

@ -880,12 +880,42 @@ namespace rsx
storage_type* find_vertex_range(uintptr_t local_addr, upload_format fmt, u32 data_length) override storage_type* find_vertex_range(uintptr_t local_addr, upload_format fmt, u32 data_length) override
{ {
const auto data_end = local_addr + data_length;
for (auto &v : vertex_ranges[local_addr]) for (auto &v : vertex_ranges[local_addr])
{ {
if (v.buffer_format == fmt && v.data_length == data_length) if (v.buffer_format == fmt && v.data_length >= data_length)
return &v; return &v;
} }
#if 0
for (const auto &range : vertex_ranges)
{
if (range.first > local_addr)
continue;
for (const auto &v : range.second)
{
if (v.buffer_format == fmt)
{
const auto entry_end = v.local_address + v.data_length;
if (data_end <= entry_end)
{
const u32 offset = (local_addr - v.local_address);
if (offset % 16)
continue; // TexelBuffer alignment rules
storage_type e = v;
e.data_length = data_length;
e.local_address = local_addr;
e.offset_in_heap += offset;
auto& ret = vertex_ranges[local_addr].emplace_back(e);
return &ret;
}
}
}
}
#endif
return nullptr; return nullptr;
} }

View file

@ -423,9 +423,7 @@ namespace rsx
{ {
if (arg) if (arg)
{ {
rsx::method_registers.current_draw_clause.draw_command_ranges.clear(); rsx::method_registers.current_draw_clause.reset(to_primitive_type(arg));
rsx::method_registers.current_draw_clause.command = draw_command::none;
rsx::method_registers.current_draw_clause.primitive = to_primitive_type(arg);
rsxthr->begin(); rsxthr->begin();
return; return;
} }
@ -453,9 +451,9 @@ namespace rsx
else else
rsx::method_registers.current_draw_clause.is_immediate_draw = false; rsx::method_registers.current_draw_clause.is_immediate_draw = false;
if (!(rsx::method_registers.current_draw_clause.draw_command_ranges.empty() && if (!rsx::method_registers.current_draw_clause.empty())
rsx::method_registers.current_draw_clause.inline_vertex_array.empty()))
{ {
rsx::method_registers.current_draw_clause.compile();
rsxthr->end(); rsxthr->end();
} }
} }
@ -598,6 +596,30 @@ namespace rsx
rsx->m_rtts_dirty = true; rsx->m_rtts_dirty = true;
} }
// NV4097_SET_VERTEX_DATA_BASE_OFFSET handler. When written inside a
// begin/end pair, the register change is rolled back and queued as a
// barrier so it is replayed at the correct point in the draw clause.
void set_vertex_base_offset(thread* rsx, u32 reg, u32 arg)
{
	if (!rsx->in_begin_end)
	{
		return;
	}

	// Revert change to queue later
	method_registers.decode(reg, method_registers.register_previous_value);

	// Insert base modifier barrier
	method_registers.current_draw_clause.insert_command_barrier(vertex_base_modifier_barrier, arg);
}
// NV4097_SET_VERTEX_DATA_BASE_INDEX handler. Mirrors set_vertex_base_offset:
// inside a begin/end pair the write is deferred via an execution barrier.
void set_index_base_offset(thread* rsx, u32 reg, u32 arg)
{
	if (!rsx->in_begin_end)
	{
		return;
	}

	// Revert change to queue later
	method_registers.decode(reg, method_registers.register_previous_value);

	// Insert base modifier barrier
	method_registers.current_draw_clause.insert_command_barrier(index_base_modifier_barrier, arg);
}
template<u32 index> template<u32 index>
struct set_texture_dirty_bit struct set_texture_dirty_bit
{ {
@ -1156,6 +1178,13 @@ namespace rsx
}; };
} }
namespace fifo
{
	// Handler for the custom FIFO::FIFO_DRAW_BARRIER method (bound in
	// rsx_state::init below). Intentionally does nothing: the barrier
	// bookkeeping happens in draw_clause, this merely gives the method
	// a registered handler.
	void draw_barrier(thread* rsx, u32, u32)
	{
	}
}
void rsx_state::init() void rsx_state::init()
{ {
// Special values set at initialization, these are not set by a context reset // Special values set at initialization, these are not set by a context reset
@ -2122,6 +2151,34 @@ namespace rsx
return registers[reg] == value; return registers[reg] == value;
} }
// Replays any execution barriers attached to the active draw range
// (rebasing vertex/index base registers) and returns a bitmask of
// command_execution_flags describing which state was modified.
u32 draw_clause::execute_pipeline_dependencies() const
{
	u32 flags = 0;

	for (const auto &barrier : draw_command_barriers[current_range_index])
	{
		if (barrier.type == primitive_restart_barrier)
		{
			// Rasterization-only barrier; no register state to replay
			continue;
		}

		if (barrier.type == index_base_modifier_barrier)
		{
			// Change index base offset
			method_registers.decode(NV4097_SET_VERTEX_DATA_BASE_INDEX, barrier.arg);
			flags |= index_base_changed;
		}
		else if (barrier.type == vertex_base_modifier_barrier)
		{
			// Change vertex base offset
			method_registers.decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg);
			flags |= vertex_base_changed;
		}
		else
		{
			fmt::throw_exception("Unreachable" HERE);
		}
	}

	return flags;
}
namespace method_detail namespace method_detail
{ {
template<int Id, int Step, int Count, template<u32> class T, int Index = 0> template<int Id, int Step, int Count, template<u32> class T, int Index = 0>
@ -2494,6 +2551,7 @@ namespace rsx
//Some custom GCM methods //Some custom GCM methods
methods[GCM_SET_DRIVER_OBJECT] = nullptr; methods[GCM_SET_DRIVER_OBJECT] = nullptr;
methods[FIFO::FIFO_DRAW_BARRIER] = nullptr;
bind_array<GCM_FLIP_HEAD, 1, 2, nullptr>(); bind_array<GCM_FLIP_HEAD, 1, 2, nullptr>();
bind_array<GCM_DRIVER_QUEUE, 1, 8, nullptr>(); bind_array<GCM_DRIVER_QUEUE, 1, 8, nullptr>();
@ -2600,6 +2658,8 @@ namespace rsx
bind<NV4097_SET_SHADER_PROGRAM, nv4097::set_shader_program_dirty>(); bind<NV4097_SET_SHADER_PROGRAM, nv4097::set_shader_program_dirty>();
bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>(); bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>();
bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>(); bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>();
bind<NV4097_SET_VERTEX_DATA_BASE_OFFSET, nv4097::set_vertex_base_offset>();
bind<NV4097_SET_VERTEX_DATA_BASE_INDEX, nv4097::set_index_base_offset>();
//NV308A //NV308A
bind_range<NV308A_COLOR, 1, 256, nv308a::color>(); bind_range<NV308A_COLOR, 1, 256, nv308a::color>();
@ -2619,6 +2679,8 @@ namespace rsx
// custom methods // custom methods
bind<GCM_FLIP_COMMAND, flip_command>(); bind<GCM_FLIP_COMMAND, flip_command>();
// FIFO
bind<FIFO::FIFO_DRAW_BARRIER, fifo::draw_barrier>();
return true; return true;
}(); }();

View file

@ -3,16 +3,21 @@
#include <array> #include <array>
#include <vector> #include <vector>
#include <numeric> #include <numeric>
#include <deque>
#include <set>
#include "GCM.h" #include "GCM.h"
#include "rsx_decode.h" #include "rsx_decode.h"
#include "RSXTexture.h" #include "RSXTexture.h"
#include "rsx_vertex_data.h" #include "rsx_vertex_data.h"
#include "rsx_utils.h"
#include "Utilities/geometry.h" #include "Utilities/geometry.h"
#include <cereal/types/array.hpp> #include <cereal/types/array.hpp>
#include <cereal/types/unordered_map.hpp> #include <cereal/types/unordered_map.hpp>
extern u64 get_system_time();
namespace rsx namespace rsx
{ {
enum class draw_command enum class draw_command
@ -23,6 +28,39 @@ namespace rsx
indexed, indexed,
}; };
enum command_barrier_type : u32
{
primitive_restart_barrier,
vertex_base_modifier_barrier,
index_base_modifier_barrier
};
enum command_execution_flags : u32
{
vertex_base_changed = (1 << 0),
index_base_changed = (1 << 1)
};
// A queued state change (or rasterization break) attached to a draw range.
struct barrier_t
{
	u64 timestamp;              // creation time (get_system_time()); orders execution barriers
	u32 address;                // vertex/index position of the barrier, or -1u for execution barriers
	u32 arg;                    // register value to replay when the barrier executes
	u32 flags;
	command_barrier_type type;

	// Ordering for std::set: positional barriers sort by address,
	// address-less (execution) barriers sort by creation time.
	// NOTE(review): the comparison key is chosen per-operand, so this is not a
	// strict weak ordering when only one side has address == -1u; it holds for
	// the values actually inserted (restart barriers use timestamp 0) — confirm.
	bool operator < (const barrier_t& other) const
	{
		if (address != -1u)
		{
			return address < other.address;
		}

		return timestamp < other.timestamp;
	}
};
struct draw_range_t struct draw_range_t
{ {
u32 command_data_offset = 0; u32 command_data_offset = 0;
@ -30,55 +68,273 @@ namespace rsx
u32 count = 0; u32 count = 0;
}; };
struct draw_clause class draw_clause
{ {
// Stores the first and count argument from draw/draw indexed parameters between begin/end clauses.
simple_array<draw_range_t> draw_command_ranges;
// Stores rasterization barriers for primitive types sensitive to adjacency
std::vector<std::set<barrier_t>> draw_command_barriers;
// Counter used to parse the commands in order
u32 current_range_index;
// Location of last execution barrier
u32 last_execution_barrier_index;
// Helper functions
// Add a new draw command
void append_draw_command(const draw_range_t& range)
{
	// Keep draw_command_barriers parallel (1:1) with draw_command_ranges
	draw_command_ranges.push_back(range);
	draw_command_barriers.push_back({});
}
// Insert a new draw command within the others
void insert_draw_command(int index, const draw_range_t& range)
{
	// Walk both containers to the insertion point in lockstep so the range
	// list and its parallel barrier list stay aligned
	auto range_It = draw_command_ranges.begin();
	auto barrier_It = draw_command_barriers.begin();

	// Because deque::insert fails with initializer list on MSVC
	const std::set<barrier_t> new_barrier;

	while (index--)
	{
		++range_It;
		++barrier_It;
	}

	draw_command_ranges.insert(range_It, range);
	draw_command_barriers.insert(barrier_It, new_barrier);

	// Containers must remain 1:1 after insertion
	verify(HERE), draw_command_ranges.size() == draw_command_barriers.size();
}
public:
primitive_type primitive; primitive_type primitive;
draw_command command; draw_command command;
bool is_immediate_draw; bool is_immediate_draw;
bool is_disjoint_primitive; bool is_disjoint_primitive;
std::vector<u32> inline_vertex_array; simple_array<u32> inline_vertex_array;
// Attaches a barrier to the draw stream; requires at least one existing
// draw range to anchor to.
void insert_command_barrier(command_barrier_type type, u32 arg)
{
	verify(HERE), !draw_command_ranges.empty();

	if (type == primitive_restart_barrier)
	{
		// Rasterization flow barrier
		// Anchored at the vertex/index position where the current range ends;
		// timestamp is unused (0) for positional barriers
		const auto& last = draw_command_ranges.back();
		const auto address = last.first + last.count;

		const auto command_index = draw_command_ranges.size() - 1;
		draw_command_barriers[command_index].insert({ 0, address, arg, 0, type });
	}
	else
	{
		// Execution dependency barrier
		// Opens a fresh (empty) draw range; ordered by creation time, with no
		// positional address (-1u)
		append_draw_command({});

		const auto command_index = draw_command_ranges.size() - 1;
		draw_command_barriers[command_index].insert({ get_system_time(), -1u, arg, 0, type });
		last_execution_barrier_index = command_index;
	}
}
/** /**
* Stores the first and count argument from draw/draw indexed parameters between begin/end clauses. * Optimize commands for rendering
*/ */
std::vector<draw_range_t> draw_command_ranges; void compile()
{
// TODO
}
/**
* Insert one command range
*/
// Records one draw range (first, count) issued between begin/end.
// Contiguous ranges are merged (with a primitive-restart barrier when
// adjacency matters); otherwise ranges after the last execution barrier
// are kept sorted by their 'first' vertex.
void append(u32 first, u32 count)
{
	if (!draw_command_ranges.empty())
	{
		auto& last = draw_command_ranges.back();

		if (last.count == 0)
		{
			// Special case, usually indicates an execution barrier
			// (insert_command_barrier appended an empty placeholder range)
			last.first = first;
			last.count = count;
			return;
		}

		if (last.first + last.count == first)
		{
			// Contiguous with the previous range; merge
			if (!is_disjoint_primitive)
			{
				// Insert barrier
				insert_command_barrier(primitive_restart_barrier, 0);
			}

			last.count += count;
			return;
		}

		// u32 index: last_execution_barrier_index and size() are unsigned,
		// so a signed loop variable would mix signedness in the comparison
		for (u32 index = last_execution_barrier_index; index < draw_command_ranges.size(); ++index)
		{
			if (draw_command_ranges[index].first == first &&
				draw_command_ranges[index].count == count)
			{
				// Duplicate entry? WTF!
				return;
			}

			if (draw_command_ranges[index].first > first)
			{
				// Keep ranges after the last execution barrier sorted by 'first'
				insert_draw_command(index, { 0, first, count });
				return;
			}
		}
	}

	append_draw_command({ 0, first, count });
}
/** /**
* Returns how many vertex or index will be consumed by the draw clause. * Returns how many vertex or index will be consumed by the draw clause.
*/ */
u32 get_elements_count() const u32 get_elements_count() const
{ {
u32 count = 0; return get_range().count;
for (const auto &draw : draw_command_ranges) }
u32 min_index() const
{
return get_range().first;
}
bool is_single_draw() const
{
if (is_disjoint_primitive)
return true;
if (draw_command_ranges.empty())
{ {
count += draw.count; verify(HERE), !inline_vertex_array.empty();
return true;
} }
return count; verify(HERE), current_range_index != -1u;
for (const auto &barrier : draw_command_barriers[current_range_index])
{
if (barrier.type == primitive_restart_barrier)
return false;
}
return true;
}
bool empty() const
{
return (draw_command_ranges.empty() && inline_vertex_array.empty());
}
// Clears all queued draw state and prepares the clause for a new
// begin/end pair using the given primitive type.
void reset(rsx::primitive_type type)
{
	current_range_index = -1u;
	last_execution_barrier_index = 0;

	command = draw_command::none;
	primitive = type;

	draw_command_ranges.clear();
	draw_command_barriers.clear();
	inline_vertex_array.clear();

	switch (primitive)
	{
	case rsx::primitive_type::line_loop:
	case rsx::primitive_type::line_strip:
	case rsx::primitive_type::polygon:
	case rsx::primitive_type::quad_strip:
	case rsx::primitive_type::triangle_fan:
	case rsx::primitive_type::triangle_strip:
		// Adjacency matters for these types
		// (merged ranges need primitive-restart barriers; see append())
		is_disjoint_primitive = false;
		break;
	default:
		is_disjoint_primitive = true;
	}
}
// Rewind the clause parser to the first queued draw range
void begin()
{
	current_range_index = 0;
}
// Fast-forward the clause parser to the last queued draw range so the
// next call to next() terminates iteration
void end()
{
	current_range_index = draw_command_ranges.size() - 1;
}
bool next()
{
current_range_index++;
if (current_range_index >= draw_command_ranges.size())
{
current_range_index = 0;
return false;
}
verify(HERE), draw_command_ranges[current_range_index].count != 0;
return true;
} }
/** /**
* Optimize draw command stream for rendering * Executes commands reqiured to make the current draw state valid
*/ */
void compile() u32 execute_pipeline_dependencies() const;
{
const draw_range_t& get_range() const
{
verify(HERE), current_range_index < draw_command_ranges.size();
return draw_command_ranges[current_range_index];
} }
/** simple_array<draw_range_t> get_subranges() const
* Insert one command range
*/
void append(u32 first, u32 count)
{ {
verify(HERE), !is_single_draw();
} const auto range = get_range();
const auto limit = range.first + range.count;
u32 min_index() simple_array<draw_range_t> ret;
{ u32 previous_barrier = range.first;
LOG_FATAL(RSX, "Unimplemented"); u32 vertex_counter = 0;
return 0;
for (const auto &barrier : draw_command_barriers[current_range_index])
{
if (barrier.type != primitive_restart_barrier)
continue;
if (barrier.address <= range.first)
continue;
if (barrier.address >= limit)
break;
const u32 count = barrier.address - previous_barrier;
ret.push_back({ 0, vertex_counter, count });
previous_barrier = (u32)barrier.address;
vertex_counter += count;
}
verify(HERE), !ret.empty(), previous_barrier < limit;
ret.push_back({ 0, vertex_counter, limit - previous_barrier });
return ret;
} }
}; };

View file

@ -663,4 +663,237 @@ namespace rsx
m_data.store(0); m_data.store(0);
} }
}; };
template <typename Ty>
struct simple_array
{
public:
using iterator = Ty * ;
using const_iterator = Ty * const;
private:
u32 _capacity = 0;
u32 _size = 0;
Ty* _data = nullptr;
inline u32 offset(const_iterator pos)
{
return (_data) ? (pos - _data) : 0;
}
public:
simple_array() {}
simple_array(u32 initial_size, const Ty val = {})
{
reserve(initial_size);
_size = initial_size;
for (int n = 0; n < initial_size; ++n)
{
_data[n] = val;
}
}
simple_array(const std::initializer_list<Ty>& args)
{
reserve(args.size());
for (const auto& arg : args)
{
push_back(arg);
}
}
~simple_array()
{
if (_data)
{
free(_data);
_data = nullptr;
_size = _capacity = 0;
}
}
void swap(simple_array<Ty>& other) noexcept
{
std::swap(_capacity, other._capacity);
std::swap(_size, other._size);
std::swap(_data, other._data);
}
void reserve(u32 size)
{
if (_capacity > size)
return;
auto old_data = _data;
auto old_size = _size;
_data = (Ty*)malloc(sizeof(Ty) * size);
_capacity = size;
if (old_data)
{
memcpy(_data, old_data, sizeof(Ty) * old_size);
free(old_data);
}
}
void push_back(const Ty& val)
{
if (_size >= _capacity)
{
reserve(_capacity + 16);
}
_data[_size++] = val;
}
void push_back(Ty&& val)
{
if (_size >= _capacity)
{
reserve(_capacity + 16);
}
_data[_size++] = val;
}
iterator insert(iterator pos, const Ty& val)
{
verify(HERE), pos >= _data;
const auto _loc = offset(pos);
if (_size >= _capacity)
{
reserve(_capacity + 16);
pos = _data + _loc;
}
if (_loc >= _size)
{
_data[_size++] = val;
return pos;
}
verify(HERE), _loc < _size;
const u32 remaining = (_size - _loc);
memmove(pos + 1, pos, remaining * sizeof(Ty));
*pos = val;
_size++;
return pos;
}
iterator insert(iterator pos, Ty&& val)
{
verify(HERE), pos >= _data;
const auto _loc = offset(pos);
if (_size >= _capacity)
{
reserve(_capacity + 16);
pos = _data + _loc;
}
if (_loc >= _size)
{
_data[_size++] = val;
return pos;
}
verify(HERE), _loc < _size;
const u32 remaining = (_size - _loc);
memmove(pos + 1, pos, remaining * sizeof(Ty));
*pos = val;
_size++;
return pos;
}
void clear()
{
_size = 0;
}
bool empty() const
{
return _size == 0;
}
u32 size() const
{
return _size;
}
u32 capacity() const
{
return _capacity;
}
Ty& operator[] (u32 index)
{
return _data[index];
}
const Ty& operator[] (u32 index) const
{
return _data[index];
}
Ty* data()
{
return _data;
}
const Ty* data() const
{
return _data;
}
Ty& back()
{
return _data[_size - 1];
}
const Ty& back() const
{
return _data[_size - 1];
}
Ty& front()
{
return _data[0];
}
const Ty& front() const
{
return _data[0];
}
iterator begin()
{
return _data;
}
iterator end()
{
return _data ? _data + _size : nullptr;
}
const_iterator begin() const
{
return _data;
}
const_iterator end() const
{
return _data ? _data + _size : nullptr;
}
};
} }

View file

@ -1,4 +1,4 @@
#pragma once #pragma once
#include "GCM.h" #include "GCM.h"
#include "Utilities/types.h" #include "Utilities/types.h"
@ -64,10 +64,13 @@ struct push_buffer_vertex_info
void clear() void clear()
{ {
data.resize(0); if (size)
attribute_mask = ~0; {
vertex_count = 0; data.clear();
size = 0; attribute_mask = ~0;
vertex_count = 0;
size = 0;
}
} }
u8 get_vertex_size_in_dwords(vertex_base_type type) u8 get_vertex_size_in_dwords(vertex_base_type type)