mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-06 06:51:26 +12:00
rsx: Fixups
- Also fix visual corruption when using disjoint indexed draws
- Refactor draw call emit again (vk)
- Improve execution barrier resolve
- Allow vertex/index rebase inside begin/end pair
- Add ALPHA_TEST to list of excluded methods [TODO: defer raster state]
- gl bringup
- Simplify - using the simple_array gets back a few more fps :)
This commit is contained in:
parent
e01d2f08c9
commit
677b16f5c6
19 changed files with 2242 additions and 565 deletions
|
@ -175,20 +175,23 @@ namespace rsx
|
||||||
const u32 vertSize = get_vertex_type_size_on_host(info.type(), info.size());
|
const u32 vertSize = get_vertex_type_size_on_host(info.type(), info.size());
|
||||||
const u32 vertStride = info.stride();
|
const u32 vertStride = info.stride();
|
||||||
|
|
||||||
for (const auto& range : method_registers.current_draw_clause.draw_command_ranges)
|
method_registers.current_draw_clause.begin();
|
||||||
|
do
|
||||||
{
|
{
|
||||||
const u32 vertCount = range.count;
|
const auto& range = method_registers.current_draw_clause.get_range();
|
||||||
|
const u32 vertCount = range.count;
|
||||||
const size_t bufferSize = vertCount * vertStride + vertSize;
|
const size_t bufferSize = vertCount * vertStride + vertSize;
|
||||||
|
|
||||||
frame_capture_data::memory_block block;
|
frame_capture_data::memory_block block;
|
||||||
block.ioOffset = base_address;
|
block.ioOffset = base_address;
|
||||||
block.location = memory_location;
|
block.location = memory_location;
|
||||||
block.offset = (range.first * vertStride);
|
block.offset = (range.first * vertStride);
|
||||||
frame_capture_data::memory_block_data block_data;
|
frame_capture_data::memory_block_data block_data;
|
||||||
block_data.data.resize(bufferSize);
|
block_data.data.resize(bufferSize);
|
||||||
std::memcpy(block_data.data.data(), vm::base(addr + block.offset), bufferSize);
|
std::memcpy(block_data.data.data(), vm::base(addr + block.offset), bufferSize);
|
||||||
insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data));
|
insert_mem_block_in_map(mem_changes, std::move(block), std::move(block_data));
|
||||||
}
|
}
|
||||||
|
while (method_registers.current_draw_clause.next());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// save index buffer if used
|
// save index buffer if used
|
||||||
|
@ -211,8 +214,10 @@ namespace rsx
|
||||||
const bool is_primitive_restart_enabled = method_registers.restart_index_enabled();
|
const bool is_primitive_restart_enabled = method_registers.restart_index_enabled();
|
||||||
const u32 primitive_restart_index = method_registers.restart_index();
|
const u32 primitive_restart_index = method_registers.restart_index();
|
||||||
|
|
||||||
for (const auto& range : method_registers.current_draw_clause.draw_command_ranges)
|
method_registers.current_draw_clause.begin();
|
||||||
|
do
|
||||||
{
|
{
|
||||||
|
const auto& range = method_registers.current_draw_clause.get_range();
|
||||||
const u32 idxFirst = range.first;
|
const u32 idxFirst = range.first;
|
||||||
const u32 idxCount = range.count;
|
const u32 idxCount = range.count;
|
||||||
const u32 idxAddr = base_addr + (idxFirst * type_size);
|
const u32 idxAddr = base_addr + (idxFirst * type_size);
|
||||||
|
@ -261,6 +266,7 @@ namespace rsx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
while (method_registers.current_draw_clause.next());
|
||||||
|
|
||||||
if (min_index > max_index)
|
if (min_index > max_index)
|
||||||
{
|
{
|
||||||
|
|
|
@ -435,14 +435,11 @@ namespace
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::span<const gsl::byte> src_ptr, const std::vector<rsx::draw_range_t>& first_count_commands, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness)
|
void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::span<const gsl::byte> src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness)
|
||||||
{
|
{
|
||||||
verify(HERE), (vector_element_count > 0);
|
verify(HERE), (vector_element_count > 0);
|
||||||
const u32 src_read_stride = rsx::get_vertex_type_size_on_host(type, vector_element_count);
|
const u32 src_read_stride = rsx::get_vertex_type_size_on_host(type, vector_element_count);
|
||||||
|
|
||||||
// HACK! This is a legacy routine only used by D3D12
|
|
||||||
const u32 count = first_count_commands.front().count;
|
|
||||||
|
|
||||||
bool use_stream_no_stride = false;
|
bool use_stream_no_stride = false;
|
||||||
bool use_stream_with_stride = false;
|
bool use_stream_with_stride = false;
|
||||||
|
|
||||||
|
@ -799,7 +796,7 @@ namespace
|
||||||
template<typename T>
|
template<typename T>
|
||||||
std::tuple<u32, u32, u32> write_index_array_data_to_buffer_impl(gsl::span<u32> dst,
|
std::tuple<u32, u32, u32> write_index_array_data_to_buffer_impl(gsl::span<u32> dst,
|
||||||
gsl::span<const be_t<T>> src,
|
gsl::span<const be_t<T>> src,
|
||||||
rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index, const rsx::draw_range_t &range,
|
rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
|
||||||
u32 base_index, std::function<bool(rsx::primitive_type)> expands)
|
u32 base_index, std::function<bool(rsx::primitive_type)> expands)
|
||||||
{
|
{
|
||||||
if (!expands(draw_mode)) return upload_untouched<T>(src, dst, restart_index_enabled, restart_index, base_index);
|
if (!expands(draw_mode)) return upload_untouched<T>(src, dst, restart_index_enabled, restart_index, base_index);
|
||||||
|
@ -809,7 +806,8 @@ namespace
|
||||||
case rsx::primitive_type::line_loop:
|
case rsx::primitive_type::line_loop:
|
||||||
{
|
{
|
||||||
const auto &returnvalue = upload_untouched<T>(src, dst, restart_index_enabled, restart_index, base_index);
|
const auto &returnvalue = upload_untouched<T>(src, dst, restart_index_enabled, restart_index, base_index);
|
||||||
dst[range.count] = src[0];
|
const auto index_count = dst.size_bytes() / sizeof(T);
|
||||||
|
dst[index_count] = src[0];
|
||||||
return returnvalue;
|
return returnvalue;
|
||||||
}
|
}
|
||||||
case rsx::primitive_type::polygon:
|
case rsx::primitive_type::polygon:
|
||||||
|
@ -826,51 +824,23 @@ namespace
|
||||||
std::tuple<u32, u32, u32> write_index_array_data_to_buffer(gsl::span<gsl::byte> dst_ptr,
|
std::tuple<u32, u32, u32> write_index_array_data_to_buffer(gsl::span<gsl::byte> dst_ptr,
|
||||||
gsl::span<const gsl::byte> src_ptr,
|
gsl::span<const gsl::byte> src_ptr,
|
||||||
rsx::index_array_type type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
|
rsx::index_array_type type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
|
||||||
const std::vector<rsx::draw_range_t> &first_count_arguments,
|
|
||||||
u32 base_index, std::function<bool(rsx::primitive_type)> expands)
|
u32 base_index, std::function<bool(rsx::primitive_type)> expands)
|
||||||
{
|
{
|
||||||
u32 read = 0;
|
|
||||||
u32 written = 0;
|
|
||||||
u32 min_index = -1u;
|
|
||||||
u32 max_index = 0;
|
|
||||||
|
|
||||||
const u32 type_size = get_index_type_size(type);
|
|
||||||
|
|
||||||
for (const auto &range : first_count_arguments)
|
|
||||||
{
|
|
||||||
auto src = src_ptr.subspan(range.command_data_offset, range.count * type_size);
|
|
||||||
auto dst = dst_ptr.subspan(written * type_size);
|
|
||||||
|
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case rsx::index_array_type::u16:
|
case rsx::index_array_type::u16:
|
||||||
{
|
{
|
||||||
auto ret = write_index_array_data_to_buffer_impl<u16>(as_span_workaround<u32>(dst),
|
return write_index_array_data_to_buffer_impl<u16>(as_span_workaround<u32>(dst_ptr),
|
||||||
as_const_span<const be_t<u16>>(src), draw_mode, restart_index_enabled, restart_index, range, base_index, expands);
|
as_const_span<const be_t<u16>>(src_ptr), draw_mode, restart_index_enabled, restart_index, base_index, expands);
|
||||||
|
|
||||||
min_index = std::min<u32>(std::get<0>(ret), min_index);
|
|
||||||
max_index = std::min<u32>(std::get<1>(ret), max_index);
|
|
||||||
written += std::get<2>(ret);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
case rsx::index_array_type::u32:
|
case rsx::index_array_type::u32:
|
||||||
{
|
{
|
||||||
auto ret = write_index_array_data_to_buffer_impl<u32>(as_span_workaround<u32>(dst),
|
return write_index_array_data_to_buffer_impl<u32>(as_span_workaround<u32>(dst_ptr),
|
||||||
as_const_span<const be_t<u32>>(src), draw_mode, restart_index_enabled, restart_index, range, base_index, expands);
|
as_const_span<const be_t<u32>>(src_ptr), draw_mode, restart_index_enabled, restart_index, base_index, expands);
|
||||||
|
|
||||||
min_index = std::min<u32>(std::get<0>(ret), min_index);
|
|
||||||
max_index = std::min<u32>(std::get<1>(ret), max_index);
|
|
||||||
written += std::get<2>(ret);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
fmt::throw_exception("Unreachable" HERE);
|
fmt::throw_exception("Unreachable" HERE);
|
||||||
}
|
}
|
||||||
|
|
||||||
read += range.count;
|
|
||||||
}
|
|
||||||
|
|
||||||
return std::make_tuple(min_index, max_index, written);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w)
|
void stream_vector(void *dst, u32 x, u32 y, u32 z, u32 w)
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
* Write count vertex attributes from src_ptr.
|
* Write count vertex attributes from src_ptr.
|
||||||
* src_ptr array layout is deduced from the type, vector element count and src_stride arguments.
|
* src_ptr array layout is deduced from the type, vector element count and src_stride arguments.
|
||||||
*/
|
*/
|
||||||
void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::span<const gsl::byte> src_ptr, const std::vector<rsx::draw_range_t>& first_count_commands, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness);
|
void write_vertex_array_data_to_buffer(gsl::span<gsl::byte> raw_dst_span, gsl::span<const gsl::byte> src_ptr, u32 count, rsx::vertex_base_type type, u32 vector_element_count, u32 attribute_src_stride, u8 dst_stride, bool swap_endianness);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If primitive mode is not supported and need to be emulated (using an index buffer) returns false.
|
* If primitive mode is not supported and need to be emulated (using an index buffer) returns false.
|
||||||
|
@ -33,7 +33,7 @@ u32 get_index_type_size(rsx::index_array_type type);
|
||||||
* The function expands index buffer for non native primitive type if expands(draw_mode) return true.
|
* The function expands index buffer for non native primitive type if expands(draw_mode) return true.
|
||||||
*/
|
*/
|
||||||
std::tuple<u32, u32, u32> write_index_array_data_to_buffer(gsl::span<gsl::byte> dst, gsl::span<const gsl::byte> src,
|
std::tuple<u32, u32, u32> write_index_array_data_to_buffer(gsl::span<gsl::byte> dst, gsl::span<const gsl::byte> src,
|
||||||
rsx::index_array_type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index, const std::vector<rsx::draw_range_t> &first_count_arguments,
|
rsx::index_array_type, rsx::primitive_type draw_mode, bool restart_index_enabled, u32 restart_index,
|
||||||
u32 base_index, std::function<bool(rsx::primitive_type)> expands);
|
u32 base_index, std::function<bool(rsx::primitive_type)> expands);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -158,7 +158,7 @@ namespace
|
||||||
m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
||||||
gsl::span<gsl::byte> mapped_buffer_span = {
|
gsl::span<gsl::byte> mapped_buffer_span = {
|
||||||
(gsl::byte*)mapped_buffer, gsl::narrow_cast<int>(buffer_size)};
|
(gsl::byte*)mapped_buffer, gsl::narrow_cast<int>(buffer_size)};
|
||||||
write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, rsx::method_registers.current_draw_clause.draw_command_ranges,
|
write_vertex_array_data_to_buffer(mapped_buffer_span, vertex_array.data, vertex_count,
|
||||||
vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size, vertex_array.is_be);
|
vertex_array.type, vertex_array.attribute_size, vertex_array.stride, element_size, vertex_array.is_be);
|
||||||
|
|
||||||
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
||||||
|
@ -211,12 +211,9 @@ namespace
|
||||||
};
|
};
|
||||||
|
|
||||||
std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> generate_index_buffer_for_emulated_primitives_array(
|
std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> generate_index_buffer_for_emulated_primitives_array(
|
||||||
const std::vector<rsx::draw_range_t> & vertex_ranges, d3d12_data_heap& m_buffer_data)
|
u32 vertex_count, d3d12_data_heap& m_buffer_data)
|
||||||
{
|
{
|
||||||
size_t index_count = std::accumulate(
|
size_t index_count = get_index_count(rsx::method_registers.current_draw_clause.primitive, vertex_count);
|
||||||
vertex_ranges.begin(), vertex_ranges.end(), 0ll, [](size_t acc, const auto& pair) {
|
|
||||||
return acc + get_index_count(rsx::method_registers.current_draw_clause.primitive, pair.count);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Alloc
|
// Alloc
|
||||||
size_t buffer_size = align(index_count * sizeof(u16), 64);
|
size_t buffer_size = align(index_count * sizeof(u16), 64);
|
||||||
|
@ -226,10 +223,6 @@ namespace
|
||||||
void* mapped_buffer =
|
void* mapped_buffer =
|
||||||
m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
||||||
|
|
||||||
u32 vertex_count = 0;
|
|
||||||
for (const auto& pair : vertex_ranges)
|
|
||||||
vertex_count += pair.count;
|
|
||||||
|
|
||||||
write_index_array_for_non_indexed_non_native_primitive_to_buffer((char *)mapped_buffer, rsx::method_registers.current_draw_clause.primitive, vertex_count);
|
write_index_array_for_non_indexed_non_native_primitive_to_buffer((char *)mapped_buffer, rsx::method_registers.current_draw_clause.primitive, vertex_count);
|
||||||
|
|
||||||
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
||||||
|
@ -249,9 +242,8 @@ namespace
|
||||||
* range, and whose second element is the number of vertex in this range.
|
* range, and whose second element is the number of vertex in this range.
|
||||||
*/
|
*/
|
||||||
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> upload_vertex_attributes(
|
std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC> upload_vertex_attributes(
|
||||||
std::vector<rsx::draw_range_t> vertex_ranges,
|
u32 vertex_count,
|
||||||
std::function<attribute_storage(std::vector<rsx::draw_range_t>)>
|
std::function<attribute_storage()> get_vertex_buffers,
|
||||||
get_vertex_buffers,
|
|
||||||
ID3D12Resource* m_vertex_buffer_data, d3d12_data_heap& m_buffer_data,
|
ID3D12Resource* m_vertex_buffer_data, d3d12_data_heap& m_buffer_data,
|
||||||
ID3D12GraphicsCommandList* command_list)
|
ID3D12GraphicsCommandList* command_list)
|
||||||
{
|
{
|
||||||
|
@ -259,13 +251,9 @@ namespace
|
||||||
&CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data,
|
&CD3DX12_RESOURCE_BARRIER::Transition(m_vertex_buffer_data,
|
||||||
D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_COPY_DEST));
|
D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_RESOURCE_STATE_COPY_DEST));
|
||||||
|
|
||||||
u32 vertex_count = 0;
|
|
||||||
for (const auto &range : vertex_ranges)
|
|
||||||
vertex_count += range.count;
|
|
||||||
|
|
||||||
vertex_buffer_visitor visitor(
|
vertex_buffer_visitor visitor(
|
||||||
vertex_count, command_list, m_vertex_buffer_data, m_buffer_data);
|
vertex_count, command_list, m_vertex_buffer_data, m_buffer_data);
|
||||||
const auto& vertex_buffers = get_vertex_buffers(vertex_ranges);
|
const auto& vertex_buffers = get_vertex_buffers();
|
||||||
|
|
||||||
for (const auto& vbo : vertex_buffers) std::visit(visitor, vbo);
|
for (const auto& vbo : vertex_buffers) std::visit(visitor, vbo);
|
||||||
|
|
||||||
|
@ -348,7 +336,7 @@ namespace
|
||||||
{
|
{
|
||||||
draw_command_visitor(ID3D12GraphicsCommandList* cmd_list, d3d12_data_heap& buffer_data,
|
draw_command_visitor(ID3D12GraphicsCommandList* cmd_list, d3d12_data_heap& buffer_data,
|
||||||
ID3D12Resource* vertex_buffer_data,
|
ID3D12Resource* vertex_buffer_data,
|
||||||
std::function<attribute_storage(const std::vector<rsx::draw_range_t>&)> get_vertex_info_lambda)
|
std::function<attribute_storage()> get_vertex_info_lambda)
|
||||||
: command_list(cmd_list), m_buffer_data(buffer_data),
|
: command_list(cmd_list), m_buffer_data(buffer_data),
|
||||||
m_vertex_buffer_data(vertex_buffer_data), get_vertex_buffers(get_vertex_info_lambda)
|
m_vertex_buffer_data(vertex_buffer_data), get_vertex_buffers(get_vertex_info_lambda)
|
||||||
{
|
{
|
||||||
|
@ -357,10 +345,10 @@ namespace
|
||||||
std::tuple<bool, size_t, std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>> operator()(
|
std::tuple<bool, size_t, std::vector<D3D12_SHADER_RESOURCE_VIEW_DESC>> operator()(
|
||||||
const rsx::draw_array_command& command)
|
const rsx::draw_array_command& command)
|
||||||
{
|
{
|
||||||
|
const auto vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
||||||
if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) {
|
if (is_primitive_native(rsx::method_registers.current_draw_clause.primitive)) {
|
||||||
size_t vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
|
||||||
return std::make_tuple(false, vertex_count,
|
return std::make_tuple(false, vertex_count,
|
||||||
upload_vertex_attributes(rsx::method_registers.current_draw_clause.draw_command_ranges,
|
upload_vertex_attributes(vertex_count,
|
||||||
get_vertex_buffers,
|
get_vertex_buffers,
|
||||||
m_vertex_buffer_data, m_buffer_data, command_list));
|
m_vertex_buffer_data, m_buffer_data, command_list));
|
||||||
}
|
}
|
||||||
|
@ -369,10 +357,10 @@ namespace
|
||||||
size_t index_count;
|
size_t index_count;
|
||||||
std::tie(index_buffer_view, index_count) =
|
std::tie(index_buffer_view, index_count) =
|
||||||
generate_index_buffer_for_emulated_primitives_array(
|
generate_index_buffer_for_emulated_primitives_array(
|
||||||
rsx::method_registers.current_draw_clause.draw_command_ranges, m_buffer_data);
|
vertex_count, m_buffer_data);
|
||||||
command_list->IASetIndexBuffer(&index_buffer_view);
|
command_list->IASetIndexBuffer(&index_buffer_view);
|
||||||
return std::make_tuple(true, index_count,
|
return std::make_tuple(true, index_count,
|
||||||
upload_vertex_attributes(rsx::method_registers.current_draw_clause.draw_command_ranges,
|
upload_vertex_attributes(vertex_count,
|
||||||
get_vertex_buffers,
|
get_vertex_buffers,
|
||||||
m_vertex_buffer_data, m_buffer_data, command_list));
|
m_vertex_buffer_data, m_buffer_data, command_list));
|
||||||
}
|
}
|
||||||
|
@ -406,7 +394,7 @@ namespace
|
||||||
write_index_array_data_to_buffer(dst, command.raw_index_buffer, indexed_type,
|
write_index_array_data_to_buffer(dst, command.raw_index_buffer, indexed_type,
|
||||||
rsx::method_registers.current_draw_clause.primitive,
|
rsx::method_registers.current_draw_clause.primitive,
|
||||||
rsx::method_registers.restart_index_enabled(),
|
rsx::method_registers.restart_index_enabled(),
|
||||||
rsx::method_registers.restart_index(), rsx::method_registers.current_draw_clause.draw_command_ranges,
|
rsx::method_registers.restart_index(),
|
||||||
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !is_primitive_native(prim); });
|
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !is_primitive_native(prim); });
|
||||||
|
|
||||||
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
|
||||||
|
@ -417,7 +405,7 @@ namespace
|
||||||
command_list->IASetIndexBuffer(&index_buffer_view);
|
command_list->IASetIndexBuffer(&index_buffer_view);
|
||||||
|
|
||||||
return std::make_tuple(true, index_count,
|
return std::make_tuple(true, index_count,
|
||||||
upload_vertex_attributes({ {0, max_index + 1} }, get_vertex_buffers,
|
upload_vertex_attributes(max_index + 1, get_vertex_buffers,
|
||||||
m_vertex_buffer_data, m_buffer_data, command_list));
|
m_vertex_buffer_data, m_buffer_data, command_list));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -439,7 +427,7 @@ namespace
|
||||||
size_t index_count;
|
size_t index_count;
|
||||||
std::tie(index_buffer_view, index_count) =
|
std::tie(index_buffer_view, index_count) =
|
||||||
generate_index_buffer_for_emulated_primitives_array(
|
generate_index_buffer_for_emulated_primitives_array(
|
||||||
{{0, (u32)vertex_count}}, m_buffer_data);
|
vertex_count, m_buffer_data);
|
||||||
command_list->IASetIndexBuffer(&index_buffer_view);
|
command_list->IASetIndexBuffer(&index_buffer_view);
|
||||||
return std::make_tuple(true, index_count, vertex_buffer_view);
|
return std::make_tuple(true, index_count, vertex_buffer_view);
|
||||||
}
|
}
|
||||||
|
@ -447,7 +435,7 @@ namespace
|
||||||
private:
|
private:
|
||||||
ID3D12GraphicsCommandList* command_list;
|
ID3D12GraphicsCommandList* command_list;
|
||||||
d3d12_data_heap& m_buffer_data;
|
d3d12_data_heap& m_buffer_data;
|
||||||
std::function<attribute_storage(const std::vector<rsx::draw_range_t>&)> get_vertex_buffers;
|
std::function<attribute_storage()> get_vertex_buffers;
|
||||||
ID3D12Resource* m_vertex_buffer_data;
|
ID3D12Resource* m_vertex_buffer_data;
|
||||||
};
|
};
|
||||||
} // End anonymous namespace
|
} // End anonymous namespace
|
||||||
|
@ -457,7 +445,7 @@ D3D12GSRender::upload_and_set_vertex_index_data(ID3D12GraphicsCommandList* comma
|
||||||
{
|
{
|
||||||
return std::visit(
|
return std::visit(
|
||||||
draw_command_visitor(command_list, m_buffer_data, m_vertex_buffer_data.Get(),
|
draw_command_visitor(command_list, m_buffer_data, m_vertex_buffer_data.Get(),
|
||||||
[this](const auto& list) { return get_vertex_buffers(rsx::method_registers, list, 0); }),
|
[this]() { return get_vertex_buffers(rsx::method_registers, 0); }),
|
||||||
get_draw_command(rsx::method_registers));
|
get_draw_command(rsx::method_registers));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -195,17 +195,6 @@ void GLGSRender::end()
|
||||||
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
|
std::chrono::time_point<steady_clock> state_check_end = steady_clock::now();
|
||||||
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
|
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(state_check_end - state_check_start).count();
|
||||||
|
|
||||||
if (manually_flush_ring_buffers)
|
|
||||||
{
|
|
||||||
//Use approximations to reserve space. This path is mostly for debug purposes anyway
|
|
||||||
u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
|
||||||
u32 approx_working_buffer_size = approx_vertex_count * 256;
|
|
||||||
|
|
||||||
//Allocate 256K heap if we have no approximation at this time (inlined array)
|
|
||||||
m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
|
|
||||||
m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto do_heap_cleanup = [this]()
|
const auto do_heap_cleanup = [this]()
|
||||||
{
|
{
|
||||||
if (manually_flush_ring_buffers)
|
if (manually_flush_ring_buffers)
|
||||||
|
@ -220,17 +209,6 @@ void GLGSRender::end()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
|
|
||||||
auto upload_info = set_vertex_buffer();
|
|
||||||
|
|
||||||
if (upload_info.vertex_draw_count == 0)
|
|
||||||
{
|
|
||||||
// Malformed vertex setup; abort
|
|
||||||
do_heap_cleanup();
|
|
||||||
rsx::thread::end();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
//Check if depth buffer is bound and valid
|
//Check if depth buffer is bound and valid
|
||||||
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
|
//If ds is not initialized clear it; it seems new depth textures should have depth cleared
|
||||||
auto copy_rtt_contents = [this](gl::render_target *surface, bool is_depth)
|
auto copy_rtt_contents = [this](gl::render_target *surface, bool is_depth)
|
||||||
|
@ -407,15 +385,11 @@ void GLGSRender::end()
|
||||||
if (!load_program())
|
if (!load_program())
|
||||||
{
|
{
|
||||||
// Program is not ready, skip drawing this
|
// Program is not ready, skip drawing this
|
||||||
do_heap_cleanup();
|
|
||||||
std::this_thread::yield();
|
std::this_thread::yield();
|
||||||
rsx::thread::end();
|
rsx::thread::end();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load program here since it is dependent on vertex state
|
|
||||||
load_program_env(upload_info);
|
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
|
std::chrono::time_point<steady_clock> program_stop = steady_clock::now();
|
||||||
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
|
m_begin_time += (u32)std::chrono::duration_cast<std::chrono::microseconds>(program_stop - program_start).count();
|
||||||
|
|
||||||
|
@ -490,117 +464,161 @@ void GLGSRender::end()
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
|
std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
|
||||||
|
|
||||||
do_heap_cleanup();
|
|
||||||
|
|
||||||
if (g_cfg.video.debug_output)
|
if (g_cfg.video.debug_output)
|
||||||
{
|
{
|
||||||
m_program->validate();
|
m_program->validate();
|
||||||
}
|
}
|
||||||
|
|
||||||
const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive);
|
const GLenum draw_mode = gl::draw_mode(rsx::method_registers.current_draw_clause.primitive);
|
||||||
const bool allow_multidraw = supports_multidraw && !g_cfg.video.disable_FIFO_reordering;
|
rsx::method_registers.current_draw_clause.begin();
|
||||||
const bool single_draw = (!allow_multidraw ||
|
int subdraw = 0;
|
||||||
rsx::method_registers.current_draw_clause.draw_command_ranges.size() <= 1 ||
|
do
|
||||||
rsx::method_registers.current_draw_clause.is_disjoint_primitive);
|
|
||||||
|
|
||||||
if (upload_info.index_info)
|
|
||||||
{
|
{
|
||||||
const GLenum index_type = std::get<0>(*upload_info.index_info);
|
if (!subdraw)
|
||||||
const u32 index_offset = std::get<1>(*upload_info.index_info);
|
|
||||||
const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;
|
|
||||||
|
|
||||||
if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
|
|
||||||
{
|
{
|
||||||
glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT)? 0xffff: 0xffffffff);
|
m_vertex_layout = analyse_inputs_interleaved();
|
||||||
}
|
if (!m_vertex_layout.validate())
|
||||||
|
{
|
||||||
m_index_ring_buffer->bind();
|
break;
|
||||||
|
}
|
||||||
if (single_draw)
|
|
||||||
{
|
|
||||||
glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const auto draw_count = rsx::method_registers.current_draw_clause.draw_command_ranges.size();
|
if (rsx::method_registers.current_draw_clause.execute_pipeline_dependencies() & rsx::vertex_base_changed)
|
||||||
const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
|
|
||||||
uintptr_t index_ptr = index_offset;
|
|
||||||
m_scratch_buffer.resize(draw_count * 16);
|
|
||||||
|
|
||||||
GLsizei *counts = (GLsizei*)m_scratch_buffer.data();
|
|
||||||
const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
|
|
||||||
int dst_index = 0;
|
|
||||||
|
|
||||||
for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges)
|
|
||||||
{
|
{
|
||||||
const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count);
|
// Rebase vertex bases instead of
|
||||||
counts[dst_index] = index_size;
|
for (auto &info : m_vertex_layout.interleaved_blocks)
|
||||||
offsets[dst_index++] = (const GLvoid*)index_ptr;
|
|
||||||
|
|
||||||
index_ptr += (index_size << type_scale);
|
|
||||||
}
|
|
||||||
|
|
||||||
glMultiDrawElements(draw_mode, counts, index_type, offsets, (GLsizei)draw_count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (single_draw)
|
|
||||||
{
|
|
||||||
glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const u32 base_index = rsx::method_registers.current_draw_clause.draw_command_ranges.front().first;
|
|
||||||
bool use_draw_arrays_fallback = false;
|
|
||||||
|
|
||||||
const auto draw_count = rsx::method_registers.current_draw_clause.draw_command_ranges.size();
|
|
||||||
const auto driver_caps = gl::get_driver_caps();
|
|
||||||
|
|
||||||
m_scratch_buffer.resize(draw_count * 24);
|
|
||||||
GLint* firsts = (GLint*)m_scratch_buffer.data();
|
|
||||||
GLsizei* counts = (GLsizei*)(firsts + draw_count);
|
|
||||||
const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
|
|
||||||
int dst_index = 0;
|
|
||||||
|
|
||||||
for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges)
|
|
||||||
{
|
|
||||||
const GLint first = range.first - base_index;
|
|
||||||
const GLsizei count = range.count;
|
|
||||||
|
|
||||||
firsts[dst_index] = first;
|
|
||||||
counts[dst_index] = count;
|
|
||||||
offsets[dst_index++] = (const GLvoid*)(first << 2);
|
|
||||||
|
|
||||||
if (driver_caps.vendor_AMD && (first + count) > (0x100000 >> 2))
|
|
||||||
{
|
{
|
||||||
//Unlikely, but added here in case the identity buffer is not large enough somehow
|
const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
|
||||||
use_draw_arrays_fallback = true;
|
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (use_draw_arrays_fallback)
|
++subdraw;
|
||||||
|
|
||||||
|
if (manually_flush_ring_buffers)
|
||||||
|
{
|
||||||
|
//Use approximations to reserve space. This path is mostly for debug purposes anyway
|
||||||
|
u32 approx_vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
||||||
|
u32 approx_working_buffer_size = approx_vertex_count * 256;
|
||||||
|
|
||||||
|
//Allocate 256K heap if we have no approximation at this time (inlined array)
|
||||||
|
m_attrib_ring_buffer->reserve_storage_on_heap(std::max(approx_working_buffer_size, 256 * 1024U));
|
||||||
|
m_index_ring_buffer->reserve_storage_on_heap(16 * 1024);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Do vertex upload before RTT prep / texture lookups to give the driver time to push data
|
||||||
|
auto upload_info = set_vertex_buffer();
|
||||||
|
do_heap_cleanup();
|
||||||
|
|
||||||
|
if (upload_info.vertex_draw_count == 0)
|
||||||
|
{
|
||||||
|
// Malformed vertex setup; abort
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
load_program_env(upload_info);
|
||||||
|
|
||||||
|
if (!upload_info.index_info)
|
||||||
|
{
|
||||||
|
if (rsx::method_registers.current_draw_clause.is_single_draw())
|
||||||
{
|
{
|
||||||
//MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more
|
glDrawArrays(draw_mode, 0, upload_info.vertex_draw_count);
|
||||||
for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges)
|
|
||||||
{
|
|
||||||
glDrawArrays(draw_mode, range.first - base_index, range.count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (driver_caps.vendor_AMD)
|
|
||||||
{
|
|
||||||
//Use identity index buffer to fix broken vertexID on AMD
|
|
||||||
m_identity_index_buffer->bind();
|
|
||||||
glMultiDrawElements(draw_mode, counts, GL_UNSIGNED_INT, offsets, (GLsizei)draw_count);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//Normal render
|
const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
|
||||||
glMultiDrawArrays(draw_mode, firsts, counts, (GLsizei)draw_count);
|
const auto draw_count = subranges.size();
|
||||||
|
const auto driver_caps = gl::get_driver_caps();
|
||||||
|
bool use_draw_arrays_fallback = false;
|
||||||
|
|
||||||
|
m_scratch_buffer.resize(draw_count * 24);
|
||||||
|
GLint* firsts = (GLint*)m_scratch_buffer.data();
|
||||||
|
GLsizei* counts = (GLsizei*)(firsts + draw_count);
|
||||||
|
const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
|
||||||
|
|
||||||
|
u32 first = 0;
|
||||||
|
u32 dst_index = 0;
|
||||||
|
for (const auto &range : subranges)
|
||||||
|
{
|
||||||
|
firsts[dst_index] = first;
|
||||||
|
counts[dst_index] = range.count;
|
||||||
|
offsets[dst_index++] = (const GLvoid*)(first << 2);
|
||||||
|
|
||||||
|
if (driver_caps.vendor_AMD && (first + range.count) > (0x100000 >> 2))
|
||||||
|
{
|
||||||
|
//Unlikely, but added here in case the identity buffer is not large enough somehow
|
||||||
|
use_draw_arrays_fallback = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
first += range.count;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (use_draw_arrays_fallback)
|
||||||
|
{
|
||||||
|
//MultiDrawArrays is broken on some primitive types using AMD. One known type is GL_TRIANGLE_STRIP but there could be more
|
||||||
|
for (int n = 0; n < draw_count; ++n)
|
||||||
|
{
|
||||||
|
glDrawArrays(draw_mode, firsts[n], counts[n]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (driver_caps.vendor_AMD)
|
||||||
|
{
|
||||||
|
//Use identity index buffer to fix broken vertexID on AMD
|
||||||
|
m_identity_index_buffer->bind();
|
||||||
|
glMultiDrawElements(draw_mode, counts, GL_UNSIGNED_INT, offsets, (GLsizei)draw_count);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//Normal render
|
||||||
|
glMultiDrawArrays(draw_mode, firsts, counts, (GLsizei)draw_count);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
else
|
||||||
|
{
|
||||||
|
const GLenum index_type = std::get<0>(*upload_info.index_info);
|
||||||
|
const u32 index_offset = std::get<1>(*upload_info.index_info);
|
||||||
|
const bool restarts_valid = gl::is_primitive_native(rsx::method_registers.current_draw_clause.primitive) && !rsx::method_registers.current_draw_clause.is_disjoint_primitive;
|
||||||
|
|
||||||
|
if (gl_state.enable(restarts_valid && rsx::method_registers.restart_index_enabled(), GL_PRIMITIVE_RESTART))
|
||||||
|
{
|
||||||
|
glPrimitiveRestartIndex((index_type == GL_UNSIGNED_SHORT) ? 0xffff : 0xffffffff);
|
||||||
|
}
|
||||||
|
|
||||||
|
m_index_ring_buffer->bind();
|
||||||
|
|
||||||
|
if (rsx::method_registers.current_draw_clause.is_single_draw())
|
||||||
|
{
|
||||||
|
glDrawElements(draw_mode, upload_info.vertex_draw_count, index_type, (GLvoid *)(uintptr_t)index_offset);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const auto subranges = rsx::method_registers.current_draw_clause.get_subranges();
|
||||||
|
const auto draw_count = subranges.size();
|
||||||
|
const u32 type_scale = (index_type == GL_UNSIGNED_SHORT) ? 1 : 2;
|
||||||
|
uintptr_t index_ptr = index_offset;
|
||||||
|
m_scratch_buffer.resize(draw_count * 16);
|
||||||
|
|
||||||
|
GLsizei *counts = (GLsizei*)m_scratch_buffer.data();
|
||||||
|
const GLvoid** offsets = (const GLvoid**)(counts + draw_count);
|
||||||
|
int dst_index = 0;
|
||||||
|
|
||||||
|
for (const auto &range : subranges)
|
||||||
|
{
|
||||||
|
const auto index_size = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count);
|
||||||
|
counts[dst_index] = index_size;
|
||||||
|
offsets[dst_index++] = (const GLvoid*)index_ptr;
|
||||||
|
|
||||||
|
index_ptr += (index_size << type_scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
glMultiDrawElements(draw_mode, counts, index_type, offsets, (GLsizei)draw_count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (rsx::method_registers.current_draw_clause.next());
|
||||||
|
|
||||||
m_rtts.on_write();
|
m_rtts.on_write();
|
||||||
|
|
||||||
|
|
|
@ -20,19 +20,12 @@ namespace
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
// return vertex count if primitive type is not native (empty array otherwise)
|
// return vertex count if primitive type is not native (empty array otherwise)
|
||||||
std::tuple<u32, u32> get_index_array_for_emulated_non_indexed_draw(const std::vector<rsx::draw_range_t> &first_count_commands, rsx::primitive_type primitive_mode, gl::ring_buffer &dst)
|
std::tuple<u32, u32> get_index_array_for_emulated_non_indexed_draw(rsx::primitive_type primitive_mode, gl::ring_buffer &dst, u32 vertex_count)
|
||||||
{
|
{
|
||||||
//This is an emulated buffer, so our indices only range from 0->original_vertex_array_length
|
// This is an emulated buffer, so our indices only range from 0->original_vertex_array_length
|
||||||
u32 vertex_count = 0;
|
const auto element_count = get_index_count(primitive_mode, vertex_count);
|
||||||
u32 element_count = 0;
|
|
||||||
verify(HERE), !gl::is_primitive_native(primitive_mode);
|
verify(HERE), !gl::is_primitive_native(primitive_mode);
|
||||||
|
|
||||||
for (const auto &range : first_count_commands)
|
|
||||||
{
|
|
||||||
element_count += (u32)get_index_count(primitive_mode, range.count);
|
|
||||||
vertex_count += range.count;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto mapping = dst.alloc_from_heap(element_count * sizeof(u16), 256);
|
auto mapping = dst.alloc_from_heap(element_count * sizeof(u16), 256);
|
||||||
char *mapped_buffer = (char *)mapping.first;
|
char *mapped_buffer = (char *)mapping.first;
|
||||||
|
|
||||||
|
@ -40,7 +33,7 @@ namespace
|
||||||
return std::make_tuple(element_count, mapping.second);
|
return std::make_tuple(element_count, mapping.second);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<u32, u32, u32> upload_index_buffer(gsl::span<const gsl::byte> raw_index_buffer, void *ptr, rsx::index_array_type type, rsx::primitive_type draw_mode, const std::vector<rsx::draw_range_t>& first_count_commands, u32 initial_vertex_count)
|
std::tuple<u32, u32, u32> upload_index_buffer(gsl::span<const gsl::byte> raw_index_buffer, void *ptr, rsx::index_array_type type, rsx::primitive_type draw_mode, u32 initial_vertex_count)
|
||||||
{
|
{
|
||||||
u32 min_index, max_index, vertex_draw_count = initial_vertex_count;
|
u32 min_index, max_index, vertex_draw_count = initial_vertex_count;
|
||||||
|
|
||||||
|
@ -51,7 +44,7 @@ namespace
|
||||||
|
|
||||||
gsl::span<gsl::byte> dst{ reinterpret_cast<gsl::byte*>(ptr), ::narrow<u32>(block_sz) };
|
gsl::span<gsl::byte> dst{ reinterpret_cast<gsl::byte*>(ptr), ::narrow<u32>(block_sz) };
|
||||||
std::tie(min_index, max_index, vertex_draw_count) = write_index_array_data_to_buffer(dst, raw_index_buffer,
|
std::tie(min_index, max_index, vertex_draw_count) = write_index_array_data_to_buffer(dst, raw_index_buffer,
|
||||||
type, draw_mode, rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index(), first_count_commands,
|
type, draw_mode, rsx::method_registers.restart_index_enabled(), rsx::method_registers.restart_index(),
|
||||||
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !gl::is_primitive_native(prim); });
|
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !gl::is_primitive_native(prim); });
|
||||||
|
|
||||||
return std::make_tuple(min_index, max_index, vertex_draw_count);
|
return std::make_tuple(min_index, max_index, vertex_draw_count);
|
||||||
|
@ -99,8 +92,8 @@ namespace
|
||||||
u32 index_count;
|
u32 index_count;
|
||||||
u32 offset_in_index_buffer;
|
u32 offset_in_index_buffer;
|
||||||
std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(
|
std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(
|
||||||
rsx::method_registers.current_draw_clause.draw_command_ranges,
|
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer,
|
||||||
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer);
|
rsx::method_registers.current_draw_clause.get_elements_count());
|
||||||
|
|
||||||
return{ index_count, vertex_count, min_index, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) };
|
return{ index_count, vertex_count, min_index, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) };
|
||||||
}
|
}
|
||||||
|
@ -128,8 +121,7 @@ namespace
|
||||||
u32 offset_in_index_buffer = mapping.second;
|
u32 offset_in_index_buffer = mapping.second;
|
||||||
|
|
||||||
std::tie(min_index, max_index, index_count) = upload_index_buffer(
|
std::tie(min_index, max_index, index_count) = upload_index_buffer(
|
||||||
command.raw_index_buffer, ptr, type, rsx::method_registers.current_draw_clause.primitive,
|
command.raw_index_buffer, ptr, type, rsx::method_registers.current_draw_clause.primitive, vertex_count);
|
||||||
rsx::method_registers.current_draw_clause.draw_command_ranges, vertex_count);
|
|
||||||
|
|
||||||
if (min_index >= max_index)
|
if (min_index >= max_index)
|
||||||
{
|
{
|
||||||
|
@ -163,8 +155,7 @@ namespace
|
||||||
u32 offset_in_index_buffer;
|
u32 offset_in_index_buffer;
|
||||||
u32 index_count;
|
u32 index_count;
|
||||||
std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(
|
std::tie(index_count, offset_in_index_buffer) = get_index_array_for_emulated_non_indexed_draw(
|
||||||
{ { 0, 0, vertex_count } },
|
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer, vertex_count);
|
||||||
rsx::method_registers.current_draw_clause.primitive, m_index_ring_buffer);
|
|
||||||
|
|
||||||
return{ index_count, vertex_count, 0, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) };
|
return{ index_count, vertex_count, 0, 0, std::make_tuple(GL_UNSIGNED_SHORT, offset_in_index_buffer) };
|
||||||
}
|
}
|
||||||
|
@ -182,11 +173,6 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||||
{
|
{
|
||||||
std::chrono::time_point<steady_clock> then = steady_clock::now();
|
std::chrono::time_point<steady_clock> then = steady_clock::now();
|
||||||
|
|
||||||
m_vertex_layout = analyse_inputs_interleaved();
|
|
||||||
|
|
||||||
if (!m_vertex_layout.validate())
|
|
||||||
return {};
|
|
||||||
|
|
||||||
//Write index buffers and count verts
|
//Write index buffers and count verts
|
||||||
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers));
|
auto result = std::visit(draw_command_visitor(*m_index_ring_buffer, m_vertex_layout), get_draw_command(rsx::method_registers));
|
||||||
|
|
||||||
|
@ -214,6 +200,8 @@ gl::vertex_upload_info GLGSRender::set_vertex_buffer()
|
||||||
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base;
|
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base;
|
||||||
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
|
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, GL_R8UI, required.first))
|
||||||
{
|
{
|
||||||
|
verify(HERE), cached->local_address == storage_address;
|
||||||
|
|
||||||
in_cache = true;
|
in_cache = true;
|
||||||
upload_info.persistent_mapping_offset = cached->offset_in_heap;
|
upload_info.persistent_mapping_offset = cached->offset_in_heap;
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2,8 +2,19 @@
|
||||||
|
|
||||||
#include <Utilities/types.h>
|
#include <Utilities/types.h>
|
||||||
#include <Utilities/Atomic.h>
|
#include <Utilities/Atomic.h>
|
||||||
|
#include <Utilities/mutex.h>
|
||||||
|
#include <Utilities/thread.h>
|
||||||
|
|
||||||
|
#include "rsx_utils.h"
|
||||||
|
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <memory>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#ifndef __unused
|
||||||
|
#define __unused(expression) do { (void)(expression); } while(0)
|
||||||
|
#endif
|
||||||
|
|
||||||
struct RsxDmaControl;
|
struct RsxDmaControl;
|
||||||
|
|
||||||
|
@ -17,8 +28,10 @@ namespace rsx
|
||||||
{
|
{
|
||||||
NOP = 0,
|
NOP = 0,
|
||||||
FIFO_EMPTY = 0xDEADF1F0,
|
FIFO_EMPTY = 0xDEADF1F0,
|
||||||
|
FIFO_BUSY = 0xBABEF1F0,
|
||||||
FIFO_PACKET_BEGIN = 0xF1F0,
|
FIFO_PACKET_BEGIN = 0xF1F0,
|
||||||
FIFO_DISABLED_COMMAND = 0xF1F4,
|
FIFO_DISABLED_COMMAND = 0xF1F4,
|
||||||
|
FIFO_DRAW_BARRIER = 0xF1F8,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct register_pair
|
struct register_pair
|
||||||
|
@ -26,21 +39,149 @@ namespace rsx
|
||||||
u32 reg;
|
u32 reg;
|
||||||
u32 value;
|
u32 value;
|
||||||
u32 loc;
|
u32 loc;
|
||||||
|
u32 reserved;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct fifo_buffer_info_t
|
||||||
|
{
|
||||||
|
u32 start_loc;
|
||||||
|
u32 length;
|
||||||
|
u32 num_draw_calls;
|
||||||
|
u32 draw_call_distance_weight;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct branch_target_info_t
|
||||||
|
{
|
||||||
|
u32 branch_target;
|
||||||
|
u32 branch_origin;
|
||||||
|
s64 weight;
|
||||||
|
u64 checksum_16;
|
||||||
|
u64 reserved;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct optimization_pass
|
struct optimization_pass
|
||||||
{
|
{
|
||||||
virtual void optimize(std::vector<register_pair>& commands, const u32* registers) const = 0;
|
virtual void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct flattening_pass : public optimization_pass
|
struct flattening_pass : public optimization_pass
|
||||||
{
|
{
|
||||||
void optimize(std::vector<register_pair>& commands, const u32* registers) const override;
|
private:
|
||||||
|
std::array<bool, 0x10000 / 4> m_skippable_registers;
|
||||||
|
|
||||||
|
public:
|
||||||
|
flattening_pass();
|
||||||
|
void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct reordering_pass : public optimization_pass
|
struct reordering_pass : public optimization_pass
|
||||||
{
|
{
|
||||||
void optimize(std::vector<register_pair>& commands, const u32* registers) const override;
|
private:
|
||||||
|
|
||||||
|
struct instruction_buffer_t
|
||||||
|
{
|
||||||
|
std::unordered_map<u32, u32> m_storage;
|
||||||
|
simple_array<u32> m_insertion_order;
|
||||||
|
|
||||||
|
instruction_buffer_t()
|
||||||
|
{
|
||||||
|
m_insertion_order.reserve(64);
|
||||||
|
}
|
||||||
|
|
||||||
|
void add_cmd(u32 reg, u32 value)
|
||||||
|
{
|
||||||
|
const auto is_new = std::get<1>(m_storage.insert_or_assign(reg, value));
|
||||||
|
if (!is_new)
|
||||||
|
{
|
||||||
|
for (auto &loc : m_insertion_order)
|
||||||
|
{
|
||||||
|
if (loc == reg)
|
||||||
|
{
|
||||||
|
loc |= 0x80000000;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m_insertion_order.push_back(reg);
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
m_storage.clear();
|
||||||
|
m_insertion_order.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void swap(instruction_buffer_t& other)
|
||||||
|
{
|
||||||
|
m_storage.swap(other.m_storage);
|
||||||
|
m_insertion_order.swap(other.m_insertion_order);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto size() const
|
||||||
|
{
|
||||||
|
return m_storage.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::pair<u32, u32> get(int index) const
|
||||||
|
{
|
||||||
|
const auto key = m_insertion_order[index];
|
||||||
|
if (key & 0x80000000)
|
||||||
|
{
|
||||||
|
// Disabled by a later write to the same register
|
||||||
|
// TODO: Track command type registers and avoid this
|
||||||
|
return { FIFO_DISABLED_COMMAND, 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto value = m_storage.at(key);
|
||||||
|
return { key, value };
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator == (const instruction_buffer_t& other) const
|
||||||
|
{
|
||||||
|
if (size() == other.size())
|
||||||
|
{
|
||||||
|
for (const auto &e : other.m_storage)
|
||||||
|
{
|
||||||
|
const auto found = m_storage.find(e.first);
|
||||||
|
if (found == m_storage.end())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (found->second != e.second)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct draw_call
|
||||||
|
{
|
||||||
|
instruction_buffer_t prologue;
|
||||||
|
std::vector<register_pair> draws;
|
||||||
|
bool write_prologue;
|
||||||
|
u32 primitive_type;
|
||||||
|
const register_pair* start_pos;
|
||||||
|
|
||||||
|
bool matches(const instruction_buffer_t setup, u32 prim) const
|
||||||
|
{
|
||||||
|
if (prim != primitive_type)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return prologue == setup;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
instruction_buffer_t registers_changed;
|
||||||
|
std::vector<draw_call> bins;
|
||||||
|
|
||||||
|
std::unordered_multimap<u32, fifo_buffer_info_t> m_results_prediction_table;
|
||||||
|
|
||||||
|
public:
|
||||||
|
void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands, const u32* registers) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class FIFO_control
|
class FIFO_control
|
||||||
|
@ -48,28 +189,58 @@ namespace rsx
|
||||||
RsxDmaControl* m_ctrl = nullptr;
|
RsxDmaControl* m_ctrl = nullptr;
|
||||||
u32 m_internal_get = 0;
|
u32 m_internal_get = 0;
|
||||||
|
|
||||||
|
std::shared_ptr<thread_base> m_prefetcher_thread;
|
||||||
|
u32 m_prefetch_get = 0;
|
||||||
|
atomic_t<bool> m_prefetcher_busy{ false };
|
||||||
|
atomic_t<bool> m_fifo_busy{ false };
|
||||||
|
fifo_buffer_info_t m_prefetcher_info;
|
||||||
|
bool m_prefetcher_speculating;
|
||||||
|
|
||||||
std::vector<std::unique_ptr<optimization_pass>> m_optimization_passes;
|
std::vector<std::unique_ptr<optimization_pass>> m_optimization_passes;
|
||||||
|
|
||||||
std::vector<register_pair> m_queue;
|
simple_array<register_pair> m_queue;
|
||||||
|
simple_array<register_pair> m_prefetched_queue;
|
||||||
atomic_t<u32> m_command_index{ 0 };
|
atomic_t<u32> m_command_index{ 0 };
|
||||||
|
|
||||||
bool is_blocking_cmd(u32 cmd);
|
shared_mutex m_prefetch_mutex; // Guards prefetch queue
|
||||||
bool is_sync_cmd(u32 cmd);
|
shared_mutex m_queue_mutex; // Guards primary queue
|
||||||
|
atomic_t<u64> m_ctrl_tag{ 0 }; // 'Guards' control registers
|
||||||
|
|
||||||
void read_ahead();
|
register_pair empty_cmd { FIFO_EMPTY };
|
||||||
void optimize();
|
register_pair busy_cmd { FIFO_BUSY };
|
||||||
|
|
||||||
|
u32 m_memwatch_addr = 0;
|
||||||
|
u32 m_memwatch_cmp = 0;
|
||||||
|
|
||||||
|
fifo_buffer_info_t m_fifo_info;
|
||||||
|
std::unordered_multimap<u32, branch_target_info_t> m_branch_prediction_table;
|
||||||
|
|
||||||
|
void read_ahead(fifo_buffer_info_t& info, simple_array<register_pair>& commands, u32& get_pointer);
|
||||||
|
void optimize(const fifo_buffer_info_t& info, simple_array<register_pair>& commands);
|
||||||
void clear_buffer();
|
void clear_buffer();
|
||||||
|
|
||||||
|
u32 get_likely_target(u32 source);
|
||||||
|
void report_branch_miss(u32 source, u32 target, u32 actual);
|
||||||
|
void report_branch_hit(u32 source, u32 target);
|
||||||
|
bool test_prefetcher_correctness(u32 actual_target);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
FIFO_control(rsx::thread* pctrl);
|
FIFO_control(rsx::thread* pctrl);
|
||||||
~FIFO_control() {}
|
~FIFO_control() {}
|
||||||
|
|
||||||
void set_get(u32 get);
|
void set_get(u32 get, bool spinning = false);
|
||||||
void set_put(u32 put);
|
void set_put(u32 put);
|
||||||
|
|
||||||
register_pair read();
|
const register_pair& read();
|
||||||
|
inline const register_pair& read_unsafe();
|
||||||
|
|
||||||
void register_optimization_pass(optimization_pass* pass);
|
void register_optimization_pass(optimization_pass* pass);
|
||||||
|
|
||||||
|
void finalize();
|
||||||
|
|
||||||
|
public:
|
||||||
|
static bool is_blocking_cmd(u32 cmd);
|
||||||
|
static bool is_sync_cmd(u32 cmd);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -42,7 +42,135 @@ namespace rsx
|
||||||
std::function<bool(u32 addr, bool is_writing)> g_access_violation_handler;
|
std::function<bool(u32 addr, bool is_writing)> g_access_violation_handler;
|
||||||
thread* g_current_renderer = nullptr;
|
thread* g_current_renderer = nullptr;
|
||||||
|
|
||||||
//TODO: Restore a working shaders cache
|
#pragma optimize("", off)
|
||||||
|
void run_tests()
|
||||||
|
{
|
||||||
|
#if 0
|
||||||
|
if (0)
|
||||||
|
{
|
||||||
|
auto _get_method_name = [](u32 reg) -> std::string
|
||||||
|
{
|
||||||
|
if (reg == FIFO::FIFO_DISABLED_COMMAND)
|
||||||
|
{
|
||||||
|
return "COMMAND DISABLED";
|
||||||
|
}
|
||||||
|
|
||||||
|
if (reg == FIFO::FIFO_PACKET_BEGIN)
|
||||||
|
{
|
||||||
|
return "PACKET BEGIN";
|
||||||
|
}
|
||||||
|
|
||||||
|
return rsx::get_method_name(reg >> 2);
|
||||||
|
};
|
||||||
|
|
||||||
|
auto _dump_commands = [&](const std::vector<FIFO::register_pair>& commands)
|
||||||
|
{
|
||||||
|
LOG_ERROR(RSX, "DUMP BEGINS--------------------------------");
|
||||||
|
for (const auto &cmd : commands)
|
||||||
|
{
|
||||||
|
LOG_ERROR(RSX, "%s (0x%x)", _get_method_name(cmd.reg), cmd.value);
|
||||||
|
}
|
||||||
|
LOG_ERROR(RSX, "DUMP ENDS--------------------------------");
|
||||||
|
};
|
||||||
|
|
||||||
|
// Test
|
||||||
|
std::vector<FIFO::register_pair> fake_commands =
|
||||||
|
{
|
||||||
|
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xdeadbeef },
|
||||||
|
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
|
||||||
|
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 5 },
|
||||||
|
{ NV4097_DRAW_ARRAYS << 2, 0xff000000 },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 0},
|
||||||
|
|
||||||
|
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xcafebabe },
|
||||||
|
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
|
||||||
|
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 5 },
|
||||||
|
{ NV4097_DRAW_ARRAYS << 2, 0xff0000ff },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 0},
|
||||||
|
|
||||||
|
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xdeadbeef },
|
||||||
|
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
|
||||||
|
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 5 },
|
||||||
|
{ NV4097_DRAW_ARRAYS << 2, 0xff0001fe },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 0},
|
||||||
|
|
||||||
|
{ 0xffffffff, 0 },
|
||||||
|
|
||||||
|
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xcafebabe },
|
||||||
|
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
|
||||||
|
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 5 },
|
||||||
|
{ NV4097_DRAW_ARRAYS << 2, 0xff0002fd },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 0},
|
||||||
|
|
||||||
|
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xdeadbeef },
|
||||||
|
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
|
||||||
|
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 5 },
|
||||||
|
{ NV4097_DRAW_ARRAYS << 2, 0xff0003fc },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 0},
|
||||||
|
|
||||||
|
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xcafebabe },
|
||||||
|
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
|
||||||
|
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 5 },
|
||||||
|
{ NV4097_DRAW_ARRAYS << 2, 0xff0004fb },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 0},
|
||||||
|
|
||||||
|
{ NV4097_SET_TEXTURE_OFFSET << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 1) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 2) << 2, 0xdeadbeef },
|
||||||
|
{ (NV4097_SET_TEXTURE_OFFSET + 3) << 2, 0xdeadbeef },
|
||||||
|
{ NV4097_SET_TEXTURE_CONTROL3 << 2, 0x100000},
|
||||||
|
{ NV4097_INVALIDATE_VERTEX_FILE << 2, 0 },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 5 },
|
||||||
|
{ NV4097_DRAW_ARRAYS << 2, 0xff0005fa },
|
||||||
|
{ NV4097_SET_BEGIN_END << 2, 0},
|
||||||
|
|
||||||
|
{ 0xffffffff, 0xdead },
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<u32> fake_registers(16384);
|
||||||
|
std::fill(fake_registers.begin(), fake_registers.end(), 0u);
|
||||||
|
|
||||||
|
FIFO::flattening_pass flattening_pass;
|
||||||
|
FIFO::reordering_pass reordering_pass;
|
||||||
|
|
||||||
|
FIFO::fifo_buffer_info_t info{ 0, fake_commands.size() * 4, /*7*/18, 0 };
|
||||||
|
flattening_pass.optimize(info, fake_commands, fake_registers.data());
|
||||||
|
|
||||||
|
_dump_commands(fake_commands);
|
||||||
|
|
||||||
|
reordering_pass.optimize(info, fake_commands, fake_registers.data());
|
||||||
|
|
||||||
|
_dump_commands(fake_commands);
|
||||||
|
|
||||||
|
LOG_ERROR(RSX, "FINISHED TEST");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#pragma optimize("", on)
|
||||||
|
|
||||||
u32 get_address(u32 offset, u32 location)
|
u32 get_address(u32 offset, u32 location)
|
||||||
{
|
{
|
||||||
|
@ -97,8 +225,10 @@ namespace rsx
|
||||||
return get_current_renderer()->ctxt_addr + offset;
|
return get_current_renderer()->ctxt_addr + offset;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
{
|
||||||
fmt::throw_exception("Invalid location (offset=0x%x, location=0x%x)" HERE, offset, location);
|
fmt::throw_exception("Invalid location (offset=0x%x, location=0x%x)" HERE, offset, location);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size)
|
u32 get_vertex_type_size_on_host(vertex_base_type type, u32 size)
|
||||||
|
@ -289,23 +419,7 @@ namespace rsx
|
||||||
conditional_render_test_address = 0;
|
conditional_render_test_address = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
rsx::method_registers.current_draw_clause.inline_vertex_array.resize(0);
|
|
||||||
in_begin_end = true;
|
in_begin_end = true;
|
||||||
|
|
||||||
switch (rsx::method_registers.current_draw_clause.primitive)
|
|
||||||
{
|
|
||||||
case rsx::primitive_type::line_loop:
|
|
||||||
case rsx::primitive_type::line_strip:
|
|
||||||
case rsx::primitive_type::polygon:
|
|
||||||
case rsx::primitive_type::quad_strip:
|
|
||||||
case rsx::primitive_type::triangle_fan:
|
|
||||||
case rsx::primitive_type::triangle_strip:
|
|
||||||
// Adjacency matters for these types
|
|
||||||
rsx::method_registers.current_draw_clause.is_disjoint_primitive = false;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
rsx::method_registers.current_draw_clause.is_disjoint_primitive = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
|
void thread::append_to_push_buffer(u32 attribute, u32 size, u32 subreg_index, vertex_base_type type, u32 value)
|
||||||
|
@ -348,15 +462,15 @@ namespace rsx
|
||||||
m_graphics_state |= rsx::pipeline_state::framebuffer_reads_dirty;
|
m_graphics_state |= rsx::pipeline_state::framebuffer_reads_dirty;
|
||||||
ROP_sync_timestamp = get_system_time();
|
ROP_sync_timestamp = get_system_time();
|
||||||
|
|
||||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
for (auto & push_buf : vertex_push_buffers)
|
||||||
{
|
{
|
||||||
//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
|
//Disabled, see https://github.com/RPCS3/rpcs3/issues/1932
|
||||||
//rsx::method_registers.register_vertex_info[index].size = 0;
|
//rsx::method_registers.register_vertex_info[index].size = 0;
|
||||||
|
|
||||||
vertex_push_buffers[index].clear();
|
push_buf.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
element_push_buffer.resize(0);
|
element_push_buffer.clear();
|
||||||
|
|
||||||
if (zcull_ctrl->active)
|
if (zcull_ctrl->active)
|
||||||
zcull_ctrl->on_draw();
|
zcull_ctrl->on_draw();
|
||||||
|
@ -397,6 +511,7 @@ namespace rsx
|
||||||
void thread::on_task()
|
void thread::on_task()
|
||||||
{
|
{
|
||||||
m_rsx_thread = std::this_thread::get_id();
|
m_rsx_thread = std::this_thread::get_id();
|
||||||
|
run_tests();
|
||||||
|
|
||||||
if (supports_native_ui)
|
if (supports_native_ui)
|
||||||
{
|
{
|
||||||
|
@ -430,8 +545,8 @@ namespace rsx
|
||||||
|
|
||||||
fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
|
fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
|
||||||
|
|
||||||
fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
|
//fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
|
||||||
//fifo_ctrl->register_optimization_pass(new FIFO::reordering_pass());
|
//fifo_ctrl->register_optimization_pass(new FIFO::reordering_pass()); // R&C2 - Not working if flattening is also enabled!!!
|
||||||
//fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
|
//fifo_ctrl->register_optimization_pass(new FIFO::flattening_pass());
|
||||||
|
|
||||||
last_flip_time = get_system_time() - 1000000;
|
last_flip_time = get_system_time() - 1000000;
|
||||||
|
@ -539,6 +654,29 @@ namespace rsx
|
||||||
void thread::on_exit()
|
void thread::on_exit()
|
||||||
{
|
{
|
||||||
m_rsx_thread_exiting = true;
|
m_rsx_thread_exiting = true;
|
||||||
|
|
||||||
|
if (m_vblank_thread)
|
||||||
|
{
|
||||||
|
m_vblank_thread->join();
|
||||||
|
m_vblank_thread.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m_decompiler_thread)
|
||||||
|
{
|
||||||
|
m_decompiler_thread->join();
|
||||||
|
m_decompiler_thread.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fifo_ctrl)
|
||||||
|
{
|
||||||
|
fifo_ctrl->finalize();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string thread::get_name() const
|
||||||
|
{
|
||||||
|
return "rsx::thread";
|
||||||
|
>>>>>>> rsx: Fixups
|
||||||
}
|
}
|
||||||
|
|
||||||
void thread::fill_scale_offset_data(void *buffer, bool flip_y) const
|
void thread::fill_scale_offset_data(void *buffer, bool flip_y) const
|
||||||
|
@ -740,7 +878,7 @@ namespace rsx
|
||||||
return t + timestamp_subvalue;
|
return t + timestamp_subvalue;
|
||||||
}
|
}
|
||||||
|
|
||||||
gsl::span<const gsl::byte> thread::get_raw_index_array(const std::vector<draw_range_t>& draw_indexed_clause) const
|
gsl::span<const gsl::byte> thread::get_raw_index_array(const draw_clause& draw_indexed_clause) const
|
||||||
{
|
{
|
||||||
if (element_push_buffer.size())
|
if (element_push_buffer.size())
|
||||||
{
|
{
|
||||||
|
@ -755,49 +893,29 @@ namespace rsx
|
||||||
bool is_primitive_restart_enabled = rsx::method_registers.restart_index_enabled();
|
bool is_primitive_restart_enabled = rsx::method_registers.restart_index_enabled();
|
||||||
u32 primitive_restart_index = rsx::method_registers.restart_index();
|
u32 primitive_restart_index = rsx::method_registers.restart_index();
|
||||||
|
|
||||||
u32 min_index = UINT32_MAX;
|
const u32 first = draw_indexed_clause.min_index();
|
||||||
u32 max_index = 0;
|
const u32 count = draw_indexed_clause.get_elements_count();
|
||||||
|
|
||||||
for (const auto &range : draw_indexed_clause)
|
|
||||||
{
|
|
||||||
const u32 root_index = (range.command_data_offset / type_size) + range.first;
|
|
||||||
min_index = std::min(root_index, min_index);
|
|
||||||
max_index = std::max(root_index + range.count, max_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
const u32 first = min_index;
|
|
||||||
const u32 count = max_index - min_index;
|
|
||||||
|
|
||||||
const gsl::byte* ptr = static_cast<const gsl::byte*>(vm::base(address));
|
const gsl::byte* ptr = static_cast<const gsl::byte*>(vm::base(address));
|
||||||
return{ ptr + first * type_size, count * type_size };
|
return{ ptr + first * type_size, count * type_size };
|
||||||
}
|
}
|
||||||
|
|
||||||
gsl::span<const gsl::byte> thread::get_raw_vertex_buffer(const rsx::data_array_format_info& vertex_array_info, u32 base_offset, const std::vector<draw_range_t>& vertex_ranges) const
|
gsl::span<const gsl::byte> thread::get_raw_vertex_buffer(const rsx::data_array_format_info& vertex_array_info, u32 base_offset, const draw_clause& draw_array_clause) const
|
||||||
{
|
{
|
||||||
u32 offset = vertex_array_info.offset();
|
u32 offset = vertex_array_info.offset();
|
||||||
u32 address = rsx::get_address(rsx::get_vertex_offset_from_base(base_offset, offset & 0x7fffffff), offset >> 31);
|
u32 address = rsx::get_address(rsx::get_vertex_offset_from_base(base_offset, offset & 0x7fffffff), offset >> 31);
|
||||||
|
|
||||||
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array_info.type(), vertex_array_info.size());
|
u32 element_size = rsx::get_vertex_type_size_on_host(vertex_array_info.type(), vertex_array_info.size());
|
||||||
|
|
||||||
u32 min_index = UINT32_MAX;
|
const u32 first = draw_array_clause.min_index();
|
||||||
u32 max_index = 0;
|
const u32 count = draw_array_clause.get_elements_count();
|
||||||
|
|
||||||
for (const auto &range : vertex_ranges)
|
|
||||||
{
|
|
||||||
const auto root_index = (range.command_data_offset / vertex_array_info.stride()) + range.first;
|
|
||||||
min_index = std::min(root_index, min_index);
|
|
||||||
max_index = std::max(root_index + range.count, max_index);
|
|
||||||
}
|
|
||||||
|
|
||||||
const u32 first = min_index;
|
|
||||||
const u32 count = max_index - min_index;
|
|
||||||
|
|
||||||
const gsl::byte* ptr = gsl::narrow_cast<const gsl::byte*>(vm::base(address));
|
const gsl::byte* ptr = gsl::narrow_cast<const gsl::byte*>(vm::base(address));
|
||||||
return {ptr + first * vertex_array_info.stride(), count * vertex_array_info.stride() + element_size};
|
return {ptr + first * vertex_array_info.stride(), count * vertex_array_info.stride() + element_size};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
|
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
|
||||||
thread::get_vertex_buffers(const rsx::rsx_state& state, const std::vector<draw_range_t>& vertex_ranges, const u64 consumed_attrib_mask) const
|
thread::get_vertex_buffers(const rsx::rsx_state& state, const u64 consumed_attrib_mask) const
|
||||||
{
|
{
|
||||||
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>> result;
|
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>> result;
|
||||||
result.reserve(rsx::limits::vertex_count);
|
result.reserve(rsx::limits::vertex_count);
|
||||||
|
@ -815,7 +933,7 @@ namespace rsx
|
||||||
{
|
{
|
||||||
const rsx::data_array_format_info& info = state.vertex_arrays_info[index];
|
const rsx::data_array_format_info& info = state.vertex_arrays_info[index];
|
||||||
result.push_back(vertex_array_buffer{info.type(), info.size(), info.stride(),
|
result.push_back(vertex_array_buffer{info.type(), info.size(), info.stride(),
|
||||||
get_raw_vertex_buffer(info, state.vertex_data_base_offset(), vertex_ranges), index, true});
|
get_raw_vertex_buffer(info, state.vertex_data_base_offset(), state.current_draw_clause), index, true});
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -854,7 +972,7 @@ namespace rsx
|
||||||
{
|
{
|
||||||
return draw_indexed_array_command
|
return draw_indexed_array_command
|
||||||
{
|
{
|
||||||
get_raw_index_array( rsx::method_registers.current_draw_clause.draw_command_ranges)
|
get_raw_index_array(state.current_draw_clause)
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1301,7 +1419,6 @@ namespace rsx
|
||||||
if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
|
if (state.current_draw_clause.command == rsx::draw_command::inlined_array)
|
||||||
{
|
{
|
||||||
vertex_input_layout result = {};
|
vertex_input_layout result = {};
|
||||||
result.interleaved_blocks.reserve(8);
|
|
||||||
|
|
||||||
interleaved_range_info info = {};
|
interleaved_range_info info = {};
|
||||||
info.interleaved = true;
|
info.interleaved = true;
|
||||||
|
@ -1336,8 +1453,8 @@ namespace rsx
|
||||||
|
|
||||||
const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask();
|
const u32 frequency_divider_mask = rsx::method_registers.frequency_divider_operation_mask();
|
||||||
vertex_input_layout result = {};
|
vertex_input_layout result = {};
|
||||||
result.interleaved_blocks.reserve(8);
|
result.interleaved_blocks.reserve(16);
|
||||||
result.referenced_registers.reserve(4);
|
result.referenced_registers.reserve(16);
|
||||||
|
|
||||||
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
for (u8 index = 0; index < rsx::limits::vertex_count; ++index)
|
||||||
{
|
{
|
||||||
|
@ -1430,7 +1547,7 @@ namespace rsx
|
||||||
block.base_offset = base_address;
|
block.base_offset = base_address;
|
||||||
block.attribute_stride = info.stride();
|
block.attribute_stride = info.stride();
|
||||||
block.memory_location = info.offset() >> 31;
|
block.memory_location = info.offset() >> 31;
|
||||||
block.locations.reserve(8);
|
block.locations.reserve(16);
|
||||||
block.locations.push_back(index);
|
block.locations.push_back(index);
|
||||||
block.min_divisor = info.frequency();
|
block.min_divisor = info.frequency();
|
||||||
block.all_modulus = !!(frequency_divider_mask & (1 << index));
|
block.all_modulus = !!(frequency_divider_mask & (1 << index));
|
||||||
|
|
|
@ -528,6 +528,8 @@ namespace rsx
|
||||||
virtual void on_decompiler_exit() {}
|
virtual void on_decompiler_exit() {}
|
||||||
virtual bool on_decompiler_task() { return false; }
|
virtual bool on_decompiler_task() { return false; }
|
||||||
|
|
||||||
|
virtual void emit_geometry(u32) {}
|
||||||
|
|
||||||
void run_FIFO();
|
void run_FIFO();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -554,11 +556,11 @@ namespace rsx
|
||||||
void read_barrier(u32 memory_address, u32 memory_range);
|
void read_barrier(u32 memory_address, u32 memory_range);
|
||||||
virtual void sync_hint(FIFO_hint hint) {}
|
virtual void sync_hint(FIFO_hint hint) {}
|
||||||
|
|
||||||
gsl::span<const gsl::byte> get_raw_index_array(const std::vector<draw_range_t>& draw_indexed_clause) const;
|
gsl::span<const gsl::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
|
||||||
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const std::vector<draw_range_t>& vertex_ranges) const;
|
gsl::span<const gsl::byte> get_raw_vertex_buffer(const rsx::data_array_format_info&, u32 base_offset, const draw_clause& draw_array_clause) const;
|
||||||
|
|
||||||
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
|
std::vector<std::variant<vertex_array_buffer, vertex_array_register, empty_vertex_array>>
|
||||||
get_vertex_buffers(const rsx::rsx_state& state, const std::vector<draw_range_t>& vertex_ranges, const u64 consumed_attrib_mask) const;
|
get_vertex_buffers(const rsx::rsx_state& state, const u64 consumed_attrib_mask) const;
|
||||||
|
|
||||||
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
|
std::variant<draw_array_command, draw_indexed_array_command, draw_inlined_array>
|
||||||
get_draw_command(const rsx::rsx_state& state) const;
|
get_draw_command(const rsx::rsx_state& state) const;
|
||||||
|
|
|
@ -603,7 +603,7 @@ VKGSRender::VKGSRender() : GSRender()
|
||||||
std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);
|
std::tie(pipeline_layout, descriptor_layouts) = get_shared_pipeline_layout(*m_device);
|
||||||
|
|
||||||
//Occlusion
|
//Occlusion
|
||||||
m_occlusion_query_pool.create((*m_device), DESCRIPTOR_MAX_DRAW_CALLS); //Enough for 4k draw calls per pass
|
m_occlusion_query_pool.create((*m_device), OCCLUSION_MAX_POOL_SIZE);
|
||||||
for (int n = 0; n < 128; ++n)
|
for (int n = 0; n < 128; ++n)
|
||||||
m_occlusion_query_data[n].driver_handle = n;
|
m_occlusion_query_data[n].driver_handle = n;
|
||||||
|
|
||||||
|
@ -619,7 +619,7 @@ VKGSRender::VKGSRender() : GSRender()
|
||||||
|
|
||||||
//VRAM allocation
|
//VRAM allocation
|
||||||
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000);
|
m_attrib_ring_info.create(VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, VK_ATTRIB_RING_BUFFER_SIZE_M * 0x100000, "attrib buffer", 0x400000);
|
||||||
m_uniform_buffer_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer");
|
m_uniform_buffer_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_UBO_RING_BUFFER_SIZE_M * 0x100000, "uniform buffer");
|
||||||
m_transform_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer");
|
m_transform_constants_ring_info.create(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VK_TRANSFORM_CONSTANTS_BUFFER_SIZE_M * 0x100000, "transform constants buffer");
|
||||||
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
|
m_index_buffer_ring_info.create(VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_INDEX_RING_BUFFER_SIZE_M * 0x100000, "index buffer");
|
||||||
m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000);
|
m_texture_upload_buffer_ring_info.create(VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_TEXTURE_UPLOAD_RING_BUFFER_SIZE_M * 0x100000, "texture upload buffer", 32 * 0x100000);
|
||||||
|
@ -849,11 +849,15 @@ bool VKGSRender::on_access_violation(u32 address, bool is_writing)
|
||||||
m_flush_requests.post(sync_timestamp == 0ull);
|
m_flush_requests.post(sync_timestamp == 0ull);
|
||||||
has_queue_ref = true;
|
has_queue_ref = true;
|
||||||
}
|
}
|
||||||
else
|
else if (!vk::is_uninterruptible())
|
||||||
{
|
{
|
||||||
//Flush primary cb queue to sync pending changes (e.g image transitions!)
|
//Flush primary cb queue to sync pending changes (e.g image transitions!)
|
||||||
flush_command_queue();
|
flush_command_queue();
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
LOG_ERROR(RSX, "Fault in uninterruptible code!");
|
||||||
|
}
|
||||||
|
|
||||||
if (sync_timestamp > 0)
|
if (sync_timestamp > 0)
|
||||||
{
|
{
|
||||||
|
@ -1110,6 +1114,145 @@ void VKGSRender::close_render_pass()
|
||||||
render_pass_open = false;
|
render_pass_open = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VKGSRender::emit_geometry(u32 sub_index)
|
||||||
|
{
|
||||||
|
auto &draw_call = rsx::method_registers.current_draw_clause;
|
||||||
|
//std::chrono::time_point<steady_clock> vertex_start = steady_clock::now();
|
||||||
|
|
||||||
|
if (sub_index == 0)
|
||||||
|
{
|
||||||
|
m_vertex_layout = analyse_inputs_interleaved();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!m_vertex_layout.validate())
|
||||||
|
{
|
||||||
|
// No vertex inputs enabled
|
||||||
|
draw_call.end();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sub_index > 0 && draw_call.execute_pipeline_dependencies() & rsx::vertex_base_changed)
|
||||||
|
{
|
||||||
|
// Rebase vertex bases instead of
|
||||||
|
for (auto &info : m_vertex_layout.interleaved_blocks)
|
||||||
|
{
|
||||||
|
const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset();
|
||||||
|
info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
|
||||||
|
const auto old_volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
|
||||||
|
|
||||||
|
// Programs data is dependent on vertex state
|
||||||
|
auto upload_info = upload_vertex_data();
|
||||||
|
if (!upload_info.vertex_draw_count)
|
||||||
|
{
|
||||||
|
// Malformed vertex setup; abort
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
//std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
|
||||||
|
//m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
|
||||||
|
|
||||||
|
auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
|
||||||
|
auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
|
||||||
|
bool update_descriptors = false;
|
||||||
|
|
||||||
|
if (sub_index == 0)
|
||||||
|
{
|
||||||
|
// Load program execution environment
|
||||||
|
load_program_env(upload_info);
|
||||||
|
update_descriptors = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Update vertex fetch environment
|
||||||
|
update_vertex_env(upload_info);
|
||||||
|
|
||||||
|
if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer)
|
||||||
|
{
|
||||||
|
/* VkDescriptorSetAllocateInfo alloc_info = {};
|
||||||
|
alloc_info.descriptorPool = m_current_frame->descriptor_pool;
|
||||||
|
alloc_info.descriptorSetCount = 1;
|
||||||
|
alloc_info.pSetLayouts = &descriptor_layouts;
|
||||||
|
alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
|
||||||
|
|
||||||
|
VkDescriptorSet new_descriptor_set;
|
||||||
|
CHECK_RESULT(vkAllocateDescriptorSets(*m_device, &alloc_info, &new_descriptor_set));
|
||||||
|
|
||||||
|
VkCopyDescriptorSet copy = {};
|
||||||
|
copy.sType = VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET;
|
||||||
|
copy
|
||||||
|
|
||||||
|
m_current_frame->descriptor_set = new_descriptor_set;
|
||||||
|
m_current_frame->used_descriptors++;
|
||||||
|
|
||||||
|
update_descriptors = true;*/
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (update_descriptors)
|
||||||
|
{
|
||||||
|
m_program->bind_uniform(persistent_buffer, "persistent_input_stream", m_current_frame->descriptor_set);
|
||||||
|
m_program->bind_uniform(volatile_buffer, "volatile_input_stream", m_current_frame->descriptor_set);
|
||||||
|
|
||||||
|
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
//std::chrono::time_point<steady_clock> draw_start = steady_clock::now();
|
||||||
|
//m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_start - vertex_end).count();
|
||||||
|
|
||||||
|
begin_render_pass();
|
||||||
|
|
||||||
|
if (!upload_info.index_info)
|
||||||
|
{
|
||||||
|
if (draw_call.is_single_draw())
|
||||||
|
{
|
||||||
|
vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
u32 vertex_offset = 0;
|
||||||
|
const auto subranges = draw_call.get_subranges();
|
||||||
|
for (const auto &range : subranges)
|
||||||
|
{
|
||||||
|
vkCmdDraw(*m_current_command_buffer, range.count, 1, vertex_offset, 0);
|
||||||
|
vertex_offset += range.count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const VkIndexType index_type = std::get<1>(*upload_info.index_info);
|
||||||
|
const VkDeviceSize offset = std::get<0>(*upload_info.index_info);
|
||||||
|
|
||||||
|
vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);
|
||||||
|
|
||||||
|
if (rsx::method_registers.current_draw_clause.is_single_draw())
|
||||||
|
{
|
||||||
|
const u32 index_count = upload_info.vertex_draw_count;
|
||||||
|
vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
u32 vertex_offset = 0;
|
||||||
|
const auto subranges = draw_call.get_subranges();
|
||||||
|
for (const auto &range : subranges)
|
||||||
|
{
|
||||||
|
const auto count = get_index_count(draw_call.primitive, range.count);
|
||||||
|
vkCmdDrawIndexed(*m_current_command_buffer, count, 1, vertex_offset, 0, 0);
|
||||||
|
vertex_offset += count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close_render_pass();
|
||||||
|
|
||||||
|
//std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
|
||||||
|
//m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - draw_start).count();
|
||||||
|
}
|
||||||
|
|
||||||
void VKGSRender::end()
|
void VKGSRender::end()
|
||||||
{
|
{
|
||||||
if (skip_frame || !framebuffer_status_valid || renderer_unavailable ||
|
if (skip_frame || !framebuffer_status_valid || renderer_unavailable ||
|
||||||
|
@ -1363,31 +1506,6 @@ void VKGSRender::end()
|
||||||
std::chrono::time_point<steady_clock> program_end = steady_clock::now();
|
std::chrono::time_point<steady_clock> program_end = steady_clock::now();
|
||||||
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
|
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
|
||||||
|
|
||||||
// Programs data is dependent on vertex state
|
|
||||||
std::chrono::time_point<steady_clock> vertex_start = program_end;
|
|
||||||
auto upload_info = upload_vertex_data();
|
|
||||||
std::chrono::time_point<steady_clock> vertex_end = steady_clock::now();
|
|
||||||
m_vertex_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(vertex_end - vertex_start).count();
|
|
||||||
|
|
||||||
if (!upload_info.vertex_draw_count)
|
|
||||||
{
|
|
||||||
// Malformed vertex setup; abort
|
|
||||||
rsx::thread::end();
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Load program execution environment
|
|
||||||
program_start = vertex_end;
|
|
||||||
load_program_env(upload_info);
|
|
||||||
|
|
||||||
VkBufferView persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value;
|
|
||||||
VkBufferView volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value;
|
|
||||||
m_program->bind_uniform(persistent_buffer, "persistent_input_stream", m_current_frame->descriptor_set);
|
|
||||||
m_program->bind_uniform(volatile_buffer, "volatile_input_stream", m_current_frame->descriptor_set);
|
|
||||||
|
|
||||||
program_end = steady_clock::now();
|
|
||||||
m_setup_time += std::chrono::duration_cast<std::chrono::microseconds>(program_end - program_start).count();
|
|
||||||
|
|
||||||
textures_start = program_end;
|
textures_start = program_end;
|
||||||
|
|
||||||
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
for (int i = 0; i < rsx::limits::fragment_textures_count; ++i)
|
||||||
|
@ -1453,10 +1571,6 @@ void VKGSRender::end()
|
||||||
textures_end = steady_clock::now();
|
textures_end = steady_clock::now();
|
||||||
m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
m_textures_upload_time += std::chrono::duration_cast<std::chrono::microseconds>(textures_end - textures_start).count();
|
||||||
|
|
||||||
//While vertex upload is an interruptible process, if we made it this far, there's no need to sync anything that occurs past this point
|
|
||||||
//Only textures are synchronized tightly with the GPU and they have been read back above
|
|
||||||
vk::enter_uninterruptible();
|
|
||||||
|
|
||||||
u32 occlusion_id = 0;
|
u32 occlusion_id = 0;
|
||||||
if (m_occlusion_query_active)
|
if (m_occlusion_query_active)
|
||||||
{
|
{
|
||||||
|
@ -1475,21 +1589,9 @@ void VKGSRender::end()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
|
|
||||||
vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr);
|
|
||||||
|
|
||||||
update_draw_state();
|
|
||||||
|
|
||||||
begin_render_pass();
|
|
||||||
|
|
||||||
bool primitive_emulated = false;
|
bool primitive_emulated = false;
|
||||||
vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated);
|
vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated);
|
||||||
|
|
||||||
const bool allow_multidraw = supports_multidraw && !g_cfg.video.disable_FIFO_reordering;
|
|
||||||
const bool single_draw = (!allow_multidraw ||
|
|
||||||
rsx::method_registers.current_draw_clause.draw_command_ranges.size() <= 1 ||
|
|
||||||
rsx::method_registers.current_draw_clause.is_disjoint_primitive);
|
|
||||||
|
|
||||||
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
|
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
|
||||||
{
|
{
|
||||||
//Begin query
|
//Begin query
|
||||||
|
@ -1500,45 +1602,22 @@ void VKGSRender::end()
|
||||||
m_current_command_buffer->flags |= cb_has_occlusion_task;
|
m_current_command_buffer->flags |= cb_has_occlusion_task;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!upload_info.index_info)
|
// While vertex upload is an interruptible process, if we made it this far, there's no need to sync anything that occurs past this point
|
||||||
{
|
// Only textures are synchronized tightly with the GPU and they have been read back above
|
||||||
if (single_draw)
|
vk::enter_uninterruptible();
|
||||||
{
|
|
||||||
vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const auto base_vertex = rsx::method_registers.current_draw_clause.draw_command_ranges.front().first;
|
|
||||||
for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges)
|
|
||||||
{
|
|
||||||
vkCmdDraw(*m_current_command_buffer, range.count, 1, range.first - base_vertex, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
VkIndexType index_type;
|
|
||||||
const u32 index_count = upload_info.vertex_draw_count;
|
|
||||||
VkDeviceSize offset;
|
|
||||||
|
|
||||||
std::tie(offset, index_type) = *upload_info.index_info;
|
vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline);
|
||||||
vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type);
|
update_draw_state();
|
||||||
|
|
||||||
if (single_draw)
|
u32 sub_index = 0;
|
||||||
{
|
rsx::method_registers.current_draw_clause.begin();
|
||||||
vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0);
|
do
|
||||||
}
|
{
|
||||||
else
|
emit_geometry(sub_index++);
|
||||||
{
|
|
||||||
u32 first_vertex = 0;
|
|
||||||
for (const auto &range : rsx::method_registers.current_draw_clause.draw_command_ranges)
|
|
||||||
{
|
|
||||||
const auto verts = get_index_count(rsx::method_registers.current_draw_clause.primitive, range.count);
|
|
||||||
vkCmdDrawIndexed(*m_current_command_buffer, verts, 1, first_vertex, 0, 0);
|
|
||||||
first_vertex += verts;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
while (rsx::method_registers.current_draw_clause.next());
|
||||||
|
|
||||||
|
vk::leave_uninterruptible();
|
||||||
|
|
||||||
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
|
if (m_occlusion_query_active && (occlusion_id != UINT32_MAX))
|
||||||
{
|
{
|
||||||
|
@ -1546,15 +1625,9 @@ void VKGSRender::end()
|
||||||
m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
|
m_occlusion_query_pool.end_query(*m_current_command_buffer, occlusion_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
close_render_pass();
|
|
||||||
vk::leave_uninterruptible();
|
|
||||||
|
|
||||||
m_current_command_buffer->num_draws++;
|
m_current_command_buffer->num_draws++;
|
||||||
m_rtts.on_write();
|
m_rtts.on_write();
|
||||||
|
|
||||||
std::chrono::time_point<steady_clock> draw_end = steady_clock::now();
|
|
||||||
m_draw_time += std::chrono::duration_cast<std::chrono::microseconds>(draw_end - textures_end).count();
|
|
||||||
|
|
||||||
m_draw_calls++;
|
m_draw_calls++;
|
||||||
|
|
||||||
rsx::thread::end();
|
rsx::thread::end();
|
||||||
|
@ -2479,29 +2552,38 @@ void VKGSRender::load_program_env(const vk::vertex_upload_info& vertex_info)
|
||||||
m_graphics_state &= ~handled_flags;
|
m_graphics_state &= ~handled_flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const u32 mr_color_offset[rsx::limits::color_buffers_count] =
|
void VKGSRender::update_vertex_env(const vk::vertex_upload_info& vertex_info)
|
||||||
{
|
{
|
||||||
NV4097_SET_SURFACE_COLOR_AOFFSET,
|
// Vertex base index = vertex_offset + 132
|
||||||
NV4097_SET_SURFACE_COLOR_BOFFSET,
|
// Vertex layout = vertex_offset + 160
|
||||||
NV4097_SET_SURFACE_COLOR_COFFSET,
|
|
||||||
NV4097_SET_SURFACE_COLOR_DOFFSET
|
|
||||||
};
|
|
||||||
|
|
||||||
static const u32 mr_color_dma[rsx::limits::color_buffers_count] =
|
std::array<s32, 16 * 4> vertex_layout;
|
||||||
{
|
fill_vertex_layout_state(m_vertex_layout, vertex_info.allocated_vertex_count, vertex_layout.data(),
|
||||||
NV4097_SET_CONTEXT_DMA_COLOR_A,
|
vertex_info.persistent_window_offset, vertex_info.volatile_window_offset);
|
||||||
NV4097_SET_CONTEXT_DMA_COLOR_B,
|
|
||||||
NV4097_SET_CONTEXT_DMA_COLOR_C,
|
|
||||||
NV4097_SET_CONTEXT_DMA_COLOR_D
|
|
||||||
};
|
|
||||||
|
|
||||||
static const u32 mr_color_pitch[rsx::limits::color_buffers_count] =
|
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset, 512,
|
||||||
{
|
VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT);
|
||||||
NV4097_SET_SURFACE_PITCH_A,
|
|
||||||
NV4097_SET_SURFACE_PITCH_B,
|
vkCmdUpdateBuffer(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset + 132, 4, &vertex_info.vertex_index_base);
|
||||||
NV4097_SET_SURFACE_PITCH_C,
|
|
||||||
NV4097_SET_SURFACE_PITCH_D
|
u32 write_offset = m_vertex_state_buffer_info.offset + 160;
|
||||||
};
|
s32 *src_ptr = vertex_layout.data();
|
||||||
|
|
||||||
|
for (const auto& placement : m_vertex_layout.attribute_placement)
|
||||||
|
{
|
||||||
|
constexpr u32 data_len = 4 * sizeof(s32);
|
||||||
|
if (placement != rsx::attribute_buffer_placement::none)
|
||||||
|
{
|
||||||
|
vkCmdUpdateBuffer(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, write_offset, data_len, src_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
write_offset += data_len;
|
||||||
|
src_ptr += 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
vk::insert_buffer_memory_barrier(*m_current_command_buffer, m_uniform_buffer_ring_info.heap->value, m_vertex_state_buffer_info.offset, 512,
|
||||||
|
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_UNIFORM_READ_BIT);
|
||||||
|
}
|
||||||
|
|
||||||
void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool skip_reading)
|
void VKGSRender::init_buffers(rsx::framebuffer_creation_context context, bool skip_reading)
|
||||||
{
|
{
|
||||||
|
@ -3048,7 +3130,27 @@ void VKGSRender::flip(int buffer)
|
||||||
|
|
||||||
if (!image_to_flip)
|
if (!image_to_flip)
|
||||||
{
|
{
|
||||||
//Read from cell
|
// Read from cell
|
||||||
|
const auto range = utils::address_range::start_length(absolute_address, buffer_pitch * buffer_height);
|
||||||
|
const auto overlap = m_texture_cache.find_texture_from_range(range);
|
||||||
|
bool flush_queue = false;
|
||||||
|
|
||||||
|
for (const auto & section : overlap)
|
||||||
|
{
|
||||||
|
if (section->get_protection() == utils::protection::no)
|
||||||
|
{
|
||||||
|
section->copy_texture(false, *m_current_command_buffer, m_swapchain->get_graphics_queue());
|
||||||
|
flush_queue = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (flush_queue)
|
||||||
|
{
|
||||||
|
// Submit for processing to lower hard fault penalty
|
||||||
|
flush_command_queue();
|
||||||
|
}
|
||||||
|
|
||||||
|
m_texture_cache.invalidate_range(range, rsx::invalidation_cause::read, *m_current_command_buffer, m_swapchain->get_graphics_queue());
|
||||||
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height);
|
image_to_flip = m_texture_cache.upload_image_simple(*m_current_command_buffer, absolute_address, buffer_width, buffer_height);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include "Emu/RSX/GSRender.h"
|
#include "Emu/RSX/GSRender.h"
|
||||||
#include "VKHelpers.h"
|
#include "VKHelpers.h"
|
||||||
#include "VKTextureCache.h"
|
#include "VKTextureCache.h"
|
||||||
|
@ -403,9 +403,11 @@ private:
|
||||||
|
|
||||||
vk::vertex_upload_info upload_vertex_data();
|
vk::vertex_upload_info upload_vertex_data();
|
||||||
|
|
||||||
public:
|
|
||||||
bool load_program();
|
bool load_program();
|
||||||
void load_program_env(const vk::vertex_upload_info& vertex_info);
|
void load_program_env(const vk::vertex_upload_info& upload_info);
|
||||||
|
void update_vertex_env(const vk::vertex_upload_info& upload_info);
|
||||||
|
|
||||||
|
public:
|
||||||
void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false);
|
void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false);
|
||||||
void read_buffers();
|
void read_buffers();
|
||||||
void write_buffers();
|
void write_buffers();
|
||||||
|
@ -422,6 +424,7 @@ public:
|
||||||
protected:
|
protected:
|
||||||
void begin() override;
|
void begin() override;
|
||||||
void end() override;
|
void end() override;
|
||||||
|
void emit_geometry(u32 sub_index) override;
|
||||||
|
|
||||||
void on_init_thread() override;
|
void on_init_thread() override;
|
||||||
void on_exit() override;
|
void on_exit() override;
|
||||||
|
|
|
@ -32,6 +32,7 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define DESCRIPTOR_MAX_DRAW_CALLS 4096
|
#define DESCRIPTOR_MAX_DRAW_CALLS 4096
|
||||||
|
#define OCCLUSION_MAX_POOL_SIZE 8192
|
||||||
|
|
||||||
#define VERTEX_BUFFERS_FIRST_BIND_SLOT 3
|
#define VERTEX_BUFFERS_FIRST_BIND_SLOT 3
|
||||||
#define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 2
|
#define FRAGMENT_CONSTANT_BUFFERS_BIND_SLOT 2
|
||||||
|
@ -652,7 +653,7 @@ namespace vk
|
||||||
VkImageTiling tiling,
|
VkImageTiling tiling,
|
||||||
VkImageUsageFlags usage,
|
VkImageUsageFlags usage,
|
||||||
VkImageCreateFlags image_flags)
|
VkImageCreateFlags image_flags)
|
||||||
: m_device(dev)
|
: m_device(dev), current_layout(initial_layout)
|
||||||
{
|
{
|
||||||
info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
|
info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
|
||||||
info.imageType = image_type;
|
info.imageType = image_type;
|
||||||
|
@ -1195,6 +1196,11 @@ namespace vk
|
||||||
return commands;
|
return commands;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_recording() const
|
||||||
|
{
|
||||||
|
return is_open;
|
||||||
|
}
|
||||||
|
|
||||||
void begin()
|
void begin()
|
||||||
{
|
{
|
||||||
if (m_submit_fence && is_pending)
|
if (m_submit_fence && is_pending)
|
||||||
|
@ -2413,8 +2419,8 @@ public:
|
||||||
VkQueryPool query_pool = VK_NULL_HANDLE;
|
VkQueryPool query_pool = VK_NULL_HANDLE;
|
||||||
vk::render_device* owner = nullptr;
|
vk::render_device* owner = nullptr;
|
||||||
|
|
||||||
|
std::deque<u32> available_slots;
|
||||||
std::vector<bool> query_active_status;
|
std::vector<bool> query_active_status;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
void create(vk::render_device &dev, u32 num_entries)
|
void create(vk::render_device &dev, u32 num_entries)
|
||||||
|
@ -2428,6 +2434,12 @@ public:
|
||||||
owner = &dev;
|
owner = &dev;
|
||||||
|
|
||||||
query_active_status.resize(num_entries, false);
|
query_active_status.resize(num_entries, false);
|
||||||
|
available_slots.resize(num_entries);
|
||||||
|
|
||||||
|
for (u32 n = 0; n < num_entries; ++n)
|
||||||
|
{
|
||||||
|
available_slots[n] = n;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void destroy()
|
void destroy()
|
||||||
|
@ -2484,8 +2496,13 @@ public:
|
||||||
|
|
||||||
void reset_query(vk::command_buffer &cmd, u32 index)
|
void reset_query(vk::command_buffer &cmd, u32 index)
|
||||||
{
|
{
|
||||||
vkCmdResetQueryPool(cmd, query_pool, index, 1);
|
if (query_active_status[index])
|
||||||
query_active_status[index] = false;
|
{
|
||||||
|
vkCmdResetQueryPool(cmd, query_pool, index, 1);
|
||||||
|
|
||||||
|
query_active_status[index] = false;
|
||||||
|
available_slots.push_back(index);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void reset_queries(vk::command_buffer &cmd, std::vector<u32> &list)
|
void reset_queries(vk::command_buffer &cmd, std::vector<u32> &list)
|
||||||
|
@ -2505,13 +2522,16 @@ public:
|
||||||
|
|
||||||
u32 find_free_slot()
|
u32 find_free_slot()
|
||||||
{
|
{
|
||||||
for (u32 n = 0; n < query_active_status.size(); n++)
|
if (available_slots.empty())
|
||||||
{
|
{
|
||||||
if (query_active_status[n] == false)
|
return -1u;
|
||||||
return n;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return UINT32_MAX;
|
u32 result = available_slots.front();
|
||||||
|
available_slots.pop_front();
|
||||||
|
|
||||||
|
verify(HERE), !query_active_status[result];
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -106,6 +106,12 @@ namespace
|
||||||
const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
const u32 vertex_count = rsx::method_registers.current_draw_clause.get_elements_count();
|
||||||
const u32 min_index = rsx::method_registers.current_draw_clause.min_index();
|
const u32 min_index = rsx::method_registers.current_draw_clause.min_index();
|
||||||
|
|
||||||
|
//if (rsx::method_registers.current_draw_clause.draw_command_ranges.size() > 1)
|
||||||
|
//{
|
||||||
|
// TODO
|
||||||
|
//LOG_ERROR(RSX, "REEEEEEEEEEEEEEEEEEEEEEE (prims_emulated=%d)", primitives_emulated);
|
||||||
|
//}
|
||||||
|
|
||||||
if (primitives_emulated)
|
if (primitives_emulated)
|
||||||
{
|
{
|
||||||
u32 index_count;
|
u32 index_count;
|
||||||
|
@ -165,7 +171,7 @@ namespace
|
||||||
command.raw_index_buffer, index_type,
|
command.raw_index_buffer, index_type,
|
||||||
rsx::method_registers.current_draw_clause.primitive,
|
rsx::method_registers.current_draw_clause.primitive,
|
||||||
rsx::method_registers.restart_index_enabled(),
|
rsx::method_registers.restart_index_enabled(),
|
||||||
rsx::method_registers.restart_index(), rsx::method_registers.current_draw_clause.draw_command_ranges,
|
rsx::method_registers.restart_index(),
|
||||||
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !vk::is_primitive_native(prim); });
|
rsx::method_registers.vertex_data_base_index(), [](auto prim) { return !vk::is_primitive_native(prim); });
|
||||||
|
|
||||||
if (min_index >= max_index)
|
if (min_index >= max_index)
|
||||||
|
@ -227,11 +233,6 @@ namespace
|
||||||
|
|
||||||
vk::vertex_upload_info VKGSRender::upload_vertex_data()
|
vk::vertex_upload_info VKGSRender::upload_vertex_data()
|
||||||
{
|
{
|
||||||
m_vertex_layout = analyse_inputs_interleaved();
|
|
||||||
|
|
||||||
if (!m_vertex_layout.validate())
|
|
||||||
return {};
|
|
||||||
|
|
||||||
draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout);
|
draw_command_visitor visitor(m_index_buffer_ring_info, m_vertex_layout);
|
||||||
auto result = std::visit(visitor, get_draw_command(rsx::method_registers));
|
auto result = std::visit(visitor, get_draw_command(rsx::method_registers));
|
||||||
|
|
||||||
|
@ -258,6 +259,8 @@ vk::vertex_upload_info VKGSRender::upload_vertex_data()
|
||||||
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base;
|
storage_address = m_vertex_layout.interleaved_blocks[0].real_offset_address + vertex_base;
|
||||||
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
|
if (auto cached = m_vertex_cache->find_vertex_range(storage_address, VK_FORMAT_R8_UINT, required.first))
|
||||||
{
|
{
|
||||||
|
verify(HERE), cached->local_address == storage_address;
|
||||||
|
|
||||||
in_cache = true;
|
in_cache = true;
|
||||||
persistent_range_base = cached->offset_in_heap;
|
persistent_range_base = cached->offset_in_heap;
|
||||||
}
|
}
|
||||||
|
|
|
@ -880,12 +880,42 @@ namespace rsx
|
||||||
|
|
||||||
storage_type* find_vertex_range(uintptr_t local_addr, upload_format fmt, u32 data_length) override
|
storage_type* find_vertex_range(uintptr_t local_addr, upload_format fmt, u32 data_length) override
|
||||||
{
|
{
|
||||||
|
const auto data_end = local_addr + data_length;
|
||||||
|
|
||||||
for (auto &v : vertex_ranges[local_addr])
|
for (auto &v : vertex_ranges[local_addr])
|
||||||
{
|
{
|
||||||
if (v.buffer_format == fmt && v.data_length == data_length)
|
if (v.buffer_format == fmt && v.data_length >= data_length)
|
||||||
return &v;
|
return &v;
|
||||||
}
|
}
|
||||||
|
#if 0
|
||||||
|
for (const auto &range : vertex_ranges)
|
||||||
|
{
|
||||||
|
if (range.first > local_addr)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
for (const auto &v : range.second)
|
||||||
|
{
|
||||||
|
if (v.buffer_format == fmt)
|
||||||
|
{
|
||||||
|
const auto entry_end = v.local_address + v.data_length;
|
||||||
|
if (data_end <= entry_end)
|
||||||
|
{
|
||||||
|
const u32 offset = (local_addr - v.local_address);
|
||||||
|
if (offset % 16)
|
||||||
|
continue; // TexelBuffer alignment rules
|
||||||
|
|
||||||
|
storage_type e = v;
|
||||||
|
e.data_length = data_length;
|
||||||
|
e.local_address = local_addr;
|
||||||
|
e.offset_in_heap += offset;
|
||||||
|
|
||||||
|
auto& ret = vertex_ranges[local_addr].emplace_back(e);
|
||||||
|
return &ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -423,9 +423,7 @@ namespace rsx
|
||||||
{
|
{
|
||||||
if (arg)
|
if (arg)
|
||||||
{
|
{
|
||||||
rsx::method_registers.current_draw_clause.draw_command_ranges.clear();
|
rsx::method_registers.current_draw_clause.reset(to_primitive_type(arg));
|
||||||
rsx::method_registers.current_draw_clause.command = draw_command::none;
|
|
||||||
rsx::method_registers.current_draw_clause.primitive = to_primitive_type(arg);
|
|
||||||
rsxthr->begin();
|
rsxthr->begin();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -453,9 +451,9 @@ namespace rsx
|
||||||
else
|
else
|
||||||
rsx::method_registers.current_draw_clause.is_immediate_draw = false;
|
rsx::method_registers.current_draw_clause.is_immediate_draw = false;
|
||||||
|
|
||||||
if (!(rsx::method_registers.current_draw_clause.draw_command_ranges.empty() &&
|
if (!rsx::method_registers.current_draw_clause.empty())
|
||||||
rsx::method_registers.current_draw_clause.inline_vertex_array.empty()))
|
|
||||||
{
|
{
|
||||||
|
rsx::method_registers.current_draw_clause.compile();
|
||||||
rsxthr->end();
|
rsxthr->end();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -598,6 +596,30 @@ namespace rsx
|
||||||
rsx->m_rtts_dirty = true;
|
rsx->m_rtts_dirty = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void set_vertex_base_offset(thread* rsx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
if (rsx->in_begin_end)
|
||||||
|
{
|
||||||
|
// Revert change to queue later
|
||||||
|
method_registers.decode(reg, method_registers.register_previous_value);
|
||||||
|
|
||||||
|
// Insert base modifier barrier
|
||||||
|
method_registers.current_draw_clause.insert_command_barrier(vertex_base_modifier_barrier, arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void set_index_base_offset(thread* rsx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
if (rsx->in_begin_end)
|
||||||
|
{
|
||||||
|
// Revert change to queue later
|
||||||
|
method_registers.decode(reg, method_registers.register_previous_value);
|
||||||
|
|
||||||
|
// Insert base modifier barrier
|
||||||
|
method_registers.current_draw_clause.insert_command_barrier(index_base_modifier_barrier, arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
struct set_texture_dirty_bit
|
struct set_texture_dirty_bit
|
||||||
{
|
{
|
||||||
|
@ -1156,6 +1178,13 @@ namespace rsx
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace fifo
|
||||||
|
{
|
||||||
|
void draw_barrier(thread* rsx, u32, u32)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void rsx_state::init()
|
void rsx_state::init()
|
||||||
{
|
{
|
||||||
// Special values set at initialization, these are not set by a context reset
|
// Special values set at initialization, these are not set by a context reset
|
||||||
|
@ -2122,6 +2151,34 @@ namespace rsx
|
||||||
return registers[reg] == value;
|
return registers[reg] == value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 draw_clause::execute_pipeline_dependencies() const
|
||||||
|
{
|
||||||
|
u32 result = 0;
|
||||||
|
|
||||||
|
for (const auto &barrier : draw_command_barriers[current_range_index])
|
||||||
|
{
|
||||||
|
switch (barrier.type)
|
||||||
|
{
|
||||||
|
case primitive_restart_barrier:
|
||||||
|
break;
|
||||||
|
case index_base_modifier_barrier:
|
||||||
|
// Change index base offset
|
||||||
|
method_registers.decode(NV4097_SET_VERTEX_DATA_BASE_INDEX, barrier.arg);
|
||||||
|
result |= index_base_changed;
|
||||||
|
break;
|
||||||
|
case vertex_base_modifier_barrier:
|
||||||
|
// Change vertex base offset
|
||||||
|
method_registers.decode(NV4097_SET_VERTEX_DATA_BASE_OFFSET, barrier.arg);
|
||||||
|
result |= vertex_base_changed;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fmt::throw_exception("Unreachable" HERE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
namespace method_detail
|
namespace method_detail
|
||||||
{
|
{
|
||||||
template<int Id, int Step, int Count, template<u32> class T, int Index = 0>
|
template<int Id, int Step, int Count, template<u32> class T, int Index = 0>
|
||||||
|
@ -2494,6 +2551,7 @@ namespace rsx
|
||||||
|
|
||||||
//Some custom GCM methods
|
//Some custom GCM methods
|
||||||
methods[GCM_SET_DRIVER_OBJECT] = nullptr;
|
methods[GCM_SET_DRIVER_OBJECT] = nullptr;
|
||||||
|
methods[FIFO::FIFO_DRAW_BARRIER] = nullptr;
|
||||||
|
|
||||||
bind_array<GCM_FLIP_HEAD, 1, 2, nullptr>();
|
bind_array<GCM_FLIP_HEAD, 1, 2, nullptr>();
|
||||||
bind_array<GCM_DRIVER_QUEUE, 1, 8, nullptr>();
|
bind_array<GCM_DRIVER_QUEUE, 1, 8, nullptr>();
|
||||||
|
@ -2600,6 +2658,8 @@ namespace rsx
|
||||||
bind<NV4097_SET_SHADER_PROGRAM, nv4097::set_shader_program_dirty>();
|
bind<NV4097_SET_SHADER_PROGRAM, nv4097::set_shader_program_dirty>();
|
||||||
bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>();
|
bind<NV4097_SET_TRANSFORM_PROGRAM_START, nv4097::set_transform_program_start>();
|
||||||
bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>();
|
bind<NV4097_SET_VERTEX_ATTRIB_OUTPUT_MASK, nv4097::set_vertex_attribute_output_mask>();
|
||||||
|
bind<NV4097_SET_VERTEX_DATA_BASE_OFFSET, nv4097::set_vertex_base_offset>();
|
||||||
|
bind<NV4097_SET_VERTEX_DATA_BASE_INDEX, nv4097::set_index_base_offset>();
|
||||||
|
|
||||||
//NV308A
|
//NV308A
|
||||||
bind_range<NV308A_COLOR, 1, 256, nv308a::color>();
|
bind_range<NV308A_COLOR, 1, 256, nv308a::color>();
|
||||||
|
@ -2619,6 +2679,8 @@ namespace rsx
|
||||||
// custom methods
|
// custom methods
|
||||||
bind<GCM_FLIP_COMMAND, flip_command>();
|
bind<GCM_FLIP_COMMAND, flip_command>();
|
||||||
|
|
||||||
|
// FIFO
|
||||||
|
bind<FIFO::FIFO_DRAW_BARRIER, fifo::draw_barrier>();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}();
|
}();
|
||||||
|
|
|
@ -3,16 +3,21 @@
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
|
#include <deque>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
#include "GCM.h"
|
#include "GCM.h"
|
||||||
#include "rsx_decode.h"
|
#include "rsx_decode.h"
|
||||||
#include "RSXTexture.h"
|
#include "RSXTexture.h"
|
||||||
#include "rsx_vertex_data.h"
|
#include "rsx_vertex_data.h"
|
||||||
|
#include "rsx_utils.h"
|
||||||
#include "Utilities/geometry.h"
|
#include "Utilities/geometry.h"
|
||||||
|
|
||||||
#include <cereal/types/array.hpp>
|
#include <cereal/types/array.hpp>
|
||||||
#include <cereal/types/unordered_map.hpp>
|
#include <cereal/types/unordered_map.hpp>
|
||||||
|
|
||||||
|
extern u64 get_system_time();
|
||||||
|
|
||||||
namespace rsx
|
namespace rsx
|
||||||
{
|
{
|
||||||
enum class draw_command
|
enum class draw_command
|
||||||
|
@ -23,6 +28,39 @@ namespace rsx
|
||||||
indexed,
|
indexed,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum command_barrier_type : u32
|
||||||
|
{
|
||||||
|
primitive_restart_barrier,
|
||||||
|
vertex_base_modifier_barrier,
|
||||||
|
index_base_modifier_barrier
|
||||||
|
};
|
||||||
|
|
||||||
|
enum command_execution_flags : u32
|
||||||
|
{
|
||||||
|
vertex_base_changed = (1 << 0),
|
||||||
|
index_base_changed = (1 << 1)
|
||||||
|
};
|
||||||
|
|
||||||
|
struct barrier_t
|
||||||
|
{
|
||||||
|
u64 timestamp;
|
||||||
|
|
||||||
|
u32 address;
|
||||||
|
u32 arg;
|
||||||
|
u32 flags;
|
||||||
|
command_barrier_type type;
|
||||||
|
|
||||||
|
bool operator < (const barrier_t& other) const
|
||||||
|
{
|
||||||
|
if (address != -1u)
|
||||||
|
{
|
||||||
|
return address < other.address;
|
||||||
|
}
|
||||||
|
|
||||||
|
return timestamp < other.timestamp;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct draw_range_t
|
struct draw_range_t
|
||||||
{
|
{
|
||||||
u32 command_data_offset = 0;
|
u32 command_data_offset = 0;
|
||||||
|
@ -30,55 +68,273 @@ namespace rsx
|
||||||
u32 count = 0;
|
u32 count = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct draw_clause
|
class draw_clause
|
||||||
{
|
{
|
||||||
|
// Stores the first and count argument from draw/draw indexed parameters between begin/end clauses.
|
||||||
|
simple_array<draw_range_t> draw_command_ranges;
|
||||||
|
|
||||||
|
// Stores rasterization barriers for primitive types sensitive to adjacency
|
||||||
|
std::vector<std::set<barrier_t>> draw_command_barriers;
|
||||||
|
|
||||||
|
// Counter used to parse the commands in order
|
||||||
|
u32 current_range_index;
|
||||||
|
|
||||||
|
// Location of last execution barrier
|
||||||
|
u32 last_execution_barrier_index;
|
||||||
|
|
||||||
|
// Helper functions
|
||||||
|
// Add a new draw command
|
||||||
|
void append_draw_command(const draw_range_t& range)
|
||||||
|
{
|
||||||
|
draw_command_ranges.push_back(range);
|
||||||
|
draw_command_barriers.push_back({});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert a new draw command within the others
|
||||||
|
void insert_draw_command(int index, const draw_range_t& range)
|
||||||
|
{
|
||||||
|
auto range_It = draw_command_ranges.begin();
|
||||||
|
auto barrier_It = draw_command_barriers.begin();
|
||||||
|
|
||||||
|
// Because deque::insert fails with initializer list on MSVC
|
||||||
|
const std::set<barrier_t> new_barrier;
|
||||||
|
|
||||||
|
while (index--)
|
||||||
|
{
|
||||||
|
++range_It;
|
||||||
|
++barrier_It;
|
||||||
|
}
|
||||||
|
|
||||||
|
draw_command_ranges.insert(range_It, range);
|
||||||
|
draw_command_barriers.insert(barrier_It, new_barrier);
|
||||||
|
|
||||||
|
verify(HERE), draw_command_ranges.size() == draw_command_barriers.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
primitive_type primitive;
|
primitive_type primitive;
|
||||||
draw_command command;
|
draw_command command;
|
||||||
|
|
||||||
bool is_immediate_draw;
|
bool is_immediate_draw;
|
||||||
bool is_disjoint_primitive;
|
bool is_disjoint_primitive;
|
||||||
|
|
||||||
std::vector<u32> inline_vertex_array;
|
simple_array<u32> inline_vertex_array;
|
||||||
|
|
||||||
|
void insert_command_barrier(command_barrier_type type, u32 arg)
|
||||||
|
{
|
||||||
|
verify(HERE), !draw_command_ranges.empty();
|
||||||
|
|
||||||
|
if (type == primitive_restart_barrier)
|
||||||
|
{
|
||||||
|
// Rasterization flow barrier
|
||||||
|
const auto& last = draw_command_ranges.back();
|
||||||
|
const auto address = last.first + last.count;
|
||||||
|
|
||||||
|
const auto command_index = draw_command_ranges.size() - 1;
|
||||||
|
draw_command_barriers[command_index].insert({ 0, address, arg, 0, type });
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Execution dependency barrier
|
||||||
|
append_draw_command({});
|
||||||
|
const auto command_index = draw_command_ranges.size() - 1;
|
||||||
|
|
||||||
|
draw_command_barriers[command_index].insert({ get_system_time(), -1u, arg, 0, type });
|
||||||
|
last_execution_barrier_index = command_index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stores the first and count argument from draw/draw indexed parameters between begin/end clauses.
|
* Optimize commands for rendering
|
||||||
*/
|
*/
|
||||||
std::vector<draw_range_t> draw_command_ranges;
|
void compile()
|
||||||
|
{
|
||||||
|
// TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Insert one command range
|
||||||
|
*/
|
||||||
|
|
||||||
|
void append(u32 first, u32 count)
|
||||||
|
{
|
||||||
|
if (!draw_command_ranges.empty())
|
||||||
|
{
|
||||||
|
auto& last = draw_command_ranges.back();
|
||||||
|
|
||||||
|
if (last.count == 0)
|
||||||
|
{
|
||||||
|
// Special case, usually indicates an execution barrier
|
||||||
|
last.first = first;
|
||||||
|
last.count = count;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (last.first + last.count == first)
|
||||||
|
{
|
||||||
|
if (!is_disjoint_primitive)
|
||||||
|
{
|
||||||
|
// Insert barrier
|
||||||
|
insert_command_barrier(primitive_restart_barrier, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
last.count += count;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int index = last_execution_barrier_index; index < draw_command_ranges.size(); ++index)
|
||||||
|
{
|
||||||
|
if (draw_command_ranges[index].first == first &&
|
||||||
|
draw_command_ranges[index].count == count)
|
||||||
|
{
|
||||||
|
// Duplicate entry? WTF!
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (draw_command_ranges[index].first > first)
|
||||||
|
{
|
||||||
|
insert_draw_command(index, { 0, first, count });
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
append_draw_command({ 0, first, count });
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns how many vertex or index will be consumed by the draw clause.
|
* Returns how many vertex or index will be consumed by the draw clause.
|
||||||
*/
|
*/
|
||||||
u32 get_elements_count() const
|
u32 get_elements_count() const
|
||||||
{
|
{
|
||||||
u32 count = 0;
|
return get_range().count;
|
||||||
for (const auto &draw : draw_command_ranges)
|
}
|
||||||
|
|
||||||
|
u32 min_index() const
|
||||||
|
{
|
||||||
|
return get_range().first;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool is_single_draw() const
|
||||||
|
{
|
||||||
|
if (is_disjoint_primitive)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (draw_command_ranges.empty())
|
||||||
{
|
{
|
||||||
count += draw.count;
|
verify(HERE), !inline_vertex_array.empty();
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return count;
|
verify(HERE), current_range_index != -1u;
|
||||||
|
for (const auto &barrier : draw_command_barriers[current_range_index])
|
||||||
|
{
|
||||||
|
if (barrier.type == primitive_restart_barrier)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool empty() const
|
||||||
|
{
|
||||||
|
return (draw_command_ranges.empty() && inline_vertex_array.empty());
|
||||||
|
}
|
||||||
|
|
||||||
|
void reset(rsx::primitive_type type)
|
||||||
|
{
|
||||||
|
current_range_index = -1u;
|
||||||
|
last_execution_barrier_index = 0;
|
||||||
|
|
||||||
|
command = draw_command::none;
|
||||||
|
primitive = type;
|
||||||
|
|
||||||
|
draw_command_ranges.clear();
|
||||||
|
draw_command_barriers.clear();
|
||||||
|
inline_vertex_array.clear();
|
||||||
|
|
||||||
|
switch (primitive)
|
||||||
|
{
|
||||||
|
case rsx::primitive_type::line_loop:
|
||||||
|
case rsx::primitive_type::line_strip:
|
||||||
|
case rsx::primitive_type::polygon:
|
||||||
|
case rsx::primitive_type::quad_strip:
|
||||||
|
case rsx::primitive_type::triangle_fan:
|
||||||
|
case rsx::primitive_type::triangle_strip:
|
||||||
|
// Adjacency matters for these types
|
||||||
|
is_disjoint_primitive = false;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
is_disjoint_primitive = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void begin()
|
||||||
|
{
|
||||||
|
current_range_index = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void end()
|
||||||
|
{
|
||||||
|
current_range_index = draw_command_ranges.size() - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool next()
|
||||||
|
{
|
||||||
|
current_range_index++;
|
||||||
|
if (current_range_index >= draw_command_ranges.size())
|
||||||
|
{
|
||||||
|
current_range_index = 0;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
verify(HERE), draw_command_ranges[current_range_index].count != 0;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Optimize draw command stream for rendering
|
* Executes commands required to make the current draw state valid
|
||||||
*/
|
*/
|
||||||
void compile()
|
u32 execute_pipeline_dependencies() const;
|
||||||
{
|
|
||||||
|
|
||||||
|
const draw_range_t& get_range() const
|
||||||
|
{
|
||||||
|
verify(HERE), current_range_index < draw_command_ranges.size();
|
||||||
|
return draw_command_ranges[current_range_index];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
simple_array<draw_range_t> get_subranges() const
|
||||||
* Insert one command range
|
|
||||||
*/
|
|
||||||
void append(u32 first, u32 count)
|
|
||||||
{
|
{
|
||||||
|
verify(HERE), !is_single_draw();
|
||||||
|
|
||||||
}
|
const auto range = get_range();
|
||||||
|
const auto limit = range.first + range.count;
|
||||||
|
|
||||||
u32 min_index()
|
simple_array<draw_range_t> ret;
|
||||||
{
|
u32 previous_barrier = range.first;
|
||||||
LOG_FATAL(RSX, "Unimplemented");
|
u32 vertex_counter = 0;
|
||||||
return 0;
|
|
||||||
|
for (const auto &barrier : draw_command_barriers[current_range_index])
|
||||||
|
{
|
||||||
|
if (barrier.type != primitive_restart_barrier)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (barrier.address <= range.first)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (barrier.address >= limit)
|
||||||
|
break;
|
||||||
|
|
||||||
|
const u32 count = barrier.address - previous_barrier;
|
||||||
|
ret.push_back({ 0, vertex_counter, count });
|
||||||
|
previous_barrier = (u32)barrier.address;
|
||||||
|
vertex_counter += count;
|
||||||
|
}
|
||||||
|
|
||||||
|
verify(HERE), !ret.empty(), previous_barrier < limit;
|
||||||
|
ret.push_back({ 0, vertex_counter, limit - previous_barrier });
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -663,4 +663,237 @@ namespace rsx
|
||||||
m_data.store(0);
|
m_data.store(0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename Ty>
|
||||||
|
struct simple_array
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using iterator = Ty * ;
|
||||||
|
using const_iterator = Ty * const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
u32 _capacity = 0;
|
||||||
|
u32 _size = 0;
|
||||||
|
Ty* _data = nullptr;
|
||||||
|
|
||||||
|
inline u32 offset(const_iterator pos)
|
||||||
|
{
|
||||||
|
return (_data) ? (pos - _data) : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
simple_array() {}
|
||||||
|
|
||||||
|
simple_array(u32 initial_size, const Ty val = {})
|
||||||
|
{
|
||||||
|
reserve(initial_size);
|
||||||
|
_size = initial_size;
|
||||||
|
|
||||||
|
for (int n = 0; n < initial_size; ++n)
|
||||||
|
{
|
||||||
|
_data[n] = val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
simple_array(const std::initializer_list<Ty>& args)
|
||||||
|
{
|
||||||
|
reserve(args.size());
|
||||||
|
|
||||||
|
for (const auto& arg : args)
|
||||||
|
{
|
||||||
|
push_back(arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
~simple_array()
|
||||||
|
{
|
||||||
|
if (_data)
|
||||||
|
{
|
||||||
|
free(_data);
|
||||||
|
_data = nullptr;
|
||||||
|
_size = _capacity = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void swap(simple_array<Ty>& other) noexcept
|
||||||
|
{
|
||||||
|
std::swap(_capacity, other._capacity);
|
||||||
|
std::swap(_size, other._size);
|
||||||
|
std::swap(_data, other._data);
|
||||||
|
}
|
||||||
|
|
||||||
|
void reserve(u32 size)
|
||||||
|
{
|
||||||
|
if (_capacity > size)
|
||||||
|
return;
|
||||||
|
|
||||||
|
auto old_data = _data;
|
||||||
|
auto old_size = _size;
|
||||||
|
|
||||||
|
_data = (Ty*)malloc(sizeof(Ty) * size);
|
||||||
|
_capacity = size;
|
||||||
|
|
||||||
|
if (old_data)
|
||||||
|
{
|
||||||
|
memcpy(_data, old_data, sizeof(Ty) * old_size);
|
||||||
|
free(old_data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void push_back(const Ty& val)
|
||||||
|
{
|
||||||
|
if (_size >= _capacity)
|
||||||
|
{
|
||||||
|
reserve(_capacity + 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
_data[_size++] = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
void push_back(Ty&& val)
|
||||||
|
{
|
||||||
|
if (_size >= _capacity)
|
||||||
|
{
|
||||||
|
reserve(_capacity + 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
_data[_size++] = val;
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator insert(iterator pos, const Ty& val)
|
||||||
|
{
|
||||||
|
verify(HERE), pos >= _data;
|
||||||
|
const auto _loc = offset(pos);
|
||||||
|
|
||||||
|
if (_size >= _capacity)
|
||||||
|
{
|
||||||
|
reserve(_capacity + 16);
|
||||||
|
pos = _data + _loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_loc >= _size)
|
||||||
|
{
|
||||||
|
_data[_size++] = val;
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
verify(HERE), _loc < _size;
|
||||||
|
|
||||||
|
const u32 remaining = (_size - _loc);
|
||||||
|
memmove(pos + 1, pos, remaining * sizeof(Ty));
|
||||||
|
|
||||||
|
*pos = val;
|
||||||
|
_size++;
|
||||||
|
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator insert(iterator pos, Ty&& val)
|
||||||
|
{
|
||||||
|
verify(HERE), pos >= _data;
|
||||||
|
const auto _loc = offset(pos);
|
||||||
|
|
||||||
|
if (_size >= _capacity)
|
||||||
|
{
|
||||||
|
reserve(_capacity + 16);
|
||||||
|
pos = _data + _loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_loc >= _size)
|
||||||
|
{
|
||||||
|
_data[_size++] = val;
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
verify(HERE), _loc < _size;
|
||||||
|
|
||||||
|
const u32 remaining = (_size - _loc);
|
||||||
|
memmove(pos + 1, pos, remaining * sizeof(Ty));
|
||||||
|
|
||||||
|
*pos = val;
|
||||||
|
_size++;
|
||||||
|
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
void clear()
|
||||||
|
{
|
||||||
|
_size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool empty() const
|
||||||
|
{
|
||||||
|
return _size == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 size() const
|
||||||
|
{
|
||||||
|
return _size;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 capacity() const
|
||||||
|
{
|
||||||
|
return _capacity;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ty& operator[] (u32 index)
|
||||||
|
{
|
||||||
|
return _data[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
const Ty& operator[] (u32 index) const
|
||||||
|
{
|
||||||
|
return _data[index];
|
||||||
|
}
|
||||||
|
|
||||||
|
Ty* data()
|
||||||
|
{
|
||||||
|
return _data;
|
||||||
|
}
|
||||||
|
|
||||||
|
const Ty* data() const
|
||||||
|
{
|
||||||
|
return _data;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ty& back()
|
||||||
|
{
|
||||||
|
return _data[_size - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
const Ty& back() const
|
||||||
|
{
|
||||||
|
return _data[_size - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
Ty& front()
|
||||||
|
{
|
||||||
|
return _data[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
const Ty& front() const
|
||||||
|
{
|
||||||
|
return _data[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator begin()
|
||||||
|
{
|
||||||
|
return _data;
|
||||||
|
}
|
||||||
|
|
||||||
|
iterator end()
|
||||||
|
{
|
||||||
|
return _data ? _data + _size : nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
const_iterator begin() const
|
||||||
|
{
|
||||||
|
return _data;
|
||||||
|
}
|
||||||
|
|
||||||
|
const_iterator end() const
|
||||||
|
{
|
||||||
|
return _data ? _data + _size : nullptr;
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "GCM.h"
|
#include "GCM.h"
|
||||||
#include "Utilities/types.h"
|
#include "Utilities/types.h"
|
||||||
|
@ -64,10 +64,13 @@ struct push_buffer_vertex_info
|
||||||
|
|
||||||
void clear()
|
void clear()
|
||||||
{
|
{
|
||||||
data.resize(0);
|
if (size)
|
||||||
attribute_mask = ~0;
|
{
|
||||||
vertex_count = 0;
|
data.clear();
|
||||||
size = 0;
|
attribute_mask = ~0;
|
||||||
|
vertex_count = 0;
|
||||||
|
size = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 get_vertex_size_in_dwords(vertex_base_type type)
|
u8 get_vertex_size_in_dwords(vertex_base_type type)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue