Merge pull request #1415 from vlj/d3d12

D3d12: Factoring
This commit is contained in:
B1ackDaemon 2016-01-12 00:52:45 +02:00
commit 68f9898171
10 changed files with 519 additions and 501 deletions

View file

@ -13,18 +13,14 @@ namespace
/** /**
* *
*/ */
D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info &vertex_array_desc, const std::vector<u8> &vertex_data, ID3D12Device *device, data_heap<ID3D12Resource, 65536> &vertex_index_heap) D3D12_GPU_VIRTUAL_ADDRESS createVertexBuffer(const rsx::data_array_format_info &vertex_array_desc, const std::vector<u8> &vertex_data, ID3D12Device *device, data_heap &vertex_index_heap)
{ {
size_t buffer_size = vertex_data.size(); size_t buffer_size = vertex_data.size();
assert(vertex_index_heap.can_alloc(buffer_size)); size_t heap_offset = vertex_index_heap.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = vertex_index_heap.alloc(buffer_size);
void *buffer; memcpy(vertex_index_heap.map<float>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)), vertex_data.data(), vertex_data.size());
CHECK_HRESULT(vertex_index_heap.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer)); vertex_index_heap.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
void *bufferMap = (char*)buffer + heap_offset; return vertex_index_heap.get_heap()->GetGPUVirtualAddress() + heap_offset;
memcpy(bufferMap, vertex_data.data(), vertex_data.size());
vertex_index_heap.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
return vertex_index_heap.m_heap->GetGPUVirtualAddress() + heap_offset;
} }
} }
@ -63,22 +59,19 @@ std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::upload_vertex_attributes(co
u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size); u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size);
size_t buffer_size = element_size * vertex_count; size_t buffer_size = element_size * vertex_count;
assert(m_vertex_index_data.can_alloc(buffer_size)); size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
void *buffer; void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
for (const auto &range : vertex_ranges) for (const auto &range : vertex_ranges)
{ {
write_vertex_array_data_to_buffer(mapped_buffer, range.first, range.second, index, info); write_vertex_array_data_to_buffer(mapped_buffer, range.first, range.second, index, info);
mapped_buffer = (char*)mapped_buffer + range.second * element_size; mapped_buffer = (char*)mapped_buffer + range.second * element_size;
} }
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view =
{ {
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset, m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size, (UINT)buffer_size,
(UINT)element_size (UINT)element_size
}; };
@ -106,17 +99,14 @@ std::vector<D3D12_VERTEX_BUFFER_VIEW> D3D12GSRender::upload_vertex_attributes(co
u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size); u32 element_size = rsx::get_vertex_type_size_on_host(info.type, info.size);
size_t buffer_size = data.size(); size_t buffer_size = data.size();
assert(m_vertex_index_data.can_alloc(buffer_size)); size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
void *buffer; void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
memcpy(mapped_buffer, data.data(), data.size()); memcpy(mapped_buffer, data.data(), data.size());
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = { D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = {
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset, m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size, (UINT)buffer_size,
(UINT)element_size (UINT)element_size
}; };
@ -143,18 +133,16 @@ void D3D12GSRender::load_vertex_index_data(u32 first, u32 count)
void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex) void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
{ {
assert(m_constants_data.can_alloc(256)); size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(256);
size_t heap_offset = m_constants_data.alloc(256);
// Scale offset buffer // Scale offset buffer
// Separate constant buffer // Separate constant buffer
void *mapped_buffer; void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + 256));
CHECK_HRESULT(m_constants_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + 256), &mapped_buffer)); fill_scale_offset_data(mapped_buffer);
fill_scale_offset_data((char*)mapped_buffer + heap_offset);
int is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]); int is_alpha_tested = !!(rsx::method_registers[NV4097_SET_ALPHA_TEST_ENABLE]);
float alpha_ref = (float&)rsx::method_registers[NV4097_SET_ALPHA_REF]; float alpha_ref = (float&)rsx::method_registers[NV4097_SET_ALPHA_REF];
memcpy((char*)mapped_buffer + heap_offset + 16 * sizeof(float), &is_alpha_tested, sizeof(int)); memcpy((char*)mapped_buffer + 16 * sizeof(float), &is_alpha_tested, sizeof(int));
memcpy((char*)mapped_buffer + heap_offset + 17 * sizeof(float), &alpha_ref, sizeof(float)); memcpy((char*)mapped_buffer + 17 * sizeof(float), &alpha_ref, sizeof(float));
size_t tex_idx = 0; size_t tex_idx = 0;
for (u32 i = 0; i < rsx::limits::textures_count; ++i) for (u32 i = 0; i < rsx::limits::textures_count; ++i)
@ -162,19 +150,19 @@ void D3D12GSRender::upload_and_bind_scale_offset_matrix(size_t descriptorIndex)
if (!textures[i].enabled()) if (!textures[i].enabled())
{ {
int is_unorm = false; int is_unorm = false;
memcpy((char*)mapped_buffer + heap_offset + (18 + tex_idx++) * sizeof(int), &is_unorm, sizeof(int)); memcpy((char*)mapped_buffer + (18 + tex_idx++) * sizeof(int), &is_unorm, sizeof(int));
continue; continue;
} }
size_t w = textures[i].width(), h = textures[i].height(); size_t w = textures[i].width(), h = textures[i].height();
// if (!w || !h) continue; // if (!w || !h) continue;
int is_unorm = (textures[i].format() & CELL_GCM_TEXTURE_UN); int is_unorm = (textures[i].format() & CELL_GCM_TEXTURE_UN);
memcpy((char*)mapped_buffer + heap_offset + (18 + tex_idx++) * sizeof(int), &is_unorm, sizeof(int)); memcpy((char*)mapped_buffer + (18 + tex_idx++) * sizeof(int), &is_unorm, sizeof(int));
} }
m_constants_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + 256)); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + 256));
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = { D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
m_constants_data.m_heap->GetGPUVirtualAddress() + heap_offset, m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
256 256
}; };
m_device->CreateConstantBufferView(&constant_buffer_view_desc, m_device->CreateConstantBufferView(&constant_buffer_view_desc,
@ -186,16 +174,14 @@ void D3D12GSRender::upload_and_bind_vertex_shader_constants(size_t descriptor_in
{ {
size_t buffer_size = 512 * 4 * sizeof(float); size_t buffer_size = 512 * 4 * sizeof(float);
assert(m_constants_data.can_alloc(buffer_size)); size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = m_constants_data.alloc(buffer_size);
void *mapped_buffer; void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
CHECK_HRESULT(m_constants_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer)); fill_vertex_program_constants_data(mapped_buffer);
fill_vertex_program_constants_data((char*)mapped_buffer + heap_offset); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
m_constants_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = { D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
m_constants_data.m_heap->GetGPUVirtualAddress() + heap_offset, m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size (UINT)buffer_size
}; };
m_device->CreateConstantBufferView(&constant_buffer_view_desc, m_device->CreateConstantBufferView(&constant_buffer_view_desc,
@ -210,18 +196,15 @@ void D3D12GSRender::upload_and_bind_fragment_shader_constants(size_t descriptor_
// Multiple of 256 never 0 // Multiple of 256 never 0
buffer_size = (buffer_size + 255) & ~255; buffer_size = (buffer_size + 255) & ~255;
assert(m_constants_data.can_alloc(buffer_size)); size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = m_constants_data.alloc(buffer_size);
size_t offset = 0; size_t offset = 0;
void *mapped_buffer; float *mapped_buffer = m_buffer_data.map<float>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
CHECK_HRESULT(m_constants_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &mapped_buffer)); m_pso_cache.fill_fragment_constans_buffer({ mapped_buffer, gsl::narrow<int>(buffer_size) }, fragment_program);
float *buffer = (float*)((char*)mapped_buffer + heap_offset); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
m_pso_cache.fill_fragment_constans_buffer({ buffer, gsl::narrow<int>(buffer_size) }, fragment_program);
m_constants_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = { D3D12_CONSTANT_BUFFER_VIEW_DESC constant_buffer_view_desc = {
m_constants_data.m_heap->GetGPUVirtualAddress() + heap_offset, m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size (UINT)buffer_size
}; };
m_device->CreateConstantBufferView(&constant_buffer_view_desc, m_device->CreateConstantBufferView(&constant_buffer_view_desc,
@ -257,17 +240,14 @@ std::tuple<D3D12_VERTEX_BUFFER_VIEW, size_t> D3D12GSRender::upload_inlined_verte
// Copy inline buffer // Copy inline buffer
size_t buffer_size = inline_vertex_array.size() * sizeof(int); size_t buffer_size = inline_vertex_array.size() * sizeof(int);
assert(m_vertex_index_data.can_alloc(buffer_size)); size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = m_vertex_index_data.alloc(buffer_size); void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
void *buffer;
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
write_inline_array_to_buffer(mapped_buffer); write_inline_array_to_buffer(mapped_buffer);
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view = D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view =
{ {
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset, m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size, (UINT)buffer_size,
(UINT)offset (UINT)offset
}; };
@ -283,12 +263,9 @@ std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> D3D12GSRender::generate_index_buffer
// Alloc // Alloc
size_t buffer_size = align(index_count * sizeof(u16), 64); size_t buffer_size = align(index_count * sizeof(u16), 64);
assert(m_vertex_index_data.can_alloc(buffer_size)); size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
void *buffer; void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
size_t first = 0; size_t first = 0;
for (const auto &pair : vertex_ranges) for (const auto &pair : vertex_ranges)
{ {
@ -297,9 +274,9 @@ std::tuple<D3D12_INDEX_BUFFER_VIEW, size_t> D3D12GSRender::generate_index_buffer
mapped_buffer = (char*)mapped_buffer + element_count * sizeof(u16); mapped_buffer = (char*)mapped_buffer + element_count * sizeof(u16);
first += pair.second; first += pair.second;
} }
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_INDEX_BUFFER_VIEW index_buffer_view = { D3D12_INDEX_BUFFER_VIEW index_buffer_view = {
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset, m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size, (UINT)buffer_size,
DXGI_FORMAT_R16_UINT DXGI_FORMAT_R16_UINT
}; };
@ -359,12 +336,9 @@ std::tuple<bool, size_t> D3D12GSRender::upload_and_set_vertex_index_data(ID3D12G
// Alloc // Alloc
size_t buffer_size = align(index_count * index_size, 64); size_t buffer_size = align(index_count * index_size, 64);
assert(m_vertex_index_data.can_alloc(buffer_size)); size_t heap_offset = m_buffer_data.alloc<D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = m_vertex_index_data.alloc(buffer_size);
void *buffer; void *mapped_buffer = m_buffer_data.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
CHECK_HRESULT(m_vertex_index_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), (void**)&buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
u32 min_index = (u32)-1, max_index = 0; u32 min_index = (u32)-1, max_index = 0;
for (const auto &pair : m_first_count_pairs) for (const auto &pair : m_first_count_pairs)
{ {
@ -372,9 +346,9 @@ std::tuple<bool, size_t> D3D12GSRender::upload_and_set_vertex_index_data(ID3D12G
write_index_array_data_to_buffer((char*)mapped_buffer, draw_mode, pair.first, pair.second, min_index, max_index); write_index_array_data_to_buffer((char*)mapped_buffer, draw_mode, pair.first, pair.second, min_index, max_index);
mapped_buffer = (char*)mapped_buffer + element_count * index_size; mapped_buffer = (char*)mapped_buffer + element_count * index_size;
} }
m_vertex_index_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
D3D12_INDEX_BUFFER_VIEW index_buffer_view = { D3D12_INDEX_BUFFER_VIEW index_buffer_view = {
m_vertex_index_data.m_heap->GetGPUVirtualAddress() + heap_offset, m_buffer_data.get_heap()->GetGPUVirtualAddress() + heap_offset,
(UINT)buffer_size, (UINT)buffer_size,
get_index_type(indexed_type) get_index_type(indexed_type)
}; };

View file

@ -248,6 +248,7 @@ namespace
{ {
case CELL_GCM_TEXTURE_NEAREST: return D3D12_FILTER_TYPE_POINT; case CELL_GCM_TEXTURE_NEAREST: return D3D12_FILTER_TYPE_POINT;
case CELL_GCM_TEXTURE_LINEAR: return D3D12_FILTER_TYPE_LINEAR; case CELL_GCM_TEXTURE_LINEAR: return D3D12_FILTER_TYPE_LINEAR;
case CELL_GCM_TEXTURE_UNKNOWN_MAG_FILTER: return D3D12_FILTER_TYPE_LINEAR;
} }
throw EXCEPTION("Invalid mag filter (0x%x)", mag_filter); throw EXCEPTION("Invalid mag filter (0x%x)", mag_filter);
} }

View file

@ -89,11 +89,6 @@ D3D12DLLManagement::~D3D12DLLManagement()
D3D12GSRender::D3D12GSRender() D3D12GSRender::D3D12GSRender()
: GSRender(frame_type::DX12), m_d3d12_lib(), m_current_pso({}) : GSRender(frame_type::DX12), m_d3d12_lib(), m_current_pso({})
{ {
m_previous_address_a = 0;
m_previous_address_b = 0;
m_previous_address_c = 0;
m_previous_address_d = 0;
m_previous_address_z = 0;
gfxHandler = [this](u32 addr) { gfxHandler = [this](u32 addr) {
bool result = invalidate_address(addr); bool result = invalidate_address(addr);
if (result) if (result)
@ -195,14 +190,9 @@ D3D12GSRender::D3D12GSRender()
IID_PPV_ARGS(&m_dummy_texture)) IID_PPV_ARGS(&m_dummy_texture))
); );
m_readback_resources.init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_STATE_COPY_DEST);
m_uav_heap.init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_DEFAULT, D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES);
m_rtts.init(m_device.Get()); m_rtts.init(m_device.Get());
m_readback_resources.init(m_device.Get(), 1024 * 1024 * 128, D3D12_HEAP_TYPE_READBACK, D3D12_RESOURCE_STATE_COPY_DEST);
m_constants_data.init(m_device.Get(), 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_RESOURCE_STATE_GENERIC_READ); m_buffer_data.init(m_device.Get(), 1024 * 1024 * 896, D3D12_HEAP_TYPE_UPLOAD, D3D12_RESOURCE_STATE_GENERIC_READ);
m_vertex_index_data.init(m_device.Get(), 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_RESOURCE_STATE_GENERIC_READ);
m_texture_upload_data.init(m_device.Get(), 1024 * 1024 * 512, D3D12_HEAP_TYPE_UPLOAD, D3D12_RESOURCE_STATE_GENERIC_READ);
if (rpcs3::config.rsx.d3d12.overlay.value()) if (rpcs3::config.rsx.d3d12.overlay.value())
init_d2d_structures(); init_d2d_structures();
@ -215,11 +205,6 @@ D3D12GSRender::~D3D12GSRender()
m_texture_cache.unprotect_all(); m_texture_cache.unprotect_all();
gfxHandler = [this](u32) { return false; }; gfxHandler = [this](u32) { return false; };
m_constants_data.release();
m_vertex_index_data.release();
m_texture_upload_data.release();
m_uav_heap.m_heap->Release();
m_readback_resources.m_heap->Release();
m_dummy_texture->Release(); m_dummy_texture->Release();
m_convertPSO->Release(); m_convertPSO->Release();
m_convertRootSignature->Release(); m_convertRootSignature->Release();
@ -405,15 +390,12 @@ void D3D12GSRender::flip(int buffer)
row_pitch = align(w * 4, 256); row_pitch = align(w * 4, 256);
size_t texture_size = row_pitch * h; // * 4 for mipmap levels size_t texture_size = row_pitch * h; // * 4 for mipmap levels
assert(m_texture_upload_data.can_alloc(texture_size)); size_t heap_offset = m_buffer_data.alloc<D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT>(texture_size);
size_t heap_offset = m_texture_upload_data.alloc(texture_size);
void *buffer; void *mapped_buffer = m_buffer_data.map<void>(heap_offset);
CHECK_HRESULT(m_texture_upload_data.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + texture_size), &buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
for (unsigned row = 0; row < h; row++) for (unsigned row = 0; row < h; row++)
memcpy((char*)mapped_buffer + row * row_pitch, (char*)src_buffer + row * w * 4, w * 4); memcpy((char*)mapped_buffer + row * row_pitch, (char*)src_buffer + row * w * 4, w * 4);
m_texture_upload_data.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + texture_size)); m_buffer_data.unmap(CD3DX12_RANGE(heap_offset, heap_offset + texture_size));
offset = heap_offset; offset = heap_offset;
} }
@ -428,7 +410,7 @@ void D3D12GSRender::flip(int buffer)
) )
); );
get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(storage.ram_framebuffer.Get(), 0), 0, 0, 0, get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(storage.ram_framebuffer.Get(), 0), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(m_texture_upload_data.m_heap, { offset, { DXGI_FORMAT_R8G8B8A8_UNORM, (UINT)w, (UINT)h, 1, (UINT)row_pitch } }), nullptr); &CD3DX12_TEXTURE_COPY_LOCATION(m_buffer_data.get_heap(), { offset, { DXGI_FORMAT_R8G8B8A8_UNORM, (UINT)w, (UINT)h, 1, (UINT)row_pitch } }), nullptr);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(storage.ram_framebuffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ)); get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(storage.ram_framebuffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ));
resource_to_flip = storage.ram_framebuffer.Get(); resource_to_flip = storage.ram_framebuffer.Get();
@ -436,15 +418,15 @@ void D3D12GSRender::flip(int buffer)
} }
else else
{ {
if (m_rtts.bound_render_targets[0] != nullptr) if (std::get<1>(m_rtts.m_bound_render_targets[0]) != nullptr)
{ {
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_render_targets[0], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ)); get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_render_targets[0]), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ));
resource_to_flip = m_rtts.bound_render_targets[0]; resource_to_flip = std::get<1>(m_rtts.m_bound_render_targets[0]);
} }
else if (m_rtts.bound_render_targets[1] != nullptr) else if (std::get<1>(m_rtts.m_bound_render_targets[1]) != nullptr)
{ {
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_render_targets[1], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ)); get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_render_targets[1]), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ));
resource_to_flip = m_rtts.bound_render_targets[1]; resource_to_flip = std::get<1>(m_rtts.m_bound_render_targets[1]);
} }
else else
resource_to_flip = nullptr; resource_to_flip = nullptr;
@ -546,15 +528,14 @@ void D3D12GSRender::flip(int buffer)
storage.fence_value++; storage.fence_value++;
storage.in_use = true; storage.in_use = true;
storage.dirty_textures.merge(m_rtts.invalidated_resources);
m_rtts.invalidated_resources.clear();
// Get the put pos - 1. This way after cleaning we can set the get ptr to // Get the put pos - 1. This way after cleaning we can set the get ptr to
// this value, allowing the heap to proceed even if we cleaned up before allocating // this value, allowing the heap to proceed even if we cleaned up before allocating
// a new value (that's the reason of the -1) // a new value (that's the reason of the -1)
storage.constants_heap_get_pos = m_constants_data.get_current_put_pos_minus_one(); storage.buffer_heap_get_pos = m_buffer_data.get_current_put_pos_minus_one();
storage.vertex_index_heap_get_pos = m_vertex_index_data.get_current_put_pos_minus_one();
storage.texture_upload_heap_get_pos = m_texture_upload_data.get_current_put_pos_minus_one();
storage.readback_heap_get_pos = m_readback_resources.get_current_put_pos_minus_one(); storage.readback_heap_get_pos = m_readback_resources.get_current_put_pos_minus_one();
storage.uav_heap_get_pos = m_uav_heap.get_current_put_pos_minus_one();
// Now get ready for next frame // Now get ready for next frame
resource_storage &new_storage = get_current_resource_storage(); resource_storage &new_storage = get_current_resource_storage();
@ -562,11 +543,8 @@ void D3D12GSRender::flip(int buffer)
new_storage.wait_and_clean(); new_storage.wait_and_clean();
if (new_storage.in_use) if (new_storage.in_use)
{ {
m_constants_data.m_get_pos = new_storage.constants_heap_get_pos; m_buffer_data.m_get_pos = new_storage.buffer_heap_get_pos;
m_vertex_index_data.m_get_pos = new_storage.vertex_index_heap_get_pos;
m_texture_upload_data.m_get_pos = new_storage.texture_upload_heap_get_pos;
m_readback_resources.m_get_pos = new_storage.readback_heap_get_pos; m_readback_resources.m_get_pos = new_storage.readback_heap_get_pos;
m_uav_heap.m_get_pos = new_storage.uav_heap_get_pos;
} }
m_frame->flip(nullptr); m_frame->flip(nullptr);

View file

@ -112,14 +112,9 @@ private:
resource_storage &get_current_resource_storage(); resource_storage &get_current_resource_storage();
resource_storage &get_non_current_resource_storage(); resource_storage &get_non_current_resource_storage();
// Constants storage // Textures, constants, index and vertex buffers storage
data_heap<ID3D12Resource, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT> m_constants_data; data_heap m_buffer_data;
// Vertex storage data_heap m_readback_resources;
data_heap<ID3D12Resource, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT> m_vertex_index_data;
// Texture storage
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> m_texture_upload_data;
data_heap<ID3D12Heap, D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT> m_uav_heap;
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> m_readback_resources;
render_targets m_rtts; render_targets m_rtts;
@ -134,11 +129,11 @@ private:
ID3D12Resource *m_dummy_texture; ID3D12Resource *m_dummy_texture;
// Store previous fbo addresses to detect RTT config changes. // Store previous fbo addresses to detect RTT config changes.
u32 m_previous_address_a; std::array<u32, 4> m_previous_color_address = {};
u32 m_previous_address_b; u32 m_previous_address_z = 0;
u32 m_previous_address_c; u32 m_previous_target = 0;
u32 m_previous_address_d; u32 m_previous_clip_horizontal = 0;
u32 m_previous_address_z; u32 m_previous_clip_vertical = 0;
public: public:
D3D12GSRender(); D3D12GSRender();
virtual ~D3D12GSRender(); virtual ~D3D12GSRender();

View file

@ -3,48 +3,6 @@
#include "d3dx12.h" #include "d3dx12.h"
/**
 * Primary template of the heap factory.
 * Only declares the static `init` entry point for an object of type T;
 * no generic definition is given here.
 */
template<typename T>
struct init_heap
{
static T* init(ID3D12Device *device, size_t heapSize, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags);
};
/**
 * Factory specialization for ID3D12Heap.
 *
 * device    : device used to create the heap.
 * heap_size : stored into D3D12_HEAP_DESC::SizeInBytes.
 * type      : stored into D3D12_HEAP_DESC::Properties.Type.
 * flags     : stored into D3D12_HEAP_DESC::Flags.
 *
 * Returns the raw interface pointer written by CreateHeap; nothing in this
 * function releases it. The HRESULT of the call goes through CHECK_HRESULT.
 */
template<>
struct init_heap<ID3D12Heap>
{
	static ID3D12Heap* init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, D3D12_HEAP_FLAGS flags)
	{
		// Zero-initialize the description, then fill only the fields we care about.
		D3D12_HEAP_DESC description = {};
		description.Flags = flags;
		description.Properties.Type = type;
		description.SizeInBytes = heap_size;

		ID3D12Heap *created_heap;
		CHECK_HRESULT(device->CreateHeap(&description, IID_PPV_ARGS(&created_heap)));
		return created_heap;
	}
};
/**
 * Factory specialization for ID3D12Resource.
 *
 * Creates a committed buffer resource of `heap_size` bytes.
 *
 * device    : device used to create the resource.
 * heap_size : byte size of the buffer (passed to CD3DX12_RESOURCE_DESC::Buffer).
 * type      : stored into D3D12_HEAP_PROPERTIES::Type.
 * state     : initial resource state handed to CreateCommittedResource.
 *
 * Returns the raw interface pointer written by CreateCommittedResource;
 * nothing in this function releases it. The HRESULT of the call goes through
 * CHECK_HRESULT.
 */
template<>
struct init_heap<ID3D12Resource>
{
	static ID3D12Resource* init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, D3D12_RESOURCE_STATES state)
	{
		D3D12_HEAP_PROPERTIES heap_properties = {};
		heap_properties.Type = type;

		// Keep the description in a named local: taking the address of the
		// temporary returned by Buffer() is a non-standard MSVC extension and
		// is rejected by conforming compilers.
		const CD3DX12_RESOURCE_DESC buffer_desc = CD3DX12_RESOURCE_DESC::Buffer(heap_size);

		ID3D12Resource *result = nullptr;
		CHECK_HRESULT(device->CreateCommittedResource(&heap_properties,
			D3D12_HEAP_FLAG_NONE,
			&buffer_desc,
			state,
			nullptr,
			IID_PPV_ARGS(&result))
			);
		return result;
	}
};
/** /**
* Wrapper around an ID3D12Resource or an ID3D12Heap. * Wrapper around an ID3D12Resource or an ID3D12Heap.
* Acts as a ring buffer: holds a get and a put pointer, * Acts as a ring buffer: holds a get and a put pointer,
@ -52,43 +10,30 @@ struct init_heap<ID3D12Resource>
* and get is used as beginning of in use data space. * and get is used as beginning of in use data space.
* This wrapper checks that put pointer doesn't cross get one. * This wrapper checks that put pointer doesn't cross get one.
*/ */
template<typename T, size_t alignment> class data_heap
struct data_heap
{ {
T *m_heap;
size_t m_size;
size_t m_put_pos; // Start of free space
size_t m_get_pos; // End of free space
template <typename... arg_type>
void init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, arg_type... args)
{
m_size = heap_size;
m_heap = init_heap<T>::init(device, heap_size, type, args...);
m_put_pos = 0;
m_get_pos = heap_size - 1;
}
/** /**
* Does alloc cross get position ? * Does alloc cross get position ?
*/ */
template<int Alignement>
bool can_alloc(size_t size) const bool can_alloc(size_t size) const
{ {
size_t alloc_size = align(size, alignment); size_t alloc_size = align(size, Alignement);
if (m_put_pos + alloc_size < m_size) size_t aligned_put_pos = align(m_put_pos, Alignement);
if (aligned_put_pos + alloc_size < m_size)
{ {
// range before get // range before get
if (m_put_pos + alloc_size < m_get_pos) if (aligned_put_pos + alloc_size < m_get_pos)
return true; return true;
// range after get // range after get
if (m_put_pos > m_get_pos) if (aligned_put_pos > m_get_pos)
return true; return true;
return false; return false;
} }
else else
{ {
// ..]....[..get.. // ..]....[..get..
if (m_put_pos < m_get_pos) if (aligned_put_pos < m_get_pos)
return false; return false;
// ..get..]...[... // ..get..]...[...
// Actually all resources extending beyond heap space starts at 0 // Actually all resources extending beyond heap space starts at 0
@ -98,15 +43,40 @@ struct data_heap
} }
} }
size_t m_size;
size_t m_put_pos; // Start of free space
ComPtr<ID3D12Resource> m_heap;
public:
size_t m_get_pos; // End of free space
/**
 * Creates the backing committed buffer resource and resets the ring positions.
 *
 * device    : device used to create the resource.
 * heap_size : byte size of the buffer; also stored as m_size.
 * type      : stored into D3D12_HEAP_PROPERTIES::Type.
 * state     : initial resource state handed to CreateCommittedResource.
 *
 * After the call m_put_pos is 0 and m_get_pos is heap_size - 1.
 * The template parameter pack is not used by this body; it is kept so the
 * signature stays unchanged for existing callers.
 */
template <typename... arg_type>
void init(ID3D12Device *device, size_t heap_size, D3D12_HEAP_TYPE type, D3D12_RESOURCE_STATES state)
{
	m_size = heap_size;
	m_put_pos = 0;
	m_get_pos = heap_size - 1;

	D3D12_HEAP_PROPERTIES heap_properties = {};
	heap_properties.Type = type;

	// Keep the description in a named local: taking the address of the
	// temporary returned by Buffer() is a non-standard MSVC extension and
	// is rejected by conforming compilers.
	const CD3DX12_RESOURCE_DESC buffer_desc = CD3DX12_RESOURCE_DESC::Buffer(heap_size);

	CHECK_HRESULT(device->CreateCommittedResource(&heap_properties,
		D3D12_HEAP_FLAG_NONE,
		&buffer_desc,
		state,
		nullptr,
		IID_PPV_ARGS(m_heap.GetAddressOf()))
		);
}
template<int Alignement>
size_t alloc(size_t size) size_t alloc(size_t size)
{ {
assert(can_alloc(size)); if (!can_alloc<Alignement>(size)) throw EXCEPTION("Working buffer not big enough");
size_t alloc_size = align(size, alignment); size_t alloc_size = align(size, Alignement);
if (m_put_pos + alloc_size < m_size) size_t aligned_put_pos = align(m_put_pos, Alignement);
if (aligned_put_pos + alloc_size < m_size)
{ {
size_t old_put_pos = m_put_pos; m_put_pos = aligned_put_pos + alloc_size;
m_put_pos += alloc_size; return aligned_put_pos;
return old_put_pos;
} }
else else
{ {
@ -115,9 +85,37 @@ struct data_heap
} }
} }
void release() template<typename T>
T* map(const D3D12_RANGE &range)
{ {
m_heap->Release(); void *buffer;
CHECK_HRESULT(m_heap->Map(0, &range, &buffer));
void *mapped_buffer = (char*)buffer + range.Begin;
return static_cast<T*>(mapped_buffer);
}
/**
 * Maps subresource 0 of the backing resource and returns a typed pointer
 * located `heap_offset` bytes past the mapped base address.
 *
 * NOTE(review): Map is called with a null read range; per the D3D12
 * documentation this tells the runtime the CPU might read the whole
 * subresource. Confirm this is intended for this heap.
 */
template<typename T>
T* map(size_t heap_offset)
{
	void *base_address;
	CHECK_HRESULT(m_heap->Map(0, nullptr, &base_address));
	char *target = static_cast<char*>(base_address) + heap_offset;
	return static_cast<T*>(static_cast<void*>(target));
}
void unmap(const D3D12_RANGE &range)
{
m_heap->Unmap(0, &range);
}
void unmap()
{
m_heap->Unmap(0, nullptr);
}
ID3D12Resource* get_heap()
{
return m_heap.Get();
} }
/** /**
@ -230,11 +228,8 @@ struct resource_storage
* This means newer resources shouldn't allocate memory crossing this position * This means newer resources shouldn't allocate memory crossing this position
* until the frame rendering is over. * until the frame rendering is over.
*/ */
size_t constants_heap_get_pos; size_t buffer_heap_get_pos;
size_t vertex_index_heap_get_pos;
size_t texture_upload_heap_get_pos;
size_t readback_heap_get_pos; size_t readback_heap_get_pos;
size_t uav_heap_get_pos;
void reset(); void reset();
void init(ID3D12Device *device); void init(ID3D12Device *device);

View file

@ -77,29 +77,25 @@ void D3D12GSRender::clear_surface(u32 arg)
if (arg & 0x1 || arg & 0x2) if (arg & 0x1 || arg & 0x2)
{ {
CD3DX12_CPU_DESCRIPTOR_HANDLE handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().depth_stencil_descriptor_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)get_current_resource_storage().depth_stencil_descriptor_heap_index * g_descriptor_stride_rtv);
m_rtts.bind_depth_stencil(m_device.Get(), m_surface.depth_format, handle);
get_current_resource_storage().depth_stencil_descriptor_heap_index++; get_current_resource_storage().depth_stencil_descriptor_heap_index++;
if (arg & 0x1) if (arg & 0x1)
{ {
u32 clear_depth = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] >> 8; u32 clear_depth = rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE] >> 8;
u32 max_depth_value = m_surface.depth_format == CELL_GCM_SURFACE_Z16 ? 0x0000ffff : 0x00ffffff; u32 max_depth_value = m_surface.depth_format == CELL_GCM_SURFACE_Z16 ? 0x0000ffff : 0x00ffffff;
get_current_resource_storage().command_list->ClearDepthStencilView(handle, D3D12_CLEAR_FLAG_DEPTH, clear_depth / (float)max_depth_value, 0, get_current_resource_storage().command_list->ClearDepthStencilView(m_rtts.current_ds_handle, D3D12_CLEAR_FLAG_DEPTH, clear_depth / (float)max_depth_value, 0,
1, &get_scissor(rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL], rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL])); 1, &get_scissor(rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL], rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL]));
} }
if (arg & 0x2) if (arg & 0x2)
get_current_resource_storage().command_list->ClearDepthStencilView(handle, D3D12_CLEAR_FLAG_STENCIL, 0.f, get_clear_stencil(rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE]), get_current_resource_storage().command_list->ClearDepthStencilView(m_rtts.current_ds_handle, D3D12_CLEAR_FLAG_STENCIL, 0.f, get_clear_stencil(rsx::method_registers[NV4097_SET_ZSTENCIL_CLEAR_VALUE]),
1, &get_scissor(rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL], rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL])); 1, &get_scissor(rsx::method_registers[NV4097_SET_SCISSOR_HORIZONTAL], rsx::method_registers[NV4097_SET_SCISSOR_VERTICAL]));
} }
if (arg & 0xF0) if (arg & 0xF0)
{ {
CD3DX12_CPU_DESCRIPTOR_HANDLE handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().render_targets_descriptors_heap->GetCPUDescriptorHandleForHeapStart()) CD3DX12_CPU_DESCRIPTOR_HANDLE handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.current_rtts_handle);
.Offset((INT)get_current_resource_storage().render_targets_descriptors_heap_index * g_descriptor_stride_rtv); size_t rtt_index = get_num_rtt(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]);
size_t rtt_index = m_rtts.bind_render_targets(m_device.Get(), m_surface.color_format, handle);
get_current_resource_storage().render_targets_descriptors_heap_index += rtt_index; get_current_resource_storage().render_targets_descriptors_heap_index += rtt_index;
for (unsigned i = 0; i < rtt_index; i++) for (unsigned i = 0; i < rtt_index; i++)
get_current_resource_storage().command_list->ClearRenderTargetView(handle.Offset(i, g_descriptor_stride_rtv), get_clear_color(rsx::method_registers[NV4097_SET_COLOR_CLEAR_VALUE]).data(), get_current_resource_storage().command_list->ClearRenderTargetView(handle.Offset(i, g_descriptor_stride_rtv), get_clear_color(rsx::method_registers[NV4097_SET_COLOR_CLEAR_VALUE]).data(),
@ -120,16 +116,8 @@ void D3D12GSRender::clear_surface(u32 arg)
void D3D12GSRender::prepare_render_targets(ID3D12GraphicsCommandList *copycmdlist) void D3D12GSRender::prepare_render_targets(ID3D12GraphicsCommandList *copycmdlist)
{ {
// check if something has changed
u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT]; u32 surface_format = rsx::method_registers[NV4097_SET_SURFACE_FORMAT];
u32 clip_horizontal = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL];
u32 clip_vertical = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL];
u32 clip_width = clip_horizontal >> 16;
u32 clip_height = clip_vertical >> 16;
u32 clip_x = clip_horizontal;
u32 clip_y = clip_vertical;
u32 context_dma_color[] = u32 context_dma_color[] =
{ {
rsx::method_registers[NV4097_SET_CONTEXT_DMA_COLOR_A], rsx::method_registers[NV4097_SET_CONTEXT_DMA_COLOR_A],
@ -149,7 +137,7 @@ void D3D12GSRender::prepare_render_targets(ID3D12GraphicsCommandList *copycmdlis
u32 offset_zeta = rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET]; u32 offset_zeta = rsx::method_registers[NV4097_SET_SURFACE_ZETA_OFFSET];
// FBO location has changed, previous data might be copied // FBO location has changed, previous data might be copied
u32 address_color[] = std::array<u32, 4> address_color =
{ {
rsx::get_address(offset_color[0], context_dma_color[0]), rsx::get_address(offset_color[0], context_dma_color[0]),
rsx::get_address(offset_color[1], context_dma_color[1]), rsx::get_address(offset_color[1], context_dma_color[1]),
@ -158,202 +146,71 @@ void D3D12GSRender::prepare_render_targets(ID3D12GraphicsCommandList *copycmdlis
}; };
u32 address_z = rsx::get_address(offset_zeta, m_context_dma_z); u32 address_z = rsx::get_address(offset_zeta, m_context_dma_z);
u32 clip_h_reg = rsx::method_registers[NV4097_SET_SURFACE_CLIP_HORIZONTAL];
u32 clip_v_reg = rsx::method_registers[NV4097_SET_SURFACE_CLIP_VERTICAL];
u32 target_reg = rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET];
// Exit early if there is no rtt changes // Exit early if there is no rtt changes
if (m_previous_address_a == address_color[0] && if (m_previous_color_address == address_color &&
m_previous_address_b == address_color[1] &&
m_previous_address_c == address_color[2] &&
m_previous_address_d == address_color[3] &&
m_previous_address_z == address_z && m_previous_address_z == address_z &&
m_surface.format == surface_format) m_surface.format == surface_format &&
m_previous_clip_horizontal == clip_h_reg &&
m_previous_clip_vertical == clip_v_reg &&
m_previous_target == target_reg)
return; return;
m_previous_address_a = address_color[0]; m_previous_color_address = address_color;
m_previous_address_b = address_color[1];
m_previous_address_c = address_color[2];
m_previous_address_d = address_color[3];
m_previous_address_z = address_z; m_previous_address_z = address_z;
m_previous_target = target_reg;
m_previous_clip_horizontal = clip_h_reg;
m_previous_clip_vertical = clip_v_reg;
if (m_surface.format != surface_format) if (m_surface.format != surface_format)
{
m_surface.unpack(surface_format); m_surface.unpack(surface_format);
m_surface.width = clip_width;
m_surface.height = clip_height;
}
// Make previous RTTs sampleable
for (unsigned i = 0; i < 4; i++)
{
if (m_rtts.bound_render_targets[i] == nullptr)
continue;
copycmdlist->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_render_targets[i], D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ));
}
// Reset bound data
memset(m_rtts.bound_render_targets_address, 0, 4 * sizeof(u32));
memset(m_rtts.bound_render_targets, 0, 4 * sizeof(ID3D12Resource *));
// Create/Reuse requested rtts
std::array<float, 4> clear_color = get_clear_color(rsx::method_registers[NV4097_SET_COLOR_CLEAR_VALUE]); std::array<float, 4> clear_color = get_clear_color(rsx::method_registers[NV4097_SET_COLOR_CLEAR_VALUE]);
for (u8 i : get_rtt_indexes(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) m_rtts.prepare_render_target(copycmdlist, surface_format, clip_h_reg, clip_v_reg, target_reg, address_color, address_z, m_device.Get(), clear_color, 1.f, 0);
{
ComPtr<ID3D12Resource> old_render_target_resource;
m_rtts.bound_render_targets[i] = m_rtts.bind_address_as_render_targets(m_device.Get(), copycmdlist, address_color[i], clip_width, clip_height, m_surface.color_format,
clear_color, old_render_target_resource);
if (old_render_target_resource)
get_current_resource_storage().dirty_textures.push_back(old_render_target_resource);
m_rtts.bound_render_targets_address[i] = address_color[i];
}
// Same for depth buffer // write descriptors
if (m_rtts.bound_depth_stencil != nullptr) DXGI_FORMAT dxgi_format = get_color_surface_format(m_surface.color_format);
copycmdlist->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ));
m_rtts.bound_depth_stencil = nullptr;
m_rtts.bound_depth_stencil_address = 0;
if (!address_z)
return;
ComPtr<ID3D12Resource> old_depth_stencil_resource;
ID3D12Resource *ds = m_rtts.bind_address_as_depth_stencil(m_device.Get(), copycmdlist, address_z, clip_width, clip_height, m_surface.depth_format, 1., 0, old_depth_stencil_resource);
if (old_depth_stencil_resource)
get_current_resource_storage().dirty_textures.push_back(old_depth_stencil_resource);
m_rtts.bound_depth_stencil_address = address_z;
m_rtts.bound_depth_stencil = ds;
}
size_t render_targets::bind_render_targets(ID3D12Device *device, u32 color_format, D3D12_CPU_DESCRIPTOR_HANDLE handle)
{
DXGI_FORMAT dxgi_format = get_color_surface_format(color_format);
D3D12_RENDER_TARGET_VIEW_DESC rtt_view_desc = {}; D3D12_RENDER_TARGET_VIEW_DESC rtt_view_desc = {};
rtt_view_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; rtt_view_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
rtt_view_desc.Format = dxgi_format; rtt_view_desc.Format = dxgi_format;
m_rtts.current_rtts_handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().render_targets_descriptors_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)get_current_resource_storage().render_targets_descriptors_heap_index * g_descriptor_stride_rtv);
size_t rtt_index = 0; size_t rtt_index = 0;
for (u8 i : get_rtt_indexes(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET])) for (u8 i : get_rtt_indexes(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]))
{ {
if (bound_render_targets[i] == nullptr) if (std::get<1>(m_rtts.m_bound_render_targets[i]) == nullptr)
continue; continue;
device->CreateRenderTargetView(bound_render_targets[i], &rtt_view_desc, m_device->CreateRenderTargetView(std::get<1>(m_rtts.m_bound_render_targets[i]), &rtt_view_desc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(handle).Offset((INT)rtt_index * g_descriptor_stride_rtv)); CD3DX12_CPU_DESCRIPTOR_HANDLE(m_rtts.current_rtts_handle).Offset((INT)rtt_index * g_descriptor_stride_rtv));
rtt_index++; rtt_index++;
} }
return rtt_index; get_current_resource_storage().render_targets_descriptors_heap_index += rtt_index;
}
size_t render_targets::bind_depth_stencil(ID3D12Device *device, u32 depth_format, D3D12_CPU_DESCRIPTOR_HANDLE handle) if (std::get<1>(m_rtts.m_bound_depth_stencil) == nullptr)
{ return;
if (!bound_depth_stencil) m_rtts.current_ds_handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().depth_stencil_descriptor_heap->GetCPUDescriptorHandleForHeapStart())
return 0; .Offset((INT)get_current_resource_storage().depth_stencil_descriptor_heap_index * g_descriptor_stride_rtv);
get_current_resource_storage().depth_stencil_descriptor_heap_index += 1;
D3D12_DEPTH_STENCIL_VIEW_DESC depth_stencil_view_desc = {}; D3D12_DEPTH_STENCIL_VIEW_DESC depth_stencil_view_desc = {};
depth_stencil_view_desc.Format = get_depth_stencil_surface_format(depth_format); depth_stencil_view_desc.Format = get_depth_stencil_surface_format(m_surface.depth_format);
depth_stencil_view_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; depth_stencil_view_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D;
device->CreateDepthStencilView(bound_depth_stencil, &depth_stencil_view_desc, handle); m_device->CreateDepthStencilView(std::get<1>(m_rtts.m_bound_depth_stencil), &depth_stencil_view_desc, m_rtts.current_ds_handle);
return 1;
} }
void D3D12GSRender::set_rtt_and_ds(ID3D12GraphicsCommandList *command_list) void D3D12GSRender::set_rtt_and_ds(ID3D12GraphicsCommandList *command_list)
{ {
CD3DX12_CPU_DESCRIPTOR_HANDLE handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().render_targets_descriptors_heap->GetCPUDescriptorHandleForHeapStart()) UINT num_rtt = get_num_rtt(rsx::method_registers[NV4097_SET_SURFACE_COLOR_TARGET]);
.Offset((INT)get_current_resource_storage().render_targets_descriptors_heap_index * g_descriptor_stride_rtv); D3D12_CPU_DESCRIPTOR_HANDLE* rtt_handle = (num_rtt > 0) ? &m_rtts.current_rtts_handle : nullptr;
size_t num_rtt = m_rtts.bind_render_targets(m_device.Get(), m_surface.color_format, handle); D3D12_CPU_DESCRIPTOR_HANDLE* ds_handle = (std::get<1>(m_rtts.m_bound_depth_stencil) != nullptr) ? &m_rtts.current_ds_handle : nullptr;
get_current_resource_storage().render_targets_descriptors_heap_index += num_rtt; command_list->OMSetRenderTargets((UINT)num_rtt, rtt_handle, true, ds_handle);
CD3DX12_CPU_DESCRIPTOR_HANDLE depth_stencil_handle = CD3DX12_CPU_DESCRIPTOR_HANDLE(get_current_resource_storage().depth_stencil_descriptor_heap->GetCPUDescriptorHandleForHeapStart())
.Offset((INT)get_current_resource_storage().depth_stencil_descriptor_heap_index * g_descriptor_stride_rtv);
size_t num_ds = m_rtts.bind_depth_stencil(m_device.Get(), m_surface.depth_format, depth_stencil_handle);
get_current_resource_storage().depth_stencil_descriptor_heap_index += num_ds;
command_list->OMSetRenderTargets((UINT)num_rtt, num_rtt > 0 ? &handle : nullptr, !!num_rtt,
num_ds > 0 ? &depth_stencil_handle : nullptr);
} }
ID3D12Resource *render_targets::bind_address_as_render_targets(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, u32 address, void render_targets::init(ID3D12Device *device)
size_t width, size_t height, u8 surfaceColorFormat, const std::array<float, 4> &clear_color, ComPtr<ID3D12Resource> &dirtyRTT)
{ {
DXGI_FORMAT dxgi_format = get_color_surface_format(surfaceColorFormat);
auto It = render_targets_storage.find(address);
// TODO: Check if format and size match
if (It != render_targets_storage.end())
{
ComPtr<ID3D12Resource> rtt;
rtt = It->second.Get();
if (rtt->GetDesc().Format == dxgi_format && rtt->GetDesc().Width == width && rtt->GetDesc().Height == height)
{
cmdList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(rtt.Get(), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_RENDER_TARGET));
return rtt.Get();
}
render_targets_storage.erase(address);
dirtyRTT = rtt;
}
ComPtr<ID3D12Resource> rtt;
LOG_WARNING(RSX, "Creating RTT");
D3D12_CLEAR_VALUE clear_color_value = {};
clear_color_value.Format = dxgi_format;
clear_color_value.Color[0] = clear_color[0];
clear_color_value.Color[1] = clear_color[1];
clear_color_value.Color[2] = clear_color[2];
clear_color_value.Color[3] = clear_color[3];
device->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Tex2D(dxgi_format, (UINT)width, (UINT)height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET),
D3D12_RESOURCE_STATE_RENDER_TARGET,
&clear_color_value,
IID_PPV_ARGS(rtt.GetAddressOf())
);
render_targets_storage[address] = rtt;
std::wstring name = L"rtt_@" + std::to_wstring(address);
rtt->SetName(name.c_str());
return rtt.Get();
}
ID3D12Resource * render_targets::bind_address_as_depth_stencil(ID3D12Device * device, ID3D12GraphicsCommandList * cmdList, u32 address, size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear, ComPtr<ID3D12Resource> &dirtyDS)
{
auto It = depth_stencil_storage.find(address);
// TODO: Check if surface depth format match
if (It != depth_stencil_storage.end())
{
ComPtr<ID3D12Resource> ds = It->second;
if (ds->GetDesc().Width == width && ds->GetDesc().Height == height)
{
// set the resource as depth write
cmdList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds.Get(), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE));
return ds.Get();
}
// If size doesn't match, remove ds from cache
depth_stencil_storage.erase(address);
dirtyDS = ds;
}
D3D12_CLEAR_VALUE clear_depth_value = {};
clear_depth_value.DepthStencil.Depth = depthClear;
DXGI_FORMAT dxgi_format = get_depth_stencil_typeless_surface_format(surfaceDepthFormat);
clear_depth_value.Format = get_depth_stencil_surface_clear_format(surfaceDepthFormat);
ComPtr<ID3D12Resource> new_depth_stencil;
device->CreateCommittedResource(
&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Tex2D(dxgi_format, (UINT)width, (UINT)height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL),
D3D12_RESOURCE_STATE_DEPTH_WRITE,
&clear_depth_value,
IID_PPV_ARGS(new_depth_stencil.GetAddressOf())
);
depth_stencil_storage[address] = new_depth_stencil;
std::wstring name = L"ds_@" + std::to_wstring(address);
new_depth_stencil->SetName(name.c_str());
return new_depth_stencil.Get();
}
void render_targets::init(ID3D12Device *device)//, u8 surfaceDepthFormat, size_t width, size_t height, float clearColor[4], float clearDepth)
{
memset(bound_render_targets_address, 0, 4 * sizeof(u32));
memset(bound_render_targets, 0, 4 * sizeof(ID3D12Resource*));
bound_depth_stencil = nullptr;
bound_depth_stencil_address = 0;
g_descriptor_stride_rtv = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); g_descriptor_stride_rtv = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
} }
@ -366,7 +223,7 @@ namespace
size_t download_to_readback_buffer( size_t download_to_readback_buffer(
ID3D12Device *device, ID3D12Device *device,
ID3D12GraphicsCommandList * command_list, ID3D12GraphicsCommandList * command_list,
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &readback_heap, data_heap &readback_heap,
ID3D12Resource * color_surface, ID3D12Resource * color_surface,
int color_surface_format int color_surface_format
) )
@ -390,23 +247,20 @@ namespace
} }
size_t buffer_size = row_pitch * clip_h; size_t buffer_size = row_pitch * clip_h;
assert(readback_heap.can_alloc(buffer_size)); size_t heap_offset = readback_heap.alloc<D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = readback_heap.alloc(buffer_size);
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(color_surface, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE)); command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(color_surface, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE));
command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(readback_heap.m_heap, { heap_offset, { dxgi_format, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0, command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(readback_heap.get_heap(), { heap_offset, { dxgi_format, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(color_surface, 0), nullptr); &CD3DX12_TEXTURE_COPY_LOCATION(color_surface, 0), nullptr);
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(color_surface, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET)); command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(color_surface, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET));
return heap_offset; return heap_offset;
} }
void copy_readback_buffer_to_dest(void *dest, data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &readback_heap, size_t offset_in_heap, size_t dst_pitch, size_t src_pitch, size_t height) void copy_readback_buffer_to_dest(void *dest, data_heap &readback_heap, size_t offset_in_heap, size_t dst_pitch, size_t src_pitch, size_t height)
{ {
void *buffer;
// TODO: Use exact range // TODO: Use exact range
CHECK_HRESULT(readback_heap.m_heap->Map(0, nullptr, &buffer)); void *mapped_buffer = readback_heap.map<void>(offset_in_heap);
void *mapped_buffer = (char*)buffer + offset_in_heap;
for (unsigned row = 0; row < height; row++) for (unsigned row = 0; row < height; row++)
{ {
u32 *casted_dest = (u32*)((char*)dest + row * dst_pitch); u32 *casted_dest = (u32*)((char*)dest + row * dst_pitch);
@ -414,7 +268,7 @@ namespace
for (unsigned col = 0; col < src_pitch / 4; col++) for (unsigned col = 0; col < src_pitch / 4; col++)
*casted_dest++ = se_storage<u32>::swap(*casted_src++); *casted_dest++ = se_storage<u32>::swap(*casted_src++);
} }
readback_heap.m_heap->Unmap(0, nullptr); readback_heap.unmap();
} }
void wait_for_command_queue(ID3D12Device *device, ID3D12CommandQueue *command_queue) void wait_for_command_queue(ID3D12Device *device, ID3D12CommandQueue *command_queue)
@ -474,13 +328,11 @@ void D3D12GSRender::copy_render_target_to_dma_location()
if (m_context_dma_z && rpcs3::state.config.rsx.opengl.write_depth_buffer) if (m_context_dma_z && rpcs3::state.config.rsx.opengl.write_depth_buffer)
{ {
size_t uav_size = clip_w * clip_h * 2; size_t uav_size = clip_w * clip_h * 2;
assert(m_uav_heap.can_alloc(uav_size));
size_t heap_offset = m_uav_heap.alloc(uav_size);
CHECK_HRESULT( CHECK_HRESULT(
m_device->CreatePlacedResource( m_device->CreateCommittedResource(
m_uav_heap.m_heap, &D3D12_HEAP_PROPERTIES{D3D12_HEAP_TYPE_DEFAULT},
heap_offset, D3D12_HEAP_FLAG_NONE,
&CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8_UNORM, clip_w, clip_h, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8_UNORM, clip_w, clip_h, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS),
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
nullptr, nullptr,
@ -497,7 +349,7 @@ void D3D12GSRender::copy_render_target_to_dma_location()
shader_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; shader_resource_view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D;
shader_resource_view_desc.Texture2D.MipLevels = 1; shader_resource_view_desc.Texture2D.MipLevels = 1;
shader_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; shader_resource_view_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
m_device->CreateShaderResourceView(m_rtts.bound_depth_stencil, &shader_resource_view_desc, m_device->CreateShaderResourceView(std::get<1>(m_rtts.m_bound_depth_stencil), &shader_resource_view_desc,
CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptor_heap->GetCPUDescriptorHandleForHeapStart())); CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptor_heap->GetCPUDescriptorHandleForHeapStart()));
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {}; D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = {};
uav_desc.Format = DXGI_FORMAT_R8_UNORM; uav_desc.Format = DXGI_FORMAT_R8_UNORM;
@ -506,7 +358,7 @@ void D3D12GSRender::copy_render_target_to_dma_location()
CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptor_heap->GetCPUDescriptorHandleForHeapStart()).Offset(1, g_descriptor_stride_srv_cbv_uav)); CD3DX12_CPU_DESCRIPTOR_HANDLE(descriptor_heap->GetCPUDescriptorHandleForHeapStart()).Offset(1, g_descriptor_stride_srv_cbv_uav));
// Convert // Convert
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ)); get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ));
get_current_resource_storage().command_list->SetPipelineState(m_convertPSO); get_current_resource_storage().command_list->SetPipelineState(m_convertPSO);
get_current_resource_storage().command_list->SetComputeRootSignature(m_convertRootSignature); get_current_resource_storage().command_list->SetComputeRootSignature(m_convertRootSignature);
@ -516,12 +368,12 @@ void D3D12GSRender::copy_render_target_to_dma_location()
D3D12_RESOURCE_BARRIER barriers[] = D3D12_RESOURCE_BARRIER barriers[] =
{ {
CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE), CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE),
CD3DX12_RESOURCE_BARRIER::UAV(depth_format_conversion_buffer.Get()), CD3DX12_RESOURCE_BARRIER::UAV(depth_format_conversion_buffer.Get()),
}; };
get_current_resource_storage().command_list->ResourceBarrier(2, barriers); get_current_resource_storage().command_list->ResourceBarrier(2, barriers);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(depth_format_conversion_buffer.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE)); get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(depth_format_conversion_buffer.Get(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE));
get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.m_heap, { depth_buffer_offset_in_heap,{ DXGI_FORMAT_R8_UNORM, (UINT)clip_w, (UINT)clip_h, 1, (UINT)depth_row_pitch } }), 0, 0, 0, get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.get_heap(), { depth_buffer_offset_in_heap,{ DXGI_FORMAT_R8_UNORM, (UINT)clip_w, (UINT)clip_h, 1, (UINT)depth_row_pitch } }), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(depth_format_conversion_buffer.Get(), 0), nullptr); &CD3DX12_TEXTURE_COPY_LOCATION(depth_format_conversion_buffer.Get(), 0), nullptr);
invalidate_address(address_z); invalidate_address(address_z);
@ -536,7 +388,7 @@ void D3D12GSRender::copy_render_target_to_dma_location()
{ {
if (!address_color[i]) if (!address_color[i])
continue; continue;
color_buffer_offset_in_heap[i] = download_to_readback_buffer(m_device.Get(), get_current_resource_storage().command_list.Get(), m_readback_resources, m_rtts.bound_render_targets[i], m_surface.color_format); color_buffer_offset_in_heap[i] = download_to_readback_buffer(m_device.Get(), get_current_resource_storage().command_list.Get(), m_readback_resources, std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface.color_format);
invalidate_address(address_color[i]); invalidate_address(address_color[i]);
need_transfer = true; need_transfer = true;
} }
@ -555,10 +407,7 @@ void D3D12GSRender::copy_render_target_to_dma_location()
{ {
auto ptr = vm::base(address_z); auto ptr = vm::base(address_z);
char *depth_buffer = (char*)ptr; char *depth_buffer = (char*)ptr;
void *buffer; u8 *mapped_buffer = m_readback_resources.map<u8>(depth_buffer_offset_in_heap);
// TODO: Use exact range
CHECK_HRESULT(m_readback_resources.m_heap->Map(0, nullptr, &buffer));
unsigned char *mapped_buffer = (unsigned char*)buffer + depth_buffer_offset_in_heap;
for (unsigned row = 0; row < (unsigned)clip_h; row++) for (unsigned row = 0; row < (unsigned)clip_h; row++)
{ {
@ -571,7 +420,7 @@ void D3D12GSRender::copy_render_target_to_dma_location()
depth_buffer[4 * (row * clip_w + i) + 3] = c; depth_buffer[4 * (row * clip_w + i) + 3] = c;
} }
} }
m_readback_resources.m_heap->Unmap(0, nullptr); m_readback_resources.unmap();
} }
size_t srcPitch, dstPitch; size_t srcPitch, dstPitch;
@ -613,7 +462,7 @@ void D3D12GSRender::copy_render_target_to_dma_location()
void D3D12GSRender::copy_render_targets_to_memory(void *buffer, u8 rtt) void D3D12GSRender::copy_render_targets_to_memory(void *buffer, u8 rtt)
{ {
size_t heap_offset = download_to_readback_buffer(m_device.Get(), get_current_resource_storage().command_list.Get(), m_readback_resources, m_rtts.bound_render_targets[rtt], m_surface.color_format); size_t heap_offset = download_to_readback_buffer(m_device.Get(), get_current_resource_storage().command_list.Get(), m_readback_resources, std::get<1>(m_rtts.m_bound_render_targets[rtt]), m_surface.color_format);
CHECK_HRESULT(get_current_resource_storage().command_list->Close()); CHECK_HRESULT(get_current_resource_storage().command_list->Close());
m_command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)get_current_resource_storage().command_list.GetAddressOf()); m_command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)get_current_resource_storage().command_list.GetAddressOf());
@ -651,14 +500,13 @@ void D3D12GSRender::copy_depth_buffer_to_memory(void *buffer)
size_t row_pitch = align(clip_w * 4, 256); size_t row_pitch = align(clip_w * 4, 256);
size_t buffer_size = row_pitch * clip_h; size_t buffer_size = row_pitch * clip_h;
assert(m_readback_resources.can_alloc(buffer_size)); size_t heap_offset = m_readback_resources.alloc<D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = m_readback_resources.alloc(buffer_size);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE)); get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE));
get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.m_heap, { heap_offset,{ DXGI_FORMAT_R32_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0, get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.get_heap(), { heap_offset,{ DXGI_FORMAT_R32_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(m_rtts.bound_depth_stencil, 0), nullptr); &CD3DX12_TEXTURE_COPY_LOCATION(std::get<1>(m_rtts.m_bound_depth_stencil), 0), nullptr);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE)); get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE));
CHECK_HRESULT(get_current_resource_storage().command_list->Close()); CHECK_HRESULT(get_current_resource_storage().command_list->Close());
m_command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)get_current_resource_storage().command_list.GetAddressOf()); m_command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)get_current_resource_storage().command_list.GetAddressOf());
@ -667,9 +515,7 @@ void D3D12GSRender::copy_depth_buffer_to_memory(void *buffer)
wait_for_command_queue(m_device.Get(), m_command_queue.Get()); wait_for_command_queue(m_device.Get(), m_command_queue.Get());
m_readback_resources.m_get_pos = m_readback_resources.get_current_put_pos_minus_one(); m_readback_resources.m_get_pos = m_readback_resources.get_current_put_pos_minus_one();
void *temp_buffer; void *mapped_buffer = m_readback_resources.map<void>(heap_offset);
CHECK_HRESULT(m_readback_resources.m_heap->Map(0, nullptr, &temp_buffer));
void *mapped_buffer = (char*)temp_buffer + heap_offset;
for (unsigned row = 0; row < clip_h; row++) for (unsigned row = 0; row < clip_h; row++)
{ {
u32 *casted_dest = (u32*)((char*)buffer + row * clip_w * 4); u32 *casted_dest = (u32*)((char*)buffer + row * clip_w * 4);
@ -677,7 +523,7 @@ void D3D12GSRender::copy_depth_buffer_to_memory(void *buffer)
for (unsigned col = 0; col < row_pitch / 4; col++) for (unsigned col = 0; col < row_pitch / 4; col++)
*casted_dest++ = *casted_src++; *casted_dest++ = *casted_src++;
} }
m_readback_resources.m_heap->Unmap(0, nullptr); m_readback_resources.unmap();
} }
@ -689,14 +535,13 @@ void D3D12GSRender::copy_stencil_buffer_to_memory(void *buffer)
size_t row_pitch = align(clip_w * 4, 256); size_t row_pitch = align(clip_w * 4, 256);
size_t buffer_size = row_pitch * clip_h; size_t buffer_size = row_pitch * clip_h;
assert(m_readback_resources.can_alloc(buffer_size)); size_t heap_offset = m_readback_resources.alloc<D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = m_readback_resources.alloc(buffer_size);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE)); get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE));
get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.m_heap, { heap_offset, { DXGI_FORMAT_R8_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0, get_current_resource_storage().command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(m_readback_resources.get_heap(), { heap_offset, { DXGI_FORMAT_R8_TYPELESS, (UINT)clip_w, (UINT)clip_h, 1, (UINT)row_pitch } }), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(m_rtts.bound_depth_stencil, 1), nullptr); &CD3DX12_TEXTURE_COPY_LOCATION(std::get<1>(m_rtts.m_bound_depth_stencil), 1), nullptr);
get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_rtts.bound_depth_stencil, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE)); get_current_resource_storage().command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(std::get<1>(m_rtts.m_bound_depth_stencil), D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE));
CHECK_HRESULT(get_current_resource_storage().command_list->Close()); CHECK_HRESULT(get_current_resource_storage().command_list->Close());
m_command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)get_current_resource_storage().command_list.GetAddressOf()); m_command_queue->ExecuteCommandLists(1, (ID3D12CommandList**)get_current_resource_storage().command_list.GetAddressOf());
@ -705,9 +550,7 @@ void D3D12GSRender::copy_stencil_buffer_to_memory(void *buffer)
wait_for_command_queue(m_device.Get(), m_command_queue.Get()); wait_for_command_queue(m_device.Get(), m_command_queue.Get());
m_readback_resources.m_get_pos = m_readback_resources.get_current_put_pos_minus_one(); m_readback_resources.m_get_pos = m_readback_resources.get_current_put_pos_minus_one();
void *temp_buffer; void *mapped_buffer = m_readback_resources.map<void>(heap_offset);
CHECK_HRESULT(m_readback_resources.m_heap->Map(0, nullptr, &temp_buffer));
void *mapped_buffer = (char*)temp_buffer + heap_offset;
for (unsigned row = 0; row < clip_h; row++) for (unsigned row = 0; row < clip_h; row++)
{ {
char *casted_dest = (char*)buffer + row * clip_w; char *casted_dest = (char*)buffer + row * clip_w;
@ -715,7 +558,7 @@ void D3D12GSRender::copy_stencil_buffer_to_memory(void *buffer)
for (unsigned col = 0; col < row_pitch; col++) for (unsigned col = 0; col < row_pitch; col++)
*casted_dest++ = *casted_src++; *casted_dest++ = *casted_src++;
} }
m_readback_resources.m_heap->Unmap(0, nullptr); m_readback_resources.unmap();
} }
#endif #endif

View file

@ -1,30 +1,297 @@
#pragma once #pragma once
#include <utility>
#include <d3d12.h> #include <d3d12.h>
#include "d3dx12.h"
struct render_targets #include "D3D12Formats.h"
#include <gsl.h>
namespace rsx
{ {
INT g_descriptor_stride_rtv; namespace
std::unordered_map<u32, ComPtr<ID3D12Resource> > render_targets_storage; {
ID3D12Resource *bound_render_targets[4]; std::vector<u8> get_rtt_indexes(u8 color_target)
u32 bound_render_targets_address[4]; {
std::unordered_map<u32, ComPtr<ID3D12Resource> > depth_stencil_storage; switch (color_target)
ID3D12Resource *bound_depth_stencil; {
u32 bound_depth_stencil_address; case CELL_GCM_SURFACE_TARGET_NONE: return{};
case CELL_GCM_SURFACE_TARGET_0: return{ 0 };
case CELL_GCM_SURFACE_TARGET_1: return{ 1 };
case CELL_GCM_SURFACE_TARGET_MRT1: return{ 0, 1 };
case CELL_GCM_SURFACE_TARGET_MRT2: return{ 0, 1, 2 };
case CELL_GCM_SURFACE_TARGET_MRT3: return{ 0, 1, 2, 3 };
}
throw EXCEPTION("Wrong color_target (%d)", color_target);
}
}
size_t bind_render_targets(ID3D12Device *, u32 color_format, D3D12_CPU_DESCRIPTOR_HANDLE); template<typename Traits>
size_t bind_depth_stencil(ID3D12Device *, u32 depth_format, D3D12_CPU_DESCRIPTOR_HANDLE); struct surface_store
{
private:
using surface_storage_type = typename Traits::surface_storage_type;
using surface_type = typename Traits::surface_type;
using command_list_type = typename Traits::command_list_type;
std::unordered_map<u32, surface_storage_type> m_render_targets_storage = {};
std::unordered_map<u32, surface_storage_type> m_depth_stencil_storage = {};
public:
std::array<std::tuple<u32, surface_type>, 4> m_bound_render_targets = {};
std::tuple<u32, surface_type> m_bound_depth_stencil = {};
std::list<surface_storage_type> invalidated_resources;
surface_store() = default;
~surface_store() = default;
surface_store(const surface_store&) = delete;
private:
/** /**
* If render target already exists at address, issue state change operation on cmdList. * If render target already exists at address, issue state change operation on cmdList.
* Otherwise create one with width, height, clearColor info. * Otherwise create one with width, height, clearColor info.
* returns the corresponding render target resource. * returns the corresponding render target resource.
*/ */
ID3D12Resource *bind_address_as_render_targets(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, u32 address, template <typename ...Args>
size_t width, size_t height, u8 surfaceColorFormat, const std::array<float, 4> &clearColor, ComPtr<ID3D12Resource> &dirtyDS); gsl::not_null<surface_type> bind_address_as_render_targets(
command_list_type command_list,
u32 address,
u8 surface_color_format, size_t width, size_t height,
Args&&... extra_params)
{
auto It = m_render_targets_storage.find(address);
// TODO: Fix corner cases
// This doesn't take overlapping surface(s) into account.
// Invalidated surface(s) should also copy their content to the new resources.
if (It != m_render_targets_storage.end())
{
surface_storage_type &rtt = It->second;
if (Traits::rtt_has_format_width_height(rtt, surface_color_format, width, height))
{
Traits::prepare_rtt_for_drawing(command_list, rtt.Get());
return rtt.Get();
}
invalidated_resources.push_back(std::move(rtt));
m_render_targets_storage.erase(address);
}
ID3D12Resource *bind_address_as_depth_stencil(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, u32 address, m_render_targets_storage[address] = Traits::create_new_render_target(address, surface_color_format, width, height, std::forward<Args>(extra_params)...);
size_t width, size_t height, u8 surfaceDepthFormat, float depthClear, u8 stencilClear, ComPtr<ID3D12Resource> &dirtyDS); return m_render_targets_storage[address].Get();
}
/**
 * Returns the depth stencil surface stored for address, creating one when needed.
 * If a surface is already stored at address and Traits reports that it matches the
 * requested format, width and height, it is prepared for drawing on command_list and returned.
 * A stored surface that does not match is moved into invalidated_resources and replaced
 * by a surface obtained from Traits::create_new_depth_stencil (which receives extra_params).
 */
template <typename ...Args>
gsl::not_null<surface_type> bind_address_as_depth_stencil(
	command_list_type command_list,
	u32 address,
	u8 surface_depth_format, size_t width, size_t height,
	Args&&... extra_params)
{
	const auto found = m_depth_stencil_storage.find(address);
	if (found != m_depth_stencil_storage.end())
	{
		surface_storage_type &existing = found->second;
		const bool reusable = Traits::ds_has_format_width_height(existing, surface_depth_format, width, height);
		if (reusable)
		{
			Traits::prepare_ds_for_drawing(command_list, existing.Get());
			return existing.Get();
		}

		// Stale surface: hand ownership over to the invalidated list before dropping the map entry.
		invalidated_resources.push_back(std::move(existing));
		m_depth_stencil_storage.erase(address);
	}

	// Nothing usable is stored at this address: create the surface, then register it.
	surface_storage_type created = Traits::create_new_depth_stencil(address, surface_depth_format, width, height, std::forward<Args>(extra_params)...);
	surface_storage_type &slot = m_depth_stencil_storage[address];
	slot = std::move(created);
	return slot.Get();
}
public:
/**
 * Rebinds the color and depth stencil surfaces used by the next draw calls.
 *
 * Every currently bound surface is first prepared for sampling and unbound. Then, for each
 * color index selected by set_surface_target whose address is non-zero, a render target is
 * reused or created and bound. Finally, if address_z is non-zero, the same is done for the
 * depth stencil surface.
 *
 * @param command_list            command list receiving the state transitions.
 * @param set_surface_format_reg  packed surface format register, decoded through rsx::surface_info::unpack.
 * @param clip_horizontal_reg     clip register; the surface width is read from its upper 16 bits.
 * @param clip_vertical_reg       clip register; the surface height is read from its upper 16 bits.
 * @param set_surface_target      CELL_GCM_SURFACE_TARGET_* value selecting the color surfaces to bind.
 * @param surface_addresses       addresses of the four color surfaces; a zero address is skipped.
 * @param address_z               address of the depth stencil surface; zero leaves it unbound.
 * @param extra_params            passed through to the Traits surface creation functions.
 */
template <typename ...Args>
void prepare_render_target(
	command_list_type command_list,
	u32 set_surface_format_reg,
	u32 clip_horizontal_reg, u32 clip_vertical_reg,
	u32 set_surface_target,
	const std::array<u32, 4> &surface_addresses, u32 address_z,
	Args&&... extra_params)
{
	const u32 clip_width = clip_horizontal_reg >> 16;
	const u32 clip_height = clip_vertical_reg >> 16;

	rsx::surface_info surface = {};
	surface.unpack(set_surface_format_reg);

	// Make previous RTTs sampleable
	for (std::tuple<u32, surface_type> &rtt : m_bound_render_targets)
	{
		if (std::get<1>(rtt) != nullptr)
			Traits::prepare_rtt_for_sampling(command_list, std::get<1>(rtt));
		rtt = std::make_tuple(0, nullptr);
	}

	// Create/Reuse requested rtts
	for (u8 surface_index : get_rtt_indexes(set_surface_target))
	{
		const u32 surface_address = surface_addresses[surface_index];
		if (surface_address == 0)
			continue;

		// extra_params is consumed several times (once per surface, then once for the depth
		// buffer), so it is passed as lvalues here: forwarding it on every iteration could
		// leave later calls with moved-from arguments.
		m_bound_render_targets[surface_index] = std::make_tuple(surface_address,
			bind_address_as_render_targets(command_list, surface_address, surface.color_format, clip_width, clip_height, extra_params...));
	}

	// Same for depth buffer
	if (std::get<1>(m_bound_depth_stencil) != nullptr)
		Traits::prepare_ds_for_sampling(command_list, std::get<1>(m_bound_depth_stencil));
	m_bound_depth_stencil = std::make_tuple(0, nullptr);

	if (!address_z)
		return;

	// Last use of extra_params: forwarding is safe here.
	m_bound_depth_stencil = std::make_tuple(address_z,
		bind_address_as_depth_stencil(command_list, address_z, surface.depth_format, clip_width, clip_height, std::forward<Args>(extra_params)...));
}
/**
 * Looks up the render target stored at address.
 * Returns the stored surface, or a value-initialized surface_type when no render target
 * is registered at exactly that address.
 */
surface_type get_texture_from_render_target_if_applicable(u32 address)
{
	// TODO: Handle texture that overlaps one (or several) surface.
	// Handle texture conversion
	// FIXME: Disgaea 3 loading screen seems to use a subset of a surface. It's not properly handled here.
	// Note: not const because conversions/resolve/... can happen
	const auto entry = m_render_targets_storage.find(address);
	if (entry == m_render_targets_storage.end())
		return surface_type();

	return entry->second.Get();
}
/**
 * Looks up the depth stencil surface stored at address.
 * Returns the stored surface, or a value-initialized surface_type when no depth stencil
 * is registered at exactly that address.
 */
surface_type get_texture_from_depth_stencil_if_applicable(u32 address)
{
	// TODO: Same as for render targets (overlapping surfaces, conversions), although there
	// wasn't any game using such a corner case for DS yet.
	const auto entry = m_depth_stencil_storage.find(address);
	if (entry == m_depth_stencil_storage.end())
		return surface_type();

	return entry->second.Get();
}
};
}
/**
 * Traits used to instantiate rsx::surface_store for the D3D12 backend.
 * Surfaces are owned through ComPtr<ID3D12Resource> and handed around as raw ID3D12Resource*.
 * Every function is static; the struct carries no state.
 */
struct render_target_traits
{
	using surface_storage_type = ComPtr<ID3D12Resource>;
	using surface_type = ID3D12Resource*;
	using command_list_type = gsl::not_null<ID3D12GraphicsCommandList*>;

	/**
	 * Creates a committed 2D texture usable as a render target.
	 * The resource is created in the RENDER_TARGET state, with clear_color as its optimized
	 * clear value, and is named "rtt_@<address>".
	 * The trailing float/u8 parameters (depth/stencil clear values) are unused here; they exist
	 * so that both creation functions accept the same extra arguments.
	 * Throws if the resource cannot be created.
	 */
	static
	ComPtr<ID3D12Resource> create_new_render_target(
		u32 address,
		u8 surface_color_format, size_t width, size_t height,
		gsl::not_null<ID3D12Device*> device, const std::array<float, 4> &clear_color, float, u8)
	{
		DXGI_FORMAT dxgi_format = get_color_surface_format(surface_color_format);
		ComPtr<ID3D12Resource> rtt;
		LOG_WARNING(RSX, "Creating RTT");

		D3D12_CLEAR_VALUE clear_color_value = {};
		clear_color_value.Format = dxgi_format;
		clear_color_value.Color[0] = clear_color[0];
		clear_color_value.Color[1] = clear_color[1];
		clear_color_value.Color[2] = clear_color[2];
		clear_color_value.Color[3] = clear_color[3];

		const HRESULT hr = device->CreateCommittedResource(
			&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
			D3D12_HEAP_FLAG_NONE,
			&CD3DX12_RESOURCE_DESC::Tex2D(dxgi_format, (UINT)width, (UINT)height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET),
			D3D12_RESOURCE_STATE_RENDER_TARGET,
			&clear_color_value,
			IID_PPV_ARGS(rtt.GetAddressOf())
			);
		// On failure rtt stays null; report the error instead of dereferencing it below.
		if (FAILED(hr))
			throw EXCEPTION("Failed to create render target at address 0x%x (HRESULT = 0x%x)", address, static_cast<u32>(hr));

		std::wstring name = L"rtt_@" + std::to_wstring(address);
		rtt->SetName(name.c_str());

		return rtt;
	}

	/** Records a GENERIC_READ -> RENDER_TARGET transition for rtt on command_list. */
	static
	void prepare_rtt_for_drawing(
		gsl::not_null<ID3D12GraphicsCommandList*> command_list,
		ID3D12Resource* rtt)
	{
		command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(rtt, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_RENDER_TARGET));
	}

	/** Records a RENDER_TARGET -> GENERIC_READ transition for rtt on command_list. */
	static
	void prepare_rtt_for_sampling(
		gsl::not_null<ID3D12GraphicsCommandList*> command_list,
		ID3D12Resource* rtt)
	{
		command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(rtt, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_GENERIC_READ));
	}

	/**
	 * Creates a committed 2D texture usable as a depth stencil surface.
	 * The resource uses the typeless format matching surfaceDepthFormat, is created in the
	 * DEPTH_WRITE state with clear_depth/clear_stencil as its optimized clear value, and is
	 * named "ds_@<address>".
	 * The std::array parameter (color clear value) is unused here.
	 * Throws if the resource cannot be created.
	 */
	static
	ComPtr<ID3D12Resource> create_new_depth_stencil(
		u32 address,
		u8 surfaceDepthFormat, size_t width, size_t height,
		gsl::not_null<ID3D12Device*> device, const std::array<float, 4>& , float clear_depth, u8 clear_stencil)
	{
		D3D12_CLEAR_VALUE clear_depth_value = {};
		clear_depth_value.DepthStencil.Depth = clear_depth;
		clear_depth_value.DepthStencil.Stencil = clear_stencil;

		DXGI_FORMAT dxgi_format = get_depth_stencil_typeless_surface_format(surfaceDepthFormat);
		clear_depth_value.Format = get_depth_stencil_surface_clear_format(surfaceDepthFormat);

		ComPtr<ID3D12Resource> new_depth_stencil;
		const HRESULT hr = device->CreateCommittedResource(
			&CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT),
			D3D12_HEAP_FLAG_NONE,
			&CD3DX12_RESOURCE_DESC::Tex2D(dxgi_format, (UINT)width, (UINT)height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL),
			D3D12_RESOURCE_STATE_DEPTH_WRITE,
			&clear_depth_value,
			IID_PPV_ARGS(new_depth_stencil.GetAddressOf())
			);
		// On failure new_depth_stencil stays null; report the error instead of dereferencing it below.
		if (FAILED(hr))
			throw EXCEPTION("Failed to create depth stencil at address 0x%x (HRESULT = 0x%x)", address, static_cast<u32>(hr));

		std::wstring name = L"ds_@" + std::to_wstring(address);
		new_depth_stencil->SetName(name.c_str());

		return new_depth_stencil;
	}

	/** Records a GENERIC_READ -> DEPTH_WRITE transition for ds on command_list. */
	static
	void prepare_ds_for_drawing(
		gsl::not_null<ID3D12GraphicsCommandList*> command_list,
		ID3D12Resource* ds)
	{
		// set the resource as depth write
		command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_DEPTH_WRITE));
	}

	/** Records a DEPTH_WRITE -> GENERIC_READ transition for ds on command_list. */
	static
	void prepare_ds_for_sampling(
		gsl::not_null<ID3D12GraphicsCommandList*> command_list,
		ID3D12Resource* ds)
	{
		command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(ds, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_GENERIC_READ));
	}

	/** Returns true when rtt has the DXGI format mapped from surface_color_format and the given size. */
	static
	bool rtt_has_format_width_height(const ComPtr<ID3D12Resource> &rtt, u8 surface_color_format, size_t width, size_t height)
	{
		const auto desc = rtt->GetDesc();
		return desc.Format == get_color_surface_format(surface_color_format) && desc.Width == width && desc.Height == height;
	}

	/**
	 * Returns true when the depth stencil resource has the format mapped from
	 * surface_depth_stencil_format and the given size.
	 */
	static
	bool ds_has_format_width_height(const ComPtr<ID3D12Resource> &rtt, u8 surface_depth_stencil_format, size_t width, size_t height)
	{
		// create_new_depth_stencil allocates the resource with the typeless format, so that is
		// the format the resource description reports and the one to compare against.
		const auto desc = rtt->GetDesc();
		return desc.Format == get_depth_stencil_typeless_surface_format(surface_depth_stencil_format) && desc.Width == width && desc.Height == height;
	}
};
// Render target set of the D3D12 backend: rsx::surface_store instantiated with
// render_target_traits, extended with the descriptor-related members below.
struct render_targets : public rsx::surface_store<render_target_traits>
{
// NOTE(review): presumably the RTV descriptor handle increment size, set up by init() -- confirm.
INT g_descriptor_stride_rtv;
// NOTE(review): presumably the CPU descriptor handles of the currently bound color targets
// and depth stencil -- confirm against the code that fills them.
D3D12_CPU_DESCRIPTOR_HANDLE current_rtts_handle;
D3D12_CPU_DESCRIPTOR_HANDLE current_ds_handle;
void init(ID3D12Device *device); void init(ID3D12Device *device);
}; };

View file

@ -49,7 +49,7 @@ ComPtr<ID3D12Resource> upload_single_texture(
const rsx::texture &texture, const rsx::texture &texture,
ID3D12Device *device, ID3D12Device *device,
ID3D12GraphicsCommandList *command_list, ID3D12GraphicsCommandList *command_list,
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &texture_buffer_heap) data_heap &texture_buffer_heap)
{ {
size_t w = texture.width(), h = texture.height(); size_t w = texture.width(), h = texture.height();
size_t depth = texture.depth(); size_t depth = texture.depth();
@ -60,14 +60,11 @@ ComPtr<ID3D12Resource> upload_single_texture(
DXGI_FORMAT dxgi_format = get_texture_format(format); DXGI_FORMAT dxgi_format = get_texture_format(format);
size_t buffer_size = get_placed_texture_storage_size(texture, 256); size_t buffer_size = get_placed_texture_storage_size(texture, 256);
assert(texture_buffer_heap.can_alloc(buffer_size)); size_t heap_offset = texture_buffer_heap.alloc<D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = texture_buffer_heap.alloc(buffer_size);
void *buffer; void *mapped_buffer = texture_buffer_heap.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
CHECK_HRESULT(texture_buffer_heap.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
std::vector<MipmapLevelInfo> mipInfos = upload_placed_texture(texture, 256, mapped_buffer); std::vector<MipmapLevelInfo> mipInfos = upload_placed_texture(texture, 256, mapped_buffer);
texture_buffer_heap.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); texture_buffer_heap.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
ComPtr<ID3D12Resource> result; ComPtr<ID3D12Resource> result;
CHECK_HRESULT(device->CreateCommittedResource( CHECK_HRESULT(device->CreateCommittedResource(
@ -83,7 +80,7 @@ ComPtr<ID3D12Resource> upload_single_texture(
for (const MipmapLevelInfo mli : mipInfos) for (const MipmapLevelInfo mli : mipInfos)
{ {
command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(result.Get(), (UINT)mip_level), 0, 0, 0, command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(result.Get(), (UINT)mip_level), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.m_heap, { heap_offset + mli.offset, { dxgi_format, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr); &CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.get_heap(), { heap_offset + mli.offset, { dxgi_format, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
mip_level++; mip_level++;
} }
@ -97,7 +94,7 @@ ComPtr<ID3D12Resource> upload_single_texture(
void update_existing_texture( void update_existing_texture(
const rsx::texture &texture, const rsx::texture &texture,
ID3D12GraphicsCommandList *command_list, ID3D12GraphicsCommandList *command_list,
data_heap<ID3D12Resource, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT> &texture_buffer_heap, data_heap &texture_buffer_heap,
ID3D12Resource *existing_texture) ID3D12Resource *existing_texture)
{ {
size_t w = texture.width(), h = texture.height(); size_t w = texture.width(), h = texture.height();
@ -106,21 +103,18 @@ void update_existing_texture(
DXGI_FORMAT dxgi_format = get_texture_format(format); DXGI_FORMAT dxgi_format = get_texture_format(format);
size_t buffer_size = get_placed_texture_storage_size(texture, 256); size_t buffer_size = get_placed_texture_storage_size(texture, 256);
assert(texture_buffer_heap.can_alloc(buffer_size)); size_t heap_offset = texture_buffer_heap.alloc<D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT>(buffer_size);
size_t heap_offset = texture_buffer_heap.alloc(buffer_size);
void *buffer; void *mapped_buffer = texture_buffer_heap.map<void>(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
CHECK_HRESULT(texture_buffer_heap.m_heap->Map(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size), &buffer));
void *mapped_buffer = (char*)buffer + heap_offset;
std::vector<MipmapLevelInfo> mipInfos = upload_placed_texture(texture, 256, mapped_buffer); std::vector<MipmapLevelInfo> mipInfos = upload_placed_texture(texture, 256, mapped_buffer);
texture_buffer_heap.m_heap->Unmap(0, &CD3DX12_RANGE(heap_offset, heap_offset + buffer_size)); texture_buffer_heap.unmap(CD3DX12_RANGE(heap_offset, heap_offset + buffer_size));
command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(existing_texture, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_COPY_DEST)); command_list->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(existing_texture, D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_COPY_DEST));
size_t miplevel = 0; size_t miplevel = 0;
for (const MipmapLevelInfo mli : mipInfos) for (const MipmapLevelInfo mli : mipInfos)
{ {
command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(existing_texture, (UINT)miplevel), 0, 0, 0, command_list->CopyTextureRegion(&CD3DX12_TEXTURE_COPY_LOCATION(existing_texture, (UINT)miplevel), 0, 0, 0,
&CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.m_heap, { heap_offset + mli.offset,{ dxgi_format, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr); &CD3DX12_TEXTURE_COPY_LOCATION(texture_buffer_heap.get_heap(), { heap_offset + mli.offset,{ dxgi_format, (UINT)mli.width, (UINT)mli.height, 1, (UINT)mli.rowPitch } }), nullptr);
miplevel++; miplevel++;
} }
@ -173,25 +167,22 @@ void D3D12GSRender::upload_and_bind_textures(ID3D12GraphicsCommandList *command_
bool is_swizzled = !(textures[i].format() & CELL_GCM_TEXTURE_LN); bool is_swizzled = !(textures[i].format() & CELL_GCM_TEXTURE_LN);
ID3D12Resource *vram_texture; ID3D12Resource *vram_texture;
std::unordered_map<u32, ComPtr<ID3D12Resource> >::const_iterator ItRTT = m_rtts.render_targets_storage.find(texaddr);
std::unordered_map<u32, ComPtr<ID3D12Resource> >::const_iterator ItDS = m_rtts.depth_stencil_storage.find(texaddr);
std::pair<texture_entry, ComPtr<ID3D12Resource> > *cached_texture = m_texture_cache.find_data_if_available(texaddr); std::pair<texture_entry, ComPtr<ID3D12Resource> > *cached_texture = m_texture_cache.find_data_if_available(texaddr);
bool is_render_target = false, is_depth_stencil_texture = false; bool is_render_target = false, is_depth_stencil_texture = false;
if (ItRTT != m_rtts.render_targets_storage.end())
if (vram_texture = m_rtts.get_texture_from_render_target_if_applicable(texaddr))
{ {
vram_texture = ItRTT->second.Get();
is_render_target = true; is_render_target = true;
} }
else if (ItDS != m_rtts.depth_stencil_storage.end()) else if (vram_texture = m_rtts.get_texture_from_depth_stencil_if_applicable(texaddr))
{ {
vram_texture = ItDS->second.Get();
is_depth_stencil_texture = true; is_depth_stencil_texture = true;
} }
else if (cached_texture != nullptr && (cached_texture->first == texture_entry(format, w, h, textures[i].mipmap()))) else if (cached_texture != nullptr && (cached_texture->first == texture_entry(format, w, h, textures[i].mipmap())))
{ {
if (cached_texture->first.m_is_dirty) if (cached_texture->first.m_is_dirty)
{ {
update_existing_texture(textures[i], command_list, m_texture_upload_data, cached_texture->second.Get()); update_existing_texture(textures[i], command_list, m_buffer_data, cached_texture->second.Get());
m_texture_cache.protect_data(texaddr, texaddr, get_texture_size(textures[i])); m_texture_cache.protect_data(texaddr, texaddr, get_texture_size(textures[i]));
} }
vram_texture = cached_texture->second.Get(); vram_texture = cached_texture->second.Get();
@ -200,7 +191,7 @@ void D3D12GSRender::upload_and_bind_textures(ID3D12GraphicsCommandList *command_
{ {
if (cached_texture != nullptr) if (cached_texture != nullptr)
get_current_resource_storage().dirty_textures.push_back(m_texture_cache.remove_from_cache(texaddr)); get_current_resource_storage().dirty_textures.push_back(m_texture_cache.remove_from_cache(texaddr));
ComPtr<ID3D12Resource> tex = upload_single_texture(textures[i], m_device.Get(), command_list, m_texture_upload_data); ComPtr<ID3D12Resource> tex = upload_single_texture(textures[i], m_device.Get(), command_list, m_buffer_data);
std::wstring name = L"texture_@" + std::to_wstring(texaddr); std::wstring name = L"texture_@" + std::to_wstring(texaddr);
tex->SetName(name.c_str()); tex->SetName(name.c_str());
vram_texture = tex.Get(); vram_texture = tex.Get();

View file

@ -28,12 +28,7 @@ std::pair<ID3DBlob *, ID3DBlob *> compileF32toU8CS()
ID3DBlob *bytecode; ID3DBlob *bytecode;
Microsoft::WRL::ComPtr<ID3DBlob> errorBlob; Microsoft::WRL::ComPtr<ID3DBlob> errorBlob;
HRESULT hr = wrapD3DCompile(shaderCode, strlen(shaderCode), "test", nullptr, nullptr, "main", "cs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()); CHECK_HRESULT(wrapD3DCompile(shaderCode, strlen(shaderCode), "test", nullptr, nullptr, "main", "cs_5_0", 0, 0, &bytecode, errorBlob.GetAddressOf()));
if (hr != S_OK)
{
const char *tmp = (const char*)errorBlob->GetBufferPointer();
LOG_ERROR(RSX, tmp);
}
CD3DX12_DESCRIPTOR_RANGE descriptorRange[] = CD3DX12_DESCRIPTOR_RANGE descriptorRange[] =
{ {
// Textures // Textures
@ -47,13 +42,7 @@ std::pair<ID3DBlob *, ID3DBlob *> compileF32toU8CS()
ID3DBlob *rootSignatureBlob; ID3DBlob *rootSignatureBlob;
hr = wrapD3D12SerializeRootSignature(&CD3DX12_ROOT_SIGNATURE_DESC(1, &RP), D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob); CHECK_HRESULT(wrapD3D12SerializeRootSignature(&CD3DX12_ROOT_SIGNATURE_DESC(1, &RP), D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob));
if (hr != S_OK)
{
const char *tmp = (const char*)errorBlob->GetBufferPointer();
LOG_ERROR(RSX, tmp);
}
return std::make_pair(bytecode, rootSignatureBlob); return std::make_pair(bytecode, rootSignatureBlob);
} }
@ -78,12 +67,7 @@ void D3D12GSRender::Shader::Init(ID3D12Device *device, ID3D12CommandQueue *gfxco
Microsoft::WRL::ComPtr<ID3DBlob> fsBytecode; Microsoft::WRL::ComPtr<ID3DBlob> fsBytecode;
Microsoft::WRL::ComPtr<ID3DBlob> errorBlob; Microsoft::WRL::ComPtr<ID3DBlob> errorBlob;
HRESULT hr = wrapD3DCompile(fsCode, strlen(fsCode), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, &fsBytecode, errorBlob.GetAddressOf()); CHECK_HRESULT(wrapD3DCompile(fsCode, strlen(fsCode), "test", nullptr, nullptr, "main", "ps_5_0", 0, 0, &fsBytecode, errorBlob.GetAddressOf()));
if (hr != S_OK)
{
const char *tmp = (const char*)errorBlob->GetBufferPointer();
LOG_ERROR(RSX, tmp);
}
const char *vsCode = STRINGIFY( const char *vsCode = STRINGIFY(
struct VertexInput \n struct VertexInput \n
@ -108,12 +92,7 @@ void D3D12GSRender::Shader::Init(ID3D12Device *device, ID3D12CommandQueue *gfxco
); );
Microsoft::WRL::ComPtr<ID3DBlob> vsBytecode; Microsoft::WRL::ComPtr<ID3DBlob> vsBytecode;
hr = wrapD3DCompile(vsCode, strlen(vsCode), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, &vsBytecode, errorBlob.GetAddressOf()); CHECK_HRESULT(wrapD3DCompile(vsCode, strlen(vsCode), "test", nullptr, nullptr, "main", "vs_5_0", 0, 0, &vsBytecode, errorBlob.GetAddressOf()));
if (hr != S_OK)
{
const char *tmp = (const char*)errorBlob->GetBufferPointer();
LOG_ERROR(RSX, tmp);
}
D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {};
psoDesc.PS.BytecodeLength = fsBytecode->GetBufferSize(); psoDesc.PS.BytecodeLength = fsBytecode->GetBufferSize();
@ -163,14 +142,8 @@ void D3D12GSRender::Shader::Init(ID3D12Device *device, ID3D12CommandQueue *gfxco
Microsoft::WRL::ComPtr<ID3DBlob> rootSignatureBlob; Microsoft::WRL::ComPtr<ID3DBlob> rootSignatureBlob;
hr = wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob); CHECK_HRESULT(wrapD3D12SerializeRootSignature(&rootSignatureDesc, D3D_ROOT_SIGNATURE_VERSION_1, &rootSignatureBlob, &errorBlob));
if (hr != S_OK) CHECK_HRESULT(device->CreateRootSignature(0, rootSignatureBlob->GetBufferPointer(), rootSignatureBlob->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature)));
{
const char *tmp = (const char*)errorBlob->GetBufferPointer();
LOG_ERROR(RSX, tmp);
}
hr = device->CreateRootSignature(0, rootSignatureBlob->GetBufferPointer(), rootSignatureBlob->GetBufferSize(), IID_PPV_ARGS(&m_rootSignature));
psoDesc.pRootSignature = m_rootSignature; psoDesc.pRootSignature = m_rootSignature;
psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;

View file

@ -290,6 +290,7 @@ enum
CELL_GCM_TEXTURE_NEAREST_LINEAR = 5, CELL_GCM_TEXTURE_NEAREST_LINEAR = 5,
CELL_GCM_TEXTURE_LINEAR_LINEAR = 6, CELL_GCM_TEXTURE_LINEAR_LINEAR = 6,
CELL_GCM_TEXTURE_CONVOLUTION_MIN = 7, CELL_GCM_TEXTURE_CONVOLUTION_MIN = 7,
CELL_GCM_TEXTURE_UNKNOWN_MAG_FILTER = 4,
CELL_GCM_PRIMITIVE_POINTS = 1, CELL_GCM_PRIMITIVE_POINTS = 1,
CELL_GCM_PRIMITIVE_LINES = 2, CELL_GCM_PRIMITIVE_LINES = 2,