diff --git a/Utilities/types.h b/Utilities/types.h index d509e3ad76..5fdaf0dc31 100644 --- a/Utilities/types.h +++ b/Utilities/types.h @@ -26,6 +26,21 @@ union alignas(2) f16 { u16 _u16; u8 _u8[2]; + + explicit f16(u16 raw) + { + _u16 = raw; + } + + explicit operator float() const + { + // See http://stackoverflow.com/a/26779139 + // The conversion doesn't handle NaN/Inf + u32 raw = ((_u16 & 0x8000) << 16) | // Sign (just moved) + (((_u16 & 0x7c00) + 0x1C000) << 13) | // Exponent ( exp - 15 + 127) + ((_u16 & 0x03FF) << 13); // Mantissa + return (float&)raw; + } }; using f32 = float; diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.cpp b/rpcs3/Emu/RSX/Common/BufferUtils.cpp index 9d75ad4ce1..53f68dfa72 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.cpp +++ b/rpcs3/Emu/RSX/Common/BufferUtils.cpp @@ -109,34 +109,88 @@ void write_vertex_array_data_to_buffer(void *buffer, u32 first, u32 count, size_ } } -template -void expandIndexedTriangleFan(DstType *dst, const SrcType *src, size_t indexCount) +template +void uploadAsIt(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index) { - IndexType *typedDst = reinterpret_cast(dst); - const IndexType *typedSrc = reinterpret_cast(src); - for (unsigned i = 0; i < indexCount - 2; i++) + for (u32 i = 0; i < indexCount; ++i) { - typedDst[3 * i] = typedSrc[0]; - typedDst[3 * i + 1] = typedSrc[i + 2 - 1]; - typedDst[3 * i + 2] = typedSrc[i + 2]; + IndexType index = vm::ps3::_ref(address + i * sizeof(IndexType)); + (IndexType&)dst[i * sizeof(IndexType)] = index; + if (is_primitive_restart_enabled && index == (IndexType)-1) // Cut + continue; + max_index = MAX2(max_index, index); + min_index = MIN2(min_index, index); } } -template -void expandIndexedQuads(DstType *dst, const SrcType *src, size_t indexCount) +template +void expandIndexedTriangleFan(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index) +{ + for (unsigned i = 0; i < indexCount - 2; i++) + { + IndexType index0 = vm::ps3::_ref(address); + (IndexType&)dst[(3 * i) * sizeof(IndexType)] = index0; + IndexType index1 = vm::ps3::_ref(address + (i + 2 - 1) * sizeof(IndexType)); + (IndexType&)dst[(3 * i + 1) * sizeof(IndexType)] = index1; + IndexType index2 = vm::ps3::_ref(address + (i + 2) * sizeof(IndexType)); + (IndexType&)dst[(3 * i + 2) * sizeof(IndexType)] = index2; + + if (!is_primitive_restart_enabled || index0 != (IndexType)-1) // Cut + { + min_index = MIN2(min_index, index0); + max_index = MAX2(max_index, index0); + } + if (!is_primitive_restart_enabled || index1 != (IndexType)-1) // Cut + { + min_index = MIN2(min_index, index1); + max_index = MAX2(max_index, index1); + } + if (!is_primitive_restart_enabled || index2 != (IndexType)-1) // Cut + { + min_index = MIN2(min_index, index2); + max_index = MAX2(max_index, index2); + } + } +} + +template +void expandIndexedQuads(char *dst, u32 address, size_t indexCount, bool is_primitive_restart_enabled, u32 &min_index, u32 &max_index) { - IndexType *typedDst = reinterpret_cast(dst); - const IndexType *typedSrc = reinterpret_cast(src); for (unsigned i = 0; i < indexCount / 4; i++) { // First triangle - typedDst[6 * i] = typedSrc[4 * i]; - typedDst[6 * i + 1] = typedSrc[4 * i + 1]; - typedDst[6 * i + 2] = typedSrc[4 * i + 2]; + IndexType index0 = vm::ps3::_ref(address + 4 * i * sizeof(IndexType)); + (IndexType&)dst[(6 * i) * sizeof(IndexType)] = index0; + IndexType index1 = vm::ps3::_ref(address + (4 * i + 1) * sizeof(IndexType)); + (IndexType&)dst[(6 * i + 1) * sizeof(IndexType)] = index1; + IndexType index2 = vm::ps3::_ref(address + (4 * i + 2) * sizeof(IndexType)); + (IndexType&)dst[(6 * i + 2) * sizeof(IndexType)] = index2; // Second triangle - typedDst[6 * i + 3] = typedSrc[4 * i + 2]; - typedDst[6 * i + 4] = typedSrc[4 * i + 3]; - typedDst[6 * i + 5] = typedSrc[4 * i]; + (IndexType&)dst[(6 * i + 3) * sizeof(IndexType)] = index2; + IndexType index3 = vm::ps3::_ref(address + (4 * i + 3) * sizeof(IndexType)); + (IndexType&)dst[(6 * i + 4) * sizeof(IndexType)] = index3; + (IndexType&)dst[(6 * i + 5) * sizeof(IndexType)] = index0; + + if (!is_primitive_restart_enabled || index0 != (IndexType)-1) // Cut + { + min_index = MIN2(min_index, index0); + max_index = MAX2(max_index, index0); + } + if (!is_primitive_restart_enabled || index1 != (IndexType)-1) // Cut + { + min_index = MIN2(min_index, index1); + max_index = MAX2(max_index, index1); + } + if (!is_primitive_restart_enabled || index2 != (IndexType)-1) // Cut + { + min_index = MIN2(min_index, index2); + max_index = MAX2(max_index, index2); + } + if (!is_primitive_restart_enabled || index3 != (IndexType)-1) // Cut + { + min_index = MIN2(min_index, index3); + max_index = MAX2(max_index, index3); + } } } @@ -178,79 +232,84 @@ size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count) } } - -void uploadIndexData(unsigned m_draw_mode, unsigned index_type, void* indexBuffer, void* bufferMap, unsigned element_count) +void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned draw_mode, unsigned first, unsigned count) { - if (indexBuffer != nullptr) + unsigned short *typedDst = (unsigned short *)(dst); + switch (draw_mode) { - switch (m_draw_mode) + case CELL_GCM_PRIMITIVE_TRIANGLE_FAN: + for (unsigned i = 0; i < (count - 2); i++) { - case CELL_GCM_PRIMITIVE_POINTS: - case CELL_GCM_PRIMITIVE_LINES: - case CELL_GCM_PRIMITIVE_LINE_LOOP: - case CELL_GCM_PRIMITIVE_LINE_STRIP: - case CELL_GCM_PRIMITIVE_TRIANGLES: - case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP: - case CELL_GCM_PRIMITIVE_QUAD_STRIP: - case CELL_GCM_PRIMITIVE_POLYGON: + typedDst[3 * i] = first; + typedDst[3 * i + 1] = i + 2 - 1; + typedDst[3 * i + 2] = i + 2; + } + return; + case CELL_GCM_PRIMITIVE_QUADS: + for (unsigned i = 0; i < count / 4; i++) { - size_t indexSize = (index_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32) ? 4 : 2; - memcpy(bufferMap, indexBuffer, indexSize * element_count); - return; - } - case CELL_GCM_PRIMITIVE_TRIANGLE_FAN: - switch (index_type) - { - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: - expandIndexedTriangleFan(bufferMap, indexBuffer, element_count); - return; - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: - expandIndexedTriangleFan(bufferMap, indexBuffer, element_count); - return; - default: - abort(); - return; - } - case CELL_GCM_PRIMITIVE_QUADS: - switch (index_type) - { - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: - expandIndexedQuads(bufferMap, indexBuffer, element_count); - return; - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: - expandIndexedQuads(bufferMap, indexBuffer, element_count); - return; - default: - abort(); - return; - } + // First triangle + typedDst[6 * i] = 4 * i + first; + typedDst[6 * i + 1] = 4 * i + 1 + first; + typedDst[6 * i + 2] = 4 * i + 2 + first; + // Second triangle + typedDst[6 * i + 3] = 4 * i + 2 + first; + typedDst[6 * i + 4] = 4 * i + 3 + first; + typedDst[6 * i + 5] = 4 * i + first; } + return; } - else +} + +void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count, unsigned &min_index, unsigned &max_index) +{ + u32 address = rsx::get_address(rsx::method_registers[NV4097_SET_INDEX_ARRAY_ADDRESS], rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] & 0xf); + u32 type = rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4; + + u32 type_size = type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32 ? sizeof(u32) : sizeof(u16); + + u32 base_offset = rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_OFFSET]; + u32 base_index = 0;//rsx::method_registers[NV4097_SET_VERTEX_DATA_BASE_INDEX]; + bool is_primitive_restart_enabled = !!rsx::method_registers[NV4097_SET_RESTART_INDEX_ENABLE]; + + switch (m_draw_mode) { - unsigned short *typedDst = static_cast(bufferMap); - switch (m_draw_mode) + case CELL_GCM_PRIMITIVE_POINTS: + case CELL_GCM_PRIMITIVE_LINES: + case CELL_GCM_PRIMITIVE_LINE_LOOP: + case CELL_GCM_PRIMITIVE_LINE_STRIP: + case CELL_GCM_PRIMITIVE_TRIANGLES: + case CELL_GCM_PRIMITIVE_TRIANGLE_STRIP: + case CELL_GCM_PRIMITIVE_QUAD_STRIP: + case CELL_GCM_PRIMITIVE_POLYGON: + switch (type) { - case CELL_GCM_PRIMITIVE_TRIANGLE_FAN: - for (unsigned i = 0; i < (element_count - 2); i++) - { - typedDst[3 * i] = 0; - typedDst[3 * i + 1] = i + 2 - 1; - typedDst[3 * i + 2] = i + 2; - } + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: + uploadAsIt(dst, address + (first + base_index) * sizeof(u32), count, is_primitive_restart_enabled, min_index, max_index); return; - case CELL_GCM_PRIMITIVE_QUADS: - for (unsigned i = 0; i < element_count / 4; i++) - { - // First triangle - typedDst[6 * i] = 4 * i; - typedDst[6 * i + 1] = 4 * i + 1; - typedDst[6 * i + 2] = 4 * i + 2; - // Second triangle - typedDst[6 * i + 3] = 4 * i + 2; - typedDst[6 * i + 4] = 4 * i + 3; - typedDst[6 * i + 5] = 4 * i; - } + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: + uploadAsIt(dst, address + (first + base_index) * sizeof(u16), count, is_primitive_restart_enabled, min_index, max_index); + return; + } + return; + case CELL_GCM_PRIMITIVE_TRIANGLE_FAN: + switch (type) + { + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: + expandIndexedTriangleFan(dst, address + (first + base_index) * sizeof(u32), count, is_primitive_restart_enabled, min_index, max_index); + return; + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: + expandIndexedTriangleFan(dst, address + (first + base_index) * sizeof(u16), count, is_primitive_restart_enabled, min_index, max_index); + return; + } + case CELL_GCM_PRIMITIVE_QUADS: + switch (type) + { + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: + expandIndexedQuads(dst, address + (first + base_index) * sizeof(u32), count, is_primitive_restart_enabled, min_index, max_index); + return; + case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: + expandIndexedQuads(dst, address + (first + base_index) * sizeof(u16), count, is_primitive_restart_enabled, min_index, max_index); return; } } diff --git a/rpcs3/Emu/RSX/Common/BufferUtils.h b/rpcs3/Emu/RSX/Common/BufferUtils.h index e04bbc33f1..0294256b57 100644 --- a/rpcs3/Emu/RSX/Common/BufferUtils.h +++ b/rpcs3/Emu/RSX/Common/BufferUtils.h @@ -35,6 +35,13 @@ bool isNativePrimitiveMode(unsigned m_draw_mode); size_t getIndexCount(unsigned m_draw_mode, unsigned initial_index_count); /* - * Write index information to bufferMap + * Write count indexes starting at first to dst buffer. + * Returns min/max index found during the process. + * The function expands index buffer for non native primitive type. */ -void uploadIndexData(unsigned m_draw_mode, unsigned index_type, void* indexBuffer, void* bufferMap, unsigned element_count); \ No newline at end of file +void write_index_array_data_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count, unsigned &min_index, unsigned &max_index); + +/* +* Write index data needed to emulate non indexed non native primitive mode. +*/ +void write_index_array_for_non_indexed_non_native_primitive_to_buffer(char* dst, unsigned m_draw_mode, unsigned first, unsigned count); \ No newline at end of file diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp index 2005391f3b..78c66bc379 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Buffer.cpp @@ -118,13 +118,17 @@ void D3D12GSRender::load_vertex_data(u32 first, u32 count) vertex_draw_count += count; } - -void D3D12GSRender::upload_vertex_attributes() +void D3D12GSRender::upload_vertex_attributes(const std::vector > &vertex_ranges) { m_vertex_buffer_views.clear(); m_IASet.clear(); size_t inputSlot = 0; + size_t vertex_count = 0; + + for (const auto &pair : vertex_ranges) + vertex_count += pair.second; + // First array attribute for (int index = 0; index < rsx::limits::vertex_count; ++index) { @@ -136,14 +140,14 @@ void D3D12GSRender::upload_vertex_attributes() u32 type_size = rsx::get_vertex_type_size(info.type); u32 element_size = type_size * info.size; - size_t subBufferSize = element_size * vertex_draw_count; + size_t subBufferSize = element_size * vertex_count; assert(m_vertexIndexData.canAlloc(subBufferSize)); size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); void *buffer; ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); void *bufferMap = (char*)buffer + heapOffset; - for (const auto &range : m_first_count_pairs) + for (const auto &range : vertex_ranges) { write_vertex_array_data_to_buffer(bufferMap, range.first, range.second, index, info); bufferMap = (char*)bufferMap + range.second * element_size; @@ -210,73 +214,11 @@ void D3D12GSRender::upload_vertex_attributes() IAElement.InstanceDataStepRate = 1; m_IASet.push_back(IAElement); } - m_first_count_pairs.clear(); } -D3D12_INDEX_BUFFER_VIEW D3D12GSRender::uploadIndexBuffers(bool indexed_draw) +void D3D12GSRender::load_vertex_index_data(u32 first, u32 count) { - D3D12_INDEX_BUFFER_VIEW indexBufferView = {}; - - // No need for index buffer - if (!indexed_draw && isNativePrimitiveMode(draw_mode)) - { - m_renderingInfo.m_indexed = false; - m_renderingInfo.m_count = vertex_draw_count; - m_renderingInfo.m_baseVertex = 0; - return indexBufferView; - } - m_renderingInfo.m_indexed = true; - - u32 indexed_type = rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4; - - // Index type - size_t indexSize; - if (!indexed_draw) - { - indexBufferView.Format = DXGI_FORMAT_R16_UINT; - indexSize = 2; - } - else - { - switch (indexed_type) - { - default: abort(); - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16: - indexBufferView.Format = DXGI_FORMAT_R16_UINT; - indexSize = 2; - break; - case CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32: - indexBufferView.Format = DXGI_FORMAT_R32_UINT; - indexSize = 4; - break; - } - } - - // Index count - m_renderingInfo.m_count = getIndexCount(draw_mode, indexed_draw ? (u32)(vertex_index_array.size() / indexSize) : vertex_draw_count); - - // Base vertex - if (!indexed_draw && isNativePrimitiveMode(draw_mode)) - m_renderingInfo.m_baseVertex = 0; - else - m_renderingInfo.m_baseVertex = 0; - - // Alloc - size_t subBufferSize = align(m_renderingInfo.m_count * indexSize, 64); - - assert(m_vertexIndexData.canAlloc(subBufferSize)); - size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); - - void *buffer; - ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); - void *bufferMap = (char*)buffer + heapOffset; - uploadIndexData(draw_mode, indexed_type, indexed_draw ? vertex_index_array.data() : nullptr, bufferMap, indexed_draw ? (u32)(vertex_index_array.size() / indexSize) : vertex_draw_count); - m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize)); - m_timers.m_bufferUploadSize += subBufferSize; - indexBufferView.SizeInBytes = (UINT)subBufferSize; - indexBufferView.BufferLocation = m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset; - return indexBufferView; } void D3D12GSRender::setScaleOffset(size_t descriptorIndex) @@ -419,4 +361,83 @@ void D3D12GSRender::FillPixelShaderConstantsBuffer(size_t descriptorIndex) CD3DX12_CPU_DESCRIPTOR_HANDLE(getCurrentResourceStorage().m_descriptorsHeap->GetCPUDescriptorHandleForHeapStart()) .Offset((INT)descriptorIndex, g_descriptorStrideSRVCBVUAV)); } + +void D3D12GSRender::upload_vertex_index_data(ID3D12GraphicsCommandList *cmdlist) +{ + // Index count + m_renderingInfo.m_count = 0; + for (const auto &pair : m_first_count_pairs) + m_renderingInfo.m_count += getIndexCount(draw_mode, pair.second); + + if (!m_renderingInfo.m_indexed) + { + // Non indexed + upload_vertex_attributes(m_first_count_pairs); + cmdlist->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data()); + if (isNativePrimitiveMode(draw_mode)) + return; + // Handle non native primitive + + // Alloc + size_t subBufferSize = align(m_renderingInfo.m_count * sizeof(u16), 64); + assert(m_vertexIndexData.canAlloc(subBufferSize)); + size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); + + void *buffer; + ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); + void *bufferMap = (char*)buffer + heapOffset; + size_t first = 0; + for (const auto &pair : m_first_count_pairs) + { + size_t element_count = getIndexCount(draw_mode, pair.second); + write_index_array_for_non_indexed_non_native_primitive_to_buffer((char*)bufferMap, draw_mode, first, pair.second); + bufferMap = (char*)bufferMap + element_count * sizeof(u16); + first += pair.second; + } + m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize)); + D3D12_INDEX_BUFFER_VIEW indexBufferView = { + m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset, + (UINT)subBufferSize, + DXGI_FORMAT_R16_UINT + }; + cmdlist->IASetIndexBuffer(&indexBufferView); + m_renderingInfo.m_indexed = true; + } + else + { + u32 indexed_type = rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4; + + // Index type + size_t indexSize = (indexed_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16) ? 2 : 4; + + // Alloc + size_t subBufferSize = align(m_renderingInfo.m_count * indexSize, 64); + assert(m_vertexIndexData.canAlloc(subBufferSize)); + size_t heapOffset = m_vertexIndexData.alloc(subBufferSize); + + void *buffer; + ThrowIfFailed(m_vertexIndexData.m_heap->Map(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize), (void**)&buffer)); + void *bufferMap = (char*)buffer + heapOffset; + u32 min_index = (u32)-1, max_index = 0; + for (const auto &pair : m_first_count_pairs) + { + size_t element_count = getIndexCount(draw_mode, pair.second); + write_index_array_data_to_buffer((char*)bufferMap, draw_mode, pair.first, pair.second, min_index, max_index); + bufferMap = (char*)bufferMap + element_count * indexSize; + } + m_vertexIndexData.m_heap->Unmap(0, &CD3DX12_RANGE(heapOffset, heapOffset + subBufferSize)); + D3D12_INDEX_BUFFER_VIEW indexBufferView = { + m_vertexIndexData.m_heap->GetGPUVirtualAddress() + heapOffset, + (UINT)subBufferSize, + (indexed_type == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16) ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT + }; + m_timers.m_bufferUploadSize += subBufferSize; + cmdlist->IASetIndexBuffer(&indexBufferView); + m_renderingInfo.m_indexed = true; + + upload_vertex_attributes({ std::make_pair(0, max_index + 1) }); + cmdlist->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data()); + } +} + #endif diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp index 43512c1003..f9c7cabd9a 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.cpp @@ -244,7 +244,7 @@ D3D12GSRender::D3D12GSRender() m_rtts.Init(m_device.Get()); m_constantsData.Init(m_device.Get(), 1024 * 1024 * 64, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); - m_vertexIndexData.Init(m_device.Get(), 1024 * 1024 * 256, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); + m_vertexIndexData.Init(m_device.Get(), 1024 * 1024 * 384, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); m_textureUploadData.Init(m_device.Get(), 1024 * 1024 * 512, D3D12_HEAP_TYPE_UPLOAD, D3D12_HEAP_FLAG_NONE); if (rpcs3::config.rsx.d3d12.overlay.value()) @@ -413,13 +413,7 @@ void D3D12GSRender::end() std::chrono::time_point vertexIndexDurationStart = std::chrono::system_clock::now(); if (!vertex_index_array.empty() || vertex_draw_count) - { - upload_vertex_attributes(); - const D3D12_INDEX_BUFFER_VIEW &indexBufferView = uploadIndexBuffers(!vertex_index_array.empty()); - getCurrentResourceStorage().m_commandList->IASetVertexBuffers(0, (UINT)m_vertex_buffer_views.size(), m_vertex_buffer_views.data()); - if (m_renderingInfo.m_indexed) - getCurrentResourceStorage().m_commandList->IASetIndexBuffer(&indexBufferView); - } + upload_vertex_index_data(getCurrentResourceStorage().m_commandList.Get()); std::chrono::time_point vertexIndexDurationEnd = std::chrono::system_clock::now(); m_timers.m_vertexIndexDuration += std::chrono::duration_cast(vertexIndexDurationEnd - vertexIndexDurationStart).count(); @@ -434,7 +428,7 @@ void D3D12GSRender::end() std::chrono::time_point programLoadEnd = std::chrono::system_clock::now(); m_timers.m_programLoadDuration += std::chrono::duration_cast(programLoadEnd - programLoadStart).count(); - getCurrentResourceStorage().m_commandList->SetGraphicsRootSignature(m_rootSignatures[m_PSO->second].Get()); + getCurrentResourceStorage().m_commandList->SetGraphicsRootSignature(m_rootSignatures[std::get<2>(*m_PSO)].Get()); getCurrentResourceStorage().m_commandList->OMSetStencilRef(rsx::method_registers[NV4097_SET_STENCIL_FUNC_REF]); std::chrono::time_point constantsDurationStart = std::chrono::system_clock::now(); @@ -448,15 +442,15 @@ void D3D12GSRender::end() std::chrono::time_point constantsDurationEnd = std::chrono::system_clock::now(); m_timers.m_constantsDuration += std::chrono::duration_cast(constantsDurationEnd - constantsDurationStart).count(); - getCurrentResourceStorage().m_commandList->SetPipelineState(m_PSO->first); + getCurrentResourceStorage().m_commandList->SetPipelineState(std::get<0>(*m_PSO)); std::chrono::time_point textureDurationStart = std::chrono::system_clock::now(); - if (m_PSO->second > 0) + if (std::get<2>(*m_PSO) > 0) { size_t usedTexture = UploadTextures(getCurrentResourceStorage().m_commandList.Get(), currentDescriptorIndex + 3); // Fill empty slots - for (; usedTexture < m_PSO->second; usedTexture++) + for (; usedTexture < std::get<2>(*m_PSO); usedTexture++) { D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; @@ -595,9 +589,9 @@ void D3D12GSRender::end() } if (m_renderingInfo.m_indexed) - getCurrentResourceStorage().m_commandList->DrawIndexedInstanced((UINT)m_renderingInfo.m_count, 1, 0, (UINT)m_renderingInfo.m_baseVertex, 0); + getCurrentResourceStorage().m_commandList->DrawIndexedInstanced((UINT)m_renderingInfo.m_count, 1, 0, 0, 0); else - getCurrentResourceStorage().m_commandList->DrawInstanced((UINT)m_renderingInfo.m_count, 1, (UINT)m_renderingInfo.m_baseVertex, 0); + getCurrentResourceStorage().m_commandList->DrawInstanced((UINT)m_renderingInfo.m_count, 1, 0, 0); vertex_index_array.clear(); std::chrono::time_point endDuration = std::chrono::system_clock::now(); @@ -610,7 +604,8 @@ void D3D12GSRender::end() m_commandQueueGraphic->ExecuteCommandLists(1, (ID3D12CommandList**)getCurrentResourceStorage().m_commandList.GetAddressOf()); getCurrentResourceStorage().setNewCommandList(); } - + m_first_count_pairs.clear(); + m_renderingInfo.m_indexed = false; thread::end(); } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h index b6dad40eb7..e8dd4f6d14 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12GSRender.h @@ -309,7 +309,7 @@ private: RSXFragmentProgram fragment_program; PipelineStateObjectCache m_cachePSO; - std::pair *m_PSO; + std::tuple, size_t> *m_PSO; std::unordered_map local_transform_constants; struct @@ -405,19 +405,18 @@ private: ResourceStorage &getNonCurrentResourceStorage(); // Constants storage - DataHeap m_constantsData; + DataHeap m_constantsData; // Vertex storage - DataHeap m_vertexIndexData; + DataHeap m_vertexIndexData; // Texture storage - DataHeap m_textureUploadData; - DataHeap m_UAVHeap; - DataHeap m_readbackResources; + DataHeap m_textureUploadData; + DataHeap m_UAVHeap; + DataHeap m_readbackResources; struct { bool m_indexed; /* > m_first_count_pairs; /** - * Upload all vertex attribute whose (first, count) info were previously accumulated. + * Upload all enabled vertex attributes for vertex in ranges described by vertex_ranges. + * A range in vertex_range is a pair whose first element is the index of the beginning of the + * range, and whose second element is the number of vertex in this range. */ - void upload_vertex_attributes(); - - /** - * Create index buffer for indexed rendering and non native primitive format if nedded, and - * update m_renderingInfo member accordingly. If m_renderingInfo::m_indexed is true, - * returns an index buffer view that can be passed to a command list. - */ - D3D12_INDEX_BUFFER_VIEW uploadIndexBuffers(bool indexed_draw = false); - + void upload_vertex_attributes(const std::vector > &vertex_ranges); void setScaleOffset(size_t descriptorIndex); void FillVertexShaderConstantsBuffer(size_t descriptorIndex); @@ -504,4 +503,5 @@ protected: virtual void flip(int buffer) override; virtual void load_vertex_data(u32 first, u32 count) override; + virtual void load_vertex_index_data(u32 first, u32 count) override; }; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp index e833baa2a2..ca03098028 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.cpp @@ -292,6 +292,9 @@ bool D3D12GSRender::LoadProgram() prop.Blend.RenderTarget[i].RenderTargetWriteMask = mask; prop.IASet = m_IASet; + if (!!rsx::method_registers[NV4097_SET_RESTART_INDEX_ENABLE]) + prop.CutValue = ((rsx::method_registers[NV4097_SET_INDEX_ARRAY_DMA] >> 4) == CELL_GCM_DRAW_INDEX_ARRAY_TYPE_32) ? + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF : D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF; m_PSO = m_cachePSO.getGraphicPipelineState(&vertex_program, &fragment_program, prop, std::make_pair(m_device.Get(), m_rootSignatures)); return m_PSO != nullptr; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h index fee5c8fd6a..721f33d9d7 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12PipelineState.h @@ -16,6 +16,7 @@ struct D3D12PipelineProperties unsigned numMRT : 3; D3D12_DEPTH_STENCIL_DESC DepthStencil; D3D12_RASTERIZER_DESC Rasterization; + D3D12_INDEX_BUFFER_STRIP_CUT_VALUE CutValue; bool operator==(const D3D12PipelineProperties &in) const { @@ -90,6 +91,7 @@ public: u32 id; ComPtr bytecode; + std::vector vertex_shader_inputs; std::vector FragmentConstantOffsetCache; size_t m_textureCount; @@ -103,11 +105,40 @@ public: void Compile(const std::string &code, enum class SHADER_TYPE st); }; +static +bool has_attribute(size_t attribute, const std::vector &desc) +{ + for (const auto &attribute_desc : desc) + { + if (attribute_desc.SemanticIndex == attribute) + return true; + } + return false; +} + +static +std::vector completes_IA_desc(const std::vector &desc, const std::vector &inputs) +{ + std::vector result(desc); + for (size_t attribute : inputs) + { + if (has_attribute(attribute, desc)) + continue; + D3D12_INPUT_ELEMENT_DESC extra_ia_desc = {}; + extra_ia_desc.SemanticIndex = (UINT)attribute; + extra_ia_desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + extra_ia_desc.SemanticName = "TEXCOORD"; + extra_ia_desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + result.push_back(extra_ia_desc); + } + return result; +} + struct D3D12Traits { typedef Shader VertexProgramData; typedef Shader FragmentProgramData; - typedef std::pair PipelineData; + typedef std::tuple, size_t> PipelineData; typedef D3D12PipelineProperties PipelineProperties; typedef std::pair *> ExtraData; @@ -144,7 +175,7 @@ struct D3D12Traits D3D12VertexProgramDecompiler VS(RSXVP->data); std::string shaderCode = VS.Decompile(); vertexProgramData.Compile(shaderCode, Shader::SHADER_TYPE::SHADER_TYPE_VERTEX); - + vertexProgramData.vertex_shader_inputs = VS.input_slots; // TODO: This shouldn't use current dir std::string filename = "./VertexProgram" + std::to_string(ID) + ".hlsl"; fs::file(filename, fom::write | fom::create | fom::trunc).write(shaderCode.c_str(), shaderCode.size()); @@ -155,7 +186,7 @@ struct D3D12Traits PipelineData *BuildProgram(VertexProgramData &vertexProgramData, FragmentProgramData &fragmentProgramData, const PipelineProperties &pipelineProperties, const ExtraData& extraData) { - std::pair *result = new std::pair(); + std::tuple, size_t> *result = new std::tuple, size_t>(); D3D12_GRAPHICS_PIPELINE_STATE_DESC graphicPipelineStateDesc = {}; if (vertexProgramData.bytecode == nullptr) @@ -169,7 +200,7 @@ struct D3D12Traits graphicPipelineStateDesc.PS.pShaderBytecode = fragmentProgramData.bytecode->GetBufferPointer(); graphicPipelineStateDesc.pRootSignature = extraData.second[fragmentProgramData.m_textureCount].Get(); - result->second = fragmentProgramData.m_textureCount; + std::get<2>(*result) = fragmentProgramData.m_textureCount; graphicPipelineStateDesc.BlendState = pipelineProperties.Blend; graphicPipelineStateDesc.DepthStencilState = pipelineProperties.DepthStencil; @@ -181,20 +212,25 @@ struct D3D12Traits graphicPipelineStateDesc.RTVFormats[i] = pipelineProperties.RenderTargetsFormat; graphicPipelineStateDesc.DSVFormat = pipelineProperties.DepthStencilFormat; - graphicPipelineStateDesc.InputLayout.pInputElementDescs = pipelineProperties.IASet.data(); - graphicPipelineStateDesc.InputLayout.NumElements = (UINT)pipelineProperties.IASet.size(); + const std::vector &completed_IA_desc = completes_IA_desc(pipelineProperties.IASet, vertexProgramData.vertex_shader_inputs); + + graphicPipelineStateDesc.InputLayout.pInputElementDescs = completed_IA_desc.data(); + graphicPipelineStateDesc.InputLayout.NumElements = (UINT)completed_IA_desc.size(); graphicPipelineStateDesc.SampleDesc.Count = 1; graphicPipelineStateDesc.SampleMask = UINT_MAX; graphicPipelineStateDesc.NodeMask = 1; - extraData.first->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&result->first)); + graphicPipelineStateDesc.IBStripCutValue = pipelineProperties.CutValue; + + extraData.first->CreateGraphicsPipelineState(&graphicPipelineStateDesc, IID_PPV_ARGS(&std::get<0>(*result))); + std::get<1>(*result) = vertexProgramData.vertex_shader_inputs; return result; } static void DeleteProgram(PipelineData *ptr) { - ptr->first->Release(); + std::get<0>(*ptr)->Release(); delete ptr; } }; diff --git a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp index 15a7936588..78b6df4c7b 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12Texture.cpp @@ -130,7 +130,7 @@ ComPtr uploadSingleTexture( const rsx::texture &texture, ID3D12Device *device, ID3D12GraphicsCommandList *commandList, - DataHeap &textureBuffersHeap) + DataHeap &textureBuffersHeap) { ComPtr vramTexture; size_t w = texture.width(), h = texture.height(); @@ -181,7 +181,7 @@ static void updateExistingTexture( const rsx::texture &texture, ID3D12GraphicsCommandList *commandList, - DataHeap &textureBuffersHeap, + DataHeap &textureBuffersHeap, ID3D12Resource *existingTexture) { size_t w = texture.width(), h = texture.height(); diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp index 856a53de4c..0c2c986baa 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.cpp @@ -43,9 +43,13 @@ void D3D12VertexProgramDecompiler::insertInputs(std::stringstream & OS, const st for (const ParamType PT : inputs) { for (const ParamItem &PI : PT.items) + { OS << " " << PT.type << " " << PI.name << ": TEXCOORD" << PI.location << ";" << std::endl; + input_slots.push_back(PI.location); + } } OS << "};" << std::endl; + } void D3D12VertexProgramDecompiler::insertConstants(std::stringstream & OS, const std::vector & constants) @@ -132,6 +136,8 @@ void D3D12VertexProgramDecompiler::insertMainStart(std::stringstream & OS) OS << " " << PT.type << " " << PI.name; if (!PI.value.empty()) OS << " = " << PI.value; + else + OS << " = " << "float4(0., 0., 0., 0.);"; OS << ";" << std::endl; } } diff --git a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h index 45165fdf2c..bdc1655cd4 100644 --- a/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h +++ b/rpcs3/Emu/RSX/D3D12/D3D12VertexProgramDecompiler.h @@ -19,5 +19,6 @@ protected: virtual void insertMainStart(std::stringstream &OS); virtual void insertMainEnd(std::stringstream &OS); public: + std::vector input_slots; D3D12VertexProgramDecompiler(std::vector& data); }; diff --git a/rpcs3/Emu/RSX/RSXTexture.cpp b/rpcs3/Emu/RSX/RSXTexture.cpp index 066ac3b363..017b4e9146 100644 --- a/rpcs3/Emu/RSX/RSXTexture.cpp +++ b/rpcs3/Emu/RSX/RSXTexture.cpp @@ -144,9 +144,9 @@ namespace rsx return (method_registers[NV4097_SET_TEXTURE_CONTROL1 + (m_index * 8)]); } - u16 texture::bias() const + float texture::bias() const { - return ((method_registers[NV4097_SET_TEXTURE_FILTER + (m_index * 8)]) & 0x1fff); + return float(f16((method_registers[NV4097_SET_TEXTURE_FILTER + (m_index * 8)]) & 0x1fff)); } u8 texture::min_filter() const diff --git a/rpcs3/Emu/RSX/RSXTexture.h b/rpcs3/Emu/RSX/RSXTexture.h index 014d862340..20b0783360 100644 --- a/rpcs3/Emu/RSX/RSXTexture.h +++ b/rpcs3/Emu/RSX/RSXTexture.h @@ -44,7 +44,7 @@ namespace rsx u32 remap() const; // Filter - u16 bias() const; + float bias() const; u8 min_filter() const; u8 mag_filter() const; u8 convolution_filter() const; diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index c84c1714a3..2131b26d82 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -164,7 +164,7 @@ namespace rsx u32 transform_program[512 * 4] = {}; virtual void load_vertex_data(u32 first, u32 count); - void load_vertex_index_data(u32 first, u32 count); + virtual void load_vertex_index_data(u32 first, u32 count); public: u32 ioAddress, ioSize;