diff --git a/src/Cafe/HW/Latte/Core/FetchShader.cpp b/src/Cafe/HW/Latte/Core/FetchShader.cpp index 5933fe05..6da6100b 100644 --- a/src/Cafe/HW/Latte/Core/FetchShader.cpp +++ b/src/Cafe/HW/Latte/Core/FetchShader.cpp @@ -8,8 +8,12 @@ #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h" #include "Cafe/HW/Latte/Core/FetchShader.h" #include "Cafe/HW/Latte/ISA/LatteInstructions.h" +#include "HW/Latte/Renderer/Renderer.h" #include "util/containers/LookupTableL3.h" #include "util/helpers/fspinlock.h" +#if BOOST_OS_MACOS +#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" +#endif #include /* SHA1_DIGEST_LENGTH */ #include /* EVP_Digest */ @@ -71,7 +75,7 @@ uint32 LatteShaderRecompiler_getAttributeAlignment(LatteParsedFetchShaderAttribu return 4; } -void LatteShader_calculateFSKey(LatteFetchShader* fetchShader) +void LatteShader_calculateFSKey(LatteFetchShader* fetchShader, uint32* contextRegister) { uint64 key = 0; for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++) @@ -104,11 +108,25 @@ void LatteShader_calculateFSKey(LatteFetchShader* fetchShader) key = std::rotl(key, 8); key += (uint64)attrib->semanticId; key = std::rotl(key, 8); - key += (uint64)(attrib->offset & 3); - key = std::rotl(key, 2); + if (g_renderer->GetType() == RendererAPI::Metal) + key += (uint64)attrib->offset; + else + key += (uint64)(attrib->offset & 3); + key = std::rotl(key, 7); } } // todo - also hash invalid buffer groups? + + if (g_renderer->GetType() == RendererAPI::Metal) + { + for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++) + { + LatteParsedFetchShaderBufferGroup_t& group = fetchShader->bufferGroups[g]; + key += (uint64)group.attributeBufferIndex; + key = std::rotl(key, 5); + } + } + fetchShader->key = key; } @@ -146,8 +164,8 @@ void LatteFetchShader::CalculateFetchShaderVkHash() this->vkPipelineHashFragment = h; } -void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister) -{uint64 key = 0; +void LatteFetchShader::CheckIfVerticesNeedManualFetchMtl(uint32* contextRegister) +{ for (sint32 g = 0; g < bufferGroups.size(); g++) { LatteParsedFetchShaderBufferGroup_t& group = bufferGroups[g]; @@ -155,12 +173,16 @@ void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRe uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; uint32 bufferStride = (contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - key += (uint64)bufferIndex; - key = std::rotl(key, 5); - key += (uint64)bufferStride; - key = std::rotl(key, 5); + if (bufferStride % 4 != 0) + mtlFetchVertexManually = true; + + for (sint32 f = 0; f < group.attribCount; f++) + { + auto& attr = group.attrib[f]; + if (attr.offset + GetMtlVertexFormatSize(attr.format) > bufferStride) + mtlFetchVertexManually = true; + } } - mtlShaderHashObject = key; } void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* parsedFetchShader, uint32* contextRegister, const LatteClauseInstruction_VTX* instr) @@ -343,9 +365,9 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach { // empty fetch shader, seen in Minecraft // these only make sense when vertex shader does not call FS? - LatteShader_calculateFSKey(newFetchShader); + LatteShader_calculateFSKey(newFetchShader, contextRegister); newFetchShader->CalculateFetchShaderVkHash(); - newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister); + newFetchShader->CheckIfVerticesNeedManualFetchMtl(contextRegister); return newFetchShader; } @@ -403,9 +425,9 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach } bufferGroup.vboStride = vboOffset; } - LatteShader_calculateFSKey(newFetchShader); + LatteShader_calculateFSKey(newFetchShader, contextRegister); newFetchShader->CalculateFetchShaderVkHash(); - newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister); + newFetchShader->CheckIfVerticesNeedManualFetchMtl(contextRegister); // register in cache // its possible that during multi-threaded shader cache loading, two identical (same hash) fetch shaders get created simultaneously diff --git a/src/Cafe/HW/Latte/Core/FetchShader.h b/src/Cafe/HW/Latte/Core/FetchShader.h index 9aeed6bd..1e580f43 100644 --- a/src/Cafe/HW/Latte/Core/FetchShader.h +++ b/src/Cafe/HW/Latte/Core/FetchShader.h @@ -47,16 +47,15 @@ struct LatteFetchShader uint64 vkPipelineHashFragment{}; // hash of all fetch shader state that influences the Vulkan graphics pipeline // Metal - uint64 mtlShaderHashObject{}; + bool mtlFetchVertexManually{}; // cache info CacheHash m_cacheHash{}; bool m_isRegistered{}; // if true, fetch shader is referenced by cache (RegisterInCache() succeeded) - void CalculateFetchShaderVkHash(); - void CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister); + void CheckIfVerticesNeedManualFetchMtl(uint32* contextRegister); uint64 getVkPipelineHashFragment() const { return vkPipelineHashFragment; }; diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 0af3b577..bc1279c3 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -503,11 +503,21 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL); if (g_renderer->GetType() == RendererAPI::Metal) { - if (usesGeometryShader) + if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually) { - vsHash += _activeFetchShader->mtlShaderHashObject; + for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++) + { + LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g]; + uint32 bufferIndex = group.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; + uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + vsHash += (uint64)bufferStride; + vsHash = std::rotl(vsHash, 7); + } } - else + + if (!usesGeometryShader) { // Rasterization bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); @@ -524,6 +534,10 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, if (rasterizationEnabled) vsHash += 51ULL; + + // Vertex fetch + if (_activeFetchShader->mtlFetchVertexManually) + vsHash += 349ULL; } } @@ -531,6 +545,7 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, vsHash += tmp; auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE(); + // TODO: include always in the hash in case of geometry shader or rect shader if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS) { vsHash += 13ULL; diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index 615c8a5f..05ba6aba 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -3854,10 +3854,12 @@ static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* sh void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader) { bool isRectVertexShader = (static_cast(shaderContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]) == LattePrimitiveMode::RECTS); + bool usesGeometryShader = (shaderContext->options->usesGeometryShader || isRectVertexShader); + bool fetchVertexManually = (usesGeometryShader || (shaderContext->fetchShader && shaderContext->fetchShader->mtlFetchVertexManually)); // Rasterization rasterizationEnabled = true; - if (shader->shaderType == LatteConst::ShaderType::Vertex && !(shaderContext->options->usesGeometryShader || isRectVertexShader)) + if (shader->shaderType == LatteConst::ShaderType::Vertex && !usesGeometryShader) { rasterizationEnabled = !shaderContext->contextRegistersNew->PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); @@ -3885,7 +3887,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, src->add("#include " _CRLF); src->add("using namespace metal;" _CRLF); // header part (definitions for inputs and outputs) - LatteDecompiler::emitHeader(shaderContext, isRectVertexShader, rasterizationEnabled); + LatteDecompiler::emitHeader(shaderContext, isRectVertexShader, fetchVertexManually, rasterizationEnabled); // helper functions LatteDecompiler_emitHelperFunctions(shaderContext, src); const char* functionType = ""; @@ -3893,21 +3895,32 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: - if (shaderContext->options->usesGeometryShader || isRectVertexShader) + if (fetchVertexManually) { // TODO: clean this up - // fetchVertex will modify vid in case of an indexed draw + // fetchVertex will modify vid in case of an object shader and an indexed draw // Vertex buffers std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS "; std::string vertexBuffers = "#define VERTEX_BUFFERS "; - std::string inputFetchDefinition = "VertexIn fetchVertex(thread uint& vid, device uint* indexBuffer, uchar indexType VERTEX_BUFFER_DEFINITIONS) {\n"; + std::string inputFetchDefinition = "VertexIn fetchVertex("; + if (usesGeometryShader) + inputFetchDefinition += "thread uint&"; + else + inputFetchDefinition += "uint"; + inputFetchDefinition += " vid, uint iid"; + if (usesGeometryShader) + inputFetchDefinition += ", device uint* indexBuffer, uchar indexType"; + inputFetchDefinition += " VERTEX_BUFFER_DEFINITIONS) {\n"; // Index buffer - inputFetchDefinition += "if (indexType == 1) // UShort\n"; - inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n"; - inputFetchDefinition += "else if (indexType == 2) // UInt\n"; - inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid];\n"; + if (usesGeometryShader) + { + inputFetchDefinition += "if (indexType == 1) // UShort\n"; + inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n"; + inputFetchDefinition += "else if (indexType == 2) // UInt\n"; + inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid];\n"; + } inputFetchDefinition += "VertexIn in;\n"; for (auto& bufferGroup : shaderContext->fetchShader->bufferGroups) @@ -3980,11 +3993,22 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, break; } + // Get the fetch type + std::string fetchTypeStr; + if (attr.fetchType == LatteConst::VertexFetchType2::VERTEX_DATA) + fetchTypeStr = "vid"; + else if (attr.fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA) + fetchTypeStr = "iid"; + else if (attr.fetchType == LatteConst::VertexFetchType2::NO_INDEX_OFFSET_DATA) + fetchTypeStr = "0"; // TODO: correct? + // Fetch the attribute - inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = ", semanticId); - inputFetchDefinition += fmt::format("uint4(*(device {}*)", formatName); + inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = uint4(uint", semanticId); + if (componentCount != 1) + inputFetchDefinition += fmt::format("{}", componentCount); + inputFetchDefinition += fmt::format("(*(device {}*)", formatName); inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex); - inputFetchDefinition += fmt::format(" + vid * {} + {})", bufferStride, attr.offset); + inputFetchDefinition += fmt::format(" + {} * {} + {}))", fetchTypeStr, bufferStride, attr.offset); for (uint8 i = 0; i < (4 - componentCount); i++) inputFetchDefinition += ", 0"; inputFetchDefinition += ");\n"; @@ -4014,7 +4038,10 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, src->add(vertexBuffers.c_str()); src->add("\n"); src->add(inputFetchDefinition.c_str()); + } + if (usesGeometryShader) + { functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]"; outputTypeName = "void"; } @@ -4038,20 +4065,33 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, } // start of main src->addFmt("{} {} main0(", functionType, outputTypeName); - LatteDecompiler::emitInputs(shaderContext, isRectVertexShader); + LatteDecompiler::emitInputs(shaderContext, isRectVertexShader, fetchVertexManually); src->add(") {" _CRLF); - if ((shaderContext->options->usesGeometryShader || isRectVertexShader) && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) + if (fetchVertexManually && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) { if (shader->shaderType == LatteConst::ShaderType::Vertex) { - // Calculate the imaginary vertex id - src->add("uint vid = tig * VERTICES_PER_VERTEX_PRIMITIVE + tid;" _CRLF); - src->add("uint iid = vid / verticesPerInstance;" _CRLF); - src->add("vid %= verticesPerInstance;" _CRLF); - // Fetch the input - src->add("VertexIn in = fetchVertex(vid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF); - // Output is defined as object payload - src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF); + if (usesGeometryShader) + { + // Calculate the imaginary vertex id + src->add("uint vid = tig * VERTICES_PER_VERTEX_PRIMITIVE + tid;" _CRLF); + src->add("uint iid = vid / verticesPerInstance;" _CRLF); + src->add("vid %= verticesPerInstance;" _CRLF); + + // Fetch the input + src->add("VertexIn in = fetchVertex(vid, iid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF); + + // Output is defined as object payload + src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF); + } + else + { + // Fetch the input + src->add("VertexIn in = fetchVertex(vid, iid VERTEX_BUFFERS);" _CRLF); + + if (rasterizationEnabled) + src->add("VertexOut out;" _CRLF); + } } else if (shader->shaderType == LatteConst::ShaderType::Geometry) { @@ -4258,11 +4298,11 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, } // TODO: is the if statement even needed? - if (shaderContext->options->usesGeometryShader || isRectVertexShader) + if (usesGeometryShader) { // import from geometry shader if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) - src->addFmt("{} = as_type(in.passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); + src->addFmt("{} = bitCast(in.passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{} = in.passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); else @@ -4306,7 +4346,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, src->add("out.pointSize = supportBuffer.pointSize;" _CRLF); } - if ((shaderContext->options->usesGeometryShader || isRectVertexShader) && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) + if (usesGeometryShader && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) { if (shader->shaderType == LatteConst::ShaderType::Vertex) { @@ -4346,7 +4386,7 @@ void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, src->add("out.position.z = (out.position.z + out.position.w) / 2.0;" _CRLF); // Return - if (!(shaderContext->options->usesGeometryShader || isRectVertexShader) || shader->shaderType == LatteConst::ShaderType::Pixel) + if (!usesGeometryShader || shader->shaderType == LatteConst::ShaderType::Pixel) src->add("return out;" _CRLF); } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 9770c595..5a2c54ac 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -143,7 +143,7 @@ namespace LatteDecompiler } } - static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader) + static void _emitAttributes(LatteDecompilerShaderContext* decompilerContext, bool fetchVertexManually) { auto src = decompilerContext->shaderSource; std::string attributeNames; @@ -159,7 +159,7 @@ namespace LatteDecompiler cemu_assert_debug(decompilerContext->output->resourceMappingMTL.attributeMapping[i] >= 0); src->addFmt("uint4 attrDataSem{}", i); - if (decompilerContext->options->usesGeometryShader || isRectVertexShader) + if (fetchVertexManually) attributeNames += "#define ATTRIBUTE_NAME" + std::to_string((sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[i]) + " attrDataSem" + std::to_string(i) + "\n"; else src->addFmt(" [[attribute({})]]", (sint32)decompilerContext->output->resourceMappingMTL.attributeMapping[i]); @@ -250,13 +250,13 @@ namespace LatteDecompiler src->add("};" _CRLF _CRLF); } - static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool rasterizationEnabled) + static void _emitInputsAndOutputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually, bool rasterizationEnabled) { auto src = decompilerContext->shaderSource; if (decompilerContext->shaderType == LatteConst::ShaderType::Vertex) { - _emitAttributes(decompilerContext, isRectVertexShader); + _emitAttributes(decompilerContext, fetchVertexManually); } else if (decompilerContext->shaderType == LatteConst::ShaderType::Pixel) { @@ -339,13 +339,12 @@ namespace LatteDecompiler } } - static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool rasterizationEnabled) + static void emitHeader(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually, bool rasterizationEnabled) { auto src = decompilerContext->shaderSource; if ((decompilerContext->options->usesGeometryShader || isRectVertexShader) && (decompilerContext->shaderType == LatteConst::ShaderType::Vertex || decompilerContext->shaderType == LatteConst::ShaderType::Geometry)) { - // TODO: make vsOutPrimType parth of the shader hash LattePrimitiveMode vsOutPrimType = static_cast(decompilerContext->contextRegisters[mmVGT_PRIMITIVE_TYPE]); uint32 gsOutPrimType = decompilerContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE]; @@ -398,7 +397,7 @@ namespace LatteDecompiler // uniform buffers _emitUniformBuffers(decompilerContext); // inputs and outputs - _emitInputsAndOutputs(decompilerContext, isRectVertexShader, rasterizationEnabled); + _emitInputsAndOutputs(decompilerContext, isRectVertexShader, fetchVertexManually, rasterizationEnabled); if (dump_shaders_enabled) decompilerContext->shaderSource->add("// end of shader inputs/outputs" _CRLF); @@ -472,7 +471,7 @@ namespace LatteDecompiler } } - static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader) + static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually) { auto src = decompilerContext->shaderSource; @@ -491,14 +490,18 @@ namespace LatteDecompiler src->addFmt(", device uint* indexBuffer [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexBufferBinding); // TODO: put into the support buffer? src->addFmt(", constant uchar& indexType [[buffer({})]]", decompilerContext->output->resourceMappingMTL.indexTypeBinding); - src->add(" VERTEX_BUFFER_DEFINITIONS"); } else { - src->add("VertexIn in [[stage_in]]"); - src->add(", uint vid [[vertex_id]]"); + src->add("uint vid [[vertex_id]]"); src->add(", uint iid [[instance_id]]"); } + + if (fetchVertexManually) + src->add(" VERTEX_BUFFER_DEFINITIONS"); + else + src->add(", VertexIn in [[stage_in]]"); + break; case LatteConst::ShaderType::Geometry: src->add("MeshType mesh"); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp index 436ef99c..b7f5c88c 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalPipelineCache.cpp @@ -326,76 +326,81 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte if (pipeline) return pipeline; - // Vertex descriptor - MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); - for (auto& bufferGroup : fetchShader->bufferGroups) - { - std::optional fetchType; - - uint32 minBufferStride = 0; - for (sint32 j = 0; j < bufferGroup.attribCount; ++j) - { - auto& attr = bufferGroup.attrib[j]; - - uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; - if (semanticId == (uint32)-1) - continue; // attribute not used? - - auto attribute = vertexDescriptor->attributes()->object(semanticId); - attribute->setOffset(attr.offset); - attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex)); - attribute->setFormat(GetMtlVertexFormat(attr.format)); - - minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format)); - - if (fetchType.has_value()) - cemu_assert_debug(fetchType == attr.fetchType); - else - fetchType = attr.fetchType; - - if (attr.fetchType == LatteConst::INSTANCE_DATA) - { - cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported - } - } - - uint32 bufferIndex = bufferGroup.attributeBufferIndex; - uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; - uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - - auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); - if (bufferStride == 0) - { - // Buffer stride cannot be zero, let's use the minimum stride - bufferStride = minBufferStride; - - // Additionally, constant vertex function must be used - layout->setStepFunction(MTL::VertexStepFunctionConstant); - layout->setStepRate(0); - } - else - { - if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA) - layout->setStepFunction(MTL::VertexStepFunctionPerVertex); - else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA) - layout->setStepFunction(MTL::VertexStepFunctionPerInstance); - else - { - debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value()); - cemu_assert(false); - } - } - bufferStride = Align(bufferStride, 4); - layout->setStride(bufferStride); - } - auto vertexShaderMtl = static_cast(vertexShader->shader); // Render pipeline state MTL::RenderPipelineDescriptor* desc = MTL::RenderPipelineDescriptor::alloc()->init(); desc->setVertexFunction(vertexShaderMtl->GetFunction()); - // TODO: don't always set the vertex descriptor? - desc->setVertexDescriptor(vertexDescriptor); + + // Vertex descriptor + if (!fetchShader->mtlFetchVertexManually) + { + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::alloc()->init(); + for (auto& bufferGroup : fetchShader->bufferGroups) + { + std::optional fetchType; + + uint32 minBufferStride = 0; + for (sint32 j = 0; j < bufferGroup.attribCount; ++j) + { + auto& attr = bufferGroup.attrib[j]; + + uint32 semanticId = vertexShader->resourceMapping.attributeMapping[attr.semanticId]; + if (semanticId == (uint32)-1) + continue; // attribute not used? + + auto attribute = vertexDescriptor->attributes()->object(semanticId); + attribute->setOffset(attr.offset); + attribute->setBufferIndex(GET_MTL_VERTEX_BUFFER_INDEX(attr.attributeBufferIndex)); + attribute->setFormat(GetMtlVertexFormat(attr.format)); + + minBufferStride = std::max(minBufferStride, attr.offset + GetMtlVertexFormatSize(attr.format)); + + if (fetchType.has_value()) + cemu_assert_debug(fetchType == attr.fetchType); + else + fetchType = attr.fetchType; + + if (attr.fetchType == LatteConst::INSTANCE_DATA) + { + cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported + } + } + + uint32 bufferIndex = bufferGroup.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; + uint32 bufferStride = (lcr.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + auto layout = vertexDescriptor->layouts()->object(GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); + if (bufferStride == 0) + { + // Buffer stride cannot be zero, let's use the minimum stride + bufferStride = minBufferStride; + + // Additionally, constant vertex function must be used + layout->setStepFunction(MTL::VertexStepFunctionConstant); + layout->setStepRate(0); + } + else + { + if (!fetchType.has_value() || fetchType == LatteConst::VertexFetchType2::VERTEX_DATA) + layout->setStepFunction(MTL::VertexStepFunctionPerVertex); + else if (fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA) + layout->setStepFunction(MTL::VertexStepFunctionPerInstance); + else + { + debug_printf("unimplemented vertex fetch type %u\n", (uint32)fetchType.value()); + cemu_assert(false); + } + } + bufferStride = Align(bufferStride, 4); + layout->setStride(bufferStride); + } + + // TODO: don't always set the vertex descriptor? + desc->setVertexDescriptor(vertexDescriptor); + vertexDescriptor->release(); + } SetFragmentState(desc, lastUsedFBO, activeFBO, pixelShader, lcr); @@ -448,7 +453,6 @@ MTL::RenderPipelineState* MetalPipelineCache::GetRenderPipelineState(const Latte } } desc->release(); - vertexDescriptor->release(); return pipeline; } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp index f33f8af2..41f48b11 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp @@ -18,11 +18,11 @@ #include "Cafe/HW/Latte/Core/LatteIndices.h" #include "Cemu/Logging/CemuDebugLogging.h" #include "Cemu/Logging/CemuLogging.h" -#include "HW/Latte/Core/LatteConst.h" -#include "HW/Latte/Renderer/Metal/MetalCommon.h" -#include "HW/Latte/Renderer/Metal/MetalLayerHandle.h" -#include "HW/Latte/Renderer/Renderer.h" -#include "Metal/MTLRenderPipeline.hpp" +#include "Cafe/HW/Latte/Core/FetchShader.h" +#include "Cafe/HW/Latte/Core/LatteConst.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" +#include "Cafe/HW/Latte/Renderer/Metal/MetalLayerHandle.h" +#include "Cafe/HW/Latte/Renderer/Renderer.h" #include "config/CemuConfig.h" #define IMGUI_IMPL_METAL_CPP @@ -975,6 +975,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 bool isPrimitiveRect = (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS); bool usesGeometryShader = (geometryShader != nullptr || isPrimitiveRect); + //bool fetchVertexManually = (usesGeometryShader || fetchShader->mtlFetchVertexManually); // Index buffer Renderer::INDEX_TYPE hostIndexType; @@ -1174,26 +1175,8 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 auto& vertexBufferRange = m_state.m_vertexBuffers[i]; if (vertexBufferRange.offset != INVALID_OFFSET) { - MTL::Buffer* buffer; - size_t offset; - - // Restride - if (usesGeometryShader) - { - // Object shaders don't need restriding, since the attributes are fetched in the shader - buffer = m_memoryManager->GetBufferCache(); - offset = m_state.m_vertexBuffers[i].offset; - } - else - { - uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + i * 7; - uint32 bufferStride = (LatteGPUState.contextNew.GetRawView()[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - - auto restridedBuffer = m_memoryManager->RestrideBufferIfNeeded(i, bufferStride, barrierBuffers); - - buffer = restridedBuffer.buffer; - offset = restridedBuffer.offset; - } + MTL::Buffer* buffer = m_memoryManager->GetBufferCache(); + size_t offset = m_state.m_vertexBuffers[i].offset; // Bind SetBuffer(renderCommandEncoder, GetMtlShaderType(vertexShader->shaderType, usesGeometryShader), buffer, offset, GET_MTL_VERTEX_BUFFER_INDEX(i)); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 84751eee..359b9fd0 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -20,7 +20,7 @@ RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(mslCode), nullptr, &error); if (error) { - cemuLog_log(LogType::Force, "failed to create library: {}", error->localizedDescription()->utf8String()); + cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), mslCode.c_str()); error->release(); return; }