diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index f46c68d4..7ad25884 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -504,45 +504,6 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, uint64 vsHash2 = 0; _calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2); uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL); - if (g_renderer->GetType() == RendererAPI::Metal) - { - if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually) - { - for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++) - { - LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g]; - uint32 bufferIndex = group.attributeBufferIndex; - uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; - uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; - - vsHash += (uint64)bufferStride; - vsHash = std::rotl(vsHash, 7); - } - } - - if (!usesGeometryShader) - { - // Rasterization - bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); - - // HACK - if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) - rasterizationEnabled = true; - - const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; - uint32 cullFront = polygonControlReg.get_CULL_FRONT(); - uint32 cullBack = polygonControlReg.get_CULL_BACK(); - if (cullFront && cullBack) - rasterizationEnabled = false; - - if (rasterizationEnabled) - vsHash += 51ULL; - - // Vertex fetch - if (_activeFetchShader->mtlFetchVertexManually) - vsHash += 349ULL; - } - } uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F; vsHash += tmp; @@ -563,6 +524,46 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF()) vsHash += 0x1537; + if (g_renderer->GetType() == RendererAPI::Metal) + { + if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually) + { + for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++) + { + LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g]; + uint32 bufferIndex = group.attributeBufferIndex; + uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; + uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; + + vsHash += (uint64)bufferStride; + vsHash = std::rotl(vsHash, 7); + } + } + + if (!usesGeometryShader) + { + // Rasterization + bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); + + // HACK + if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) + rasterizationEnabled = true; + + const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; + uint32 cullFront = polygonControlReg.get_CULL_FRONT(); + uint32 cullBack = polygonControlReg.get_CULL_BACK(); + if (cullFront && cullBack) + rasterizationEnabled = false; + + if (rasterizationEnabled) + vsHash += 51ULL; + + // Vertex fetch + if (_activeFetchShader->mtlFetchVertexManually) + vsHash += 349ULL; + } + } + _shaderBaseHash_vs = vsHash; } @@ -589,19 +590,6 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b // get vertex shader uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL); -#if ENABLE_METAL - if (g_renderer->GetType() == RendererAPI::Metal) - { - for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) - { - auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew); - uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType; - psHash += (uint64)dataType; - psHash = std::rotl(psHash, 7); - } - } -#endif - _shaderBaseHash_ps = psHash; } @@ -635,6 +623,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont auxHashTex += 0x333; } } + return auxHash + auxHashTex; } @@ -668,6 +657,28 @@ uint64 LatteSHRC_CalcPSAuxHash(LatteDecompilerShader* pixelShader, uint32* conte auxHash = (auxHash << 3) | (auxHash >> 61); auxHash += (uint64)dim; } + + // Textures as render targets + for (uint32 i = 0; i < pixelShader->textureUnitListCount; i++) + { + uint8 t = pixelShader->textureUnitList[i]; + auxHash = std::rotl(auxHash, 11); + auxHash += (uint64)pixelShader->textureRenderTargetIndex[t]; + } + +#if ENABLE_METAL + if (g_renderer->GetType() == RendererAPI::Metal) + { + for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++) + { + auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew); + uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType; + auxHash = std::rotl(auxHash, 7); + auxHash += (uint64)dataType; + } + } +#endif + return auxHash; } diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h index 2812facc..64aa1413 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h @@ -183,6 +183,7 @@ struct LatteDecompilerShader std::bitset textureUnitMask2; uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{}; + uint8 textureRenderTargetIndex[LATTE_NUM_MAX_TEX_UNITS] = {255}; // analyzer stage (pixel outputs) uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp index b9ca7b5d..2d0c7f76 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerAnalyzer.cpp @@ -850,6 +850,42 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD shader->textureUnitList[shader->textureUnitListCount] = i; shader->textureUnitListCount++; } + shader->textureRenderTargetIndex[i] = 255; + } + // check if textures are used as render targets + if (shader->shaderType == LatteConst::ShaderType::Pixel) + { + for (sint32 i = 0; i < shader->textureUnitListCount; i++) + { + sint32 textureIndex = shader->textureUnitList[i]; + const auto& texRegister = texRegs[textureIndex]; + + // get physical address of texture data + MPTR physAddr = (texRegister.word2.get_BASE_ADDRESS() << 8); + if (physAddr == MPTR_NULL) + continue; // invalid data + + for (sint32 j = 0; j < LATTE_NUM_COLOR_TARGET; j++) + { + uint32* colorBufferRegBase = shaderContext->contextRegisters + (mmCB_COLOR0_BASE + j); + uint32 regColorBufferBase = colorBufferRegBase[mmCB_COLOR0_BASE - mmCB_COLOR0_BASE] & 0xFFFFFF00; // the low 8 bits are ignored? How to Survive seems to rely on this + uint32 regColorSize = colorBufferRegBase[mmCB_COLOR0_SIZE - mmCB_COLOR0_BASE]; + uint32 regColorInfo = colorBufferRegBase[mmCB_COLOR0_INFO - mmCB_COLOR0_BASE]; + uint32 regColorView = colorBufferRegBase[mmCB_COLOR0_VIEW - mmCB_COLOR0_BASE]; + // decode color buffer reg info + Latte::E_HWTILEMODE colorBufferTileMode = (Latte::E_HWTILEMODE)((regColorInfo >> 8) & 0xF); + uint32 numberType = (regColorInfo >> 12) & 7; + Latte::E_GX2SURFFMT colorBufferFormat = LatteMRT::GetColorBufferFormat(j, *shaderContext->contextRegistersNew); + + MPTR colorBufferPhysMem = regColorBufferBase; + + if (physAddr == colorBufferPhysMem) + { + shader->textureRenderTargetIndex[i] = j; + break; + } + } + } } // for geometry shaders check the copy shader for stream writes if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp index b925f862..22c511ba 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSL.cpp @@ -2260,6 +2260,22 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex return; } + // Do a framebuffer fetch if possible + // TODO: filter out more? + uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex]; + if (renderTargetIndex != 255) + { + src->addFmt("col{}.", renderTargetIndex); + // TODO: clean up + std::string components[] = {"x", "y", "z", "w"}; + for (sint32 i = 0; i < numWrittenElements; i++) + { + src->addFmt("{}", components[i]); + } + src->add(");" _CRLF); + return; + } + if (emulateCompare) { cemu_assert_debug(!isGather); @@ -2630,20 +2646,28 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo // todo - mip index parameter? - auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; - - if (texDim == Latte::E_DIM::DIM_1D) - src->addFmt(" = int4(tex{}.get_width(), 1, 1, 1).", texInstruction->textureFetch.textureIndex); - else if (texDim == Latte::E_DIM::DIM_1D_ARRAY) - src->addFmt(" = int4(tex{}.get_width(), tex{}.get_array_size(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); - else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) - src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); - else if (texDim == Latte::E_DIM::DIM_2D_ARRAY) - src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), tex{}.get_array_size(), 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + if (shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255) + { + // TODO: use the render target size + src->addFmt(" = int4(1920, 1080, 1, 1)."); + } else { - cemu_assert_debug(false); - src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; + + if (texDim == Latte::E_DIM::DIM_1D) + src->addFmt(" = int4(tex{}.get_width(), 1, 1, 1).", texInstruction->textureFetch.textureIndex); + else if (texDim == Latte::E_DIM::DIM_1D_ARRAY) + src->addFmt(" = int4(tex{}.get_width(), tex{}.get_array_size(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) + src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + else if (texDim == Latte::E_DIM::DIM_2D_ARRAY) + src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), tex{}.get_array_size(), 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + else + { + cemu_assert_debug(false); + src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); + } } for(sint32 f=0; f<4; f++) diff --git a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp index 2d871d99..40e70455 100644 --- a/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp +++ b/src/Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp @@ -448,6 +448,8 @@ namespace LatteDecompiler static void _emitTextureDefinitions(LatteDecompilerShaderContext* shaderContext) { + bool renderTargetIndexUsed[LATTE_NUM_COLOR_TARGET] = {false}; + auto src = shaderContext->shaderSource; // texture sampler definition for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++) @@ -455,44 +457,56 @@ namespace LatteDecompiler if (!shaderContext->output->textureUnitMask[i]) continue; - src->add(", "); - - // Only 2D and 2D array textures can be used with comparison samplers - if (shaderContext->shader->textureUsesDepthCompare[i] && IsValidDepthTextureType(shaderContext->shader->textureUnitDim[i])) - src->add("depth"); - else - src->add("texture"); - - if (shaderContext->shader->textureIsIntegerFormat[i]) + uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[i]; + if (renderTargetIndex == 255) { - // integer samplers - if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D) - src->add("1d"); - else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA) - src->add("2d"); - else - cemu_assert_unimplemented(); + src->add(", "); + + // Only 2D and 2D array textures can be used with comparison samplers + if (shaderContext->shader->textureUsesDepthCompare[i] && IsValidDepthTextureType(shaderContext->shader->textureUnitDim[i])) + src->add("depth"); + else + src->add("texture"); + + if (shaderContext->shader->textureIsIntegerFormat[i]) + { + // integer samplers + if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D) + src->add("1d"); + else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA) + src->add("2d"); + else + cemu_assert_unimplemented(); + } + else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA) + src->add("2d"); + else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D) + src->add("1d"); + else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_ARRAY) + src->add("2d_array"); + else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_CUBEMAP) + src->add("cube_array"); + else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_3D) + src->add("3d"); + else + { + cemu_assert_unimplemented(); + } + + uint32 binding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i]; + //uint32 textureBinding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] % 31; + //uint32 samplerBinding = textureBinding % 16; + src->addFmt(" tex{} [[texture({})]]", i, binding); + src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding); } - else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA) - src->add("2d"); - else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D) - src->add("1d"); - else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_ARRAY) - src->add("2d_array"); - else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_CUBEMAP) - src->add("cube_array"); - else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_3D) - src->add("3d"); else { - cemu_assert_unimplemented(); + if (!renderTargetIndexUsed[renderTargetIndex]) + { + src->addFmt(", {} col{} [[color({})]]", GetDataTypeStr(GetColorBufferDataType(renderTargetIndex, *shaderContext->contextRegistersNew)), renderTargetIndex, renderTargetIndex); + renderTargetIndexUsed[renderTargetIndex] = true; + } } - - uint32 binding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i]; - //uint32 textureBinding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] % 31; - //uint32 samplerBinding = textureBinding % 16; - src->addFmt(" tex{} [[texture({})]]", i, binding); - src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding); } } diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h index 7544ceed..ef25ca5d 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h @@ -55,7 +55,7 @@ inline const char* GetDataTypeStr(MetalDataType dataType) return "float4"; default: cemu_assert_suspicious(); - return ""; + return "INVALID"; } }