implement framebuffer fetch

This commit is contained in:
Samuliak 2025-01-08 16:09:15 +01:00
parent 68aa40518d
commit e4068856bc
No known key found for this signature in database
6 changed files with 184 additions and 98 deletions

View file

@ -504,45 +504,6 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
uint64 vsHash2 = 0; uint64 vsHash2 = 0;
_calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2); _calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2);
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL); uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
if (g_renderer->GetType() == RendererAPI::Metal)
{
if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually)
{
for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g];
uint32 bufferIndex = group.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
vsHash += (uint64)bufferStride;
vsHash = std::rotl<uint64>(vsHash, 7);
}
}
if (!usesGeometryShader)
{
// Rasterization
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
// HACK
if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
rasterizationEnabled = true;
const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
uint32 cullBack = polygonControlReg.get_CULL_BACK();
if (cullFront && cullBack)
rasterizationEnabled = false;
if (rasterizationEnabled)
vsHash += 51ULL;
// Vertex fetch
if (_activeFetchShader->mtlFetchVertexManually)
vsHash += 349ULL;
}
}
uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F; uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
vsHash += tmp; vsHash += tmp;
@ -563,6 +524,46 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF()) if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
vsHash += 0x1537; vsHash += 0x1537;
if (g_renderer->GetType() == RendererAPI::Metal)
{
if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually)
{
for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g];
uint32 bufferIndex = group.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
vsHash += (uint64)bufferStride;
vsHash = std::rotl<uint64>(vsHash, 7);
}
}
if (!usesGeometryShader)
{
// Rasterization
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
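// HACK: treat rasterization as enabled whenever VPORT_X_OFFSET_ENA is clear, regardless of DX_RASTERIZATION_KILL (the cull check below can still disable it)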
if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
rasterizationEnabled = true;
const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
uint32 cullBack = polygonControlReg.get_CULL_BACK();
if (cullFront && cullBack)
rasterizationEnabled = false;
if (rasterizationEnabled)
vsHash += 51ULL;
// Vertex fetch
if (_activeFetchShader->mtlFetchVertexManually)
vsHash += 349ULL;
}
}
_shaderBaseHash_vs = vsHash; _shaderBaseHash_vs = vsHash;
} }
@ -589,19 +590,6 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b
// get vertex shader // get vertex shader
uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL); uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL);
#if ENABLE_METAL
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
psHash += (uint64)dataType;
psHash = std::rotl<uint64>(psHash, 7);
}
}
#endif
_shaderBaseHash_ps = psHash; _shaderBaseHash_ps = psHash;
} }
@ -635,6 +623,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
auxHashTex += 0x333; auxHashTex += 0x333;
} }
} }
return auxHash + auxHashTex; return auxHash + auxHashTex;
} }
@ -668,6 +657,28 @@ uint64 LatteSHRC_CalcPSAuxHash(LatteDecompilerShader* pixelShader, uint32* conte
auxHash = (auxHash << 3) | (auxHash >> 61); auxHash = (auxHash << 3) | (auxHash >> 61);
auxHash += (uint64)dim; auxHash += (uint64)dim;
} }
// Textures as render targets: mix the render target index of every used texture unit into the aux hash
for (uint32 i = 0; i < pixelShader->textureUnitListCount; i++)
{
uint8 t = pixelShader->textureUnitList[i];
auxHash = std::rotl<uint64>(auxHash, 11);
auxHash += (uint64)pixelShader->textureRenderTargetIndex[t];
}
#if ENABLE_METAL
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
auxHash = std::rotl<uint64>(auxHash, 7);
auxHash += (uint64)dataType;
}
}
#endif
return auxHash; return auxHash;
} }

View file

@ -183,6 +183,7 @@ struct LatteDecompilerShader
std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2; std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2;
uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined
bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{}; bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{};
uint8 textureRenderTargetIndex[LATTE_NUM_MAX_TEX_UNITS] = {255};
// analyzer stage (pixel outputs) // analyzer stage (pixel outputs)
uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments) uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments)

View file

@ -850,6 +850,42 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
shader->textureUnitList[shader->textureUnitListCount] = i; shader->textureUnitList[shader->textureUnitListCount] = i;
shader->textureUnitListCount++; shader->textureUnitListCount++;
} }
shader->textureRenderTargetIndex[i] = 255;
}
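// check if textures are used as render targets: a texture whose base address equals the base address of one of the color buffers (low 8 bits masked off) gets that color buffer's index recorded
// NOTE(review): the match is stored at textureRenderTargetIndex[i], where i is the position in textureUnitList, but the array is read by texture unit index elsewhere - confirm whether textureIndex was intended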
if (shader->shaderType == LatteConst::ShaderType::Pixel)
{
for (sint32 i = 0; i < shader->textureUnitListCount; i++)
{
sint32 textureIndex = shader->textureUnitList[i];
const auto& texRegister = texRegs[textureIndex];
// get physical address of texture data
MPTR physAddr = (texRegister.word2.get_BASE_ADDRESS() << 8);
if (physAddr == MPTR_NULL)
continue; // invalid data
for (sint32 j = 0; j < LATTE_NUM_COLOR_TARGET; j++)
{
uint32* colorBufferRegBase = shaderContext->contextRegisters + (mmCB_COLOR0_BASE + j);
uint32 regColorBufferBase = colorBufferRegBase[mmCB_COLOR0_BASE - mmCB_COLOR0_BASE] & 0xFFFFFF00; // the low 8 bits are ignored? How to Survive seems to rely on this
uint32 regColorSize = colorBufferRegBase[mmCB_COLOR0_SIZE - mmCB_COLOR0_BASE];
uint32 regColorInfo = colorBufferRegBase[mmCB_COLOR0_INFO - mmCB_COLOR0_BASE];
uint32 regColorView = colorBufferRegBase[mmCB_COLOR0_VIEW - mmCB_COLOR0_BASE];
// decode color buffer reg info
Latte::E_HWTILEMODE colorBufferTileMode = (Latte::E_HWTILEMODE)((regColorInfo >> 8) & 0xF);
uint32 numberType = (regColorInfo >> 12) & 7;
Latte::E_GX2SURFFMT colorBufferFormat = LatteMRT::GetColorBufferFormat(j, *shaderContext->contextRegistersNew);
MPTR colorBufferPhysMem = regColorBufferBase;
if (physAddr == colorBufferPhysMem)
{
shader->textureRenderTargetIndex[i] = j;
break;
}
}
}
} }
// for geometry shaders check the copy shader for stream writes // for geometry shaders check the copy shader for stream writes
if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false) if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false)

View file

@ -2260,6 +2260,22 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
return; return;
} }
// Do a framebuffer fetch if possible
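// TODO: decide whether more cases should be excluded from this path; currently every texture whose render target index is not 255 is read from the color attachment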
uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex];
if (renderTargetIndex != 255)
{
src->addFmt("col{}.", renderTargetIndex);
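// TODO: clean up - this appends the first numWrittenElements components of "xyzw" as the swizzle, one component per loop iteration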
std::string components[] = {"x", "y", "z", "w"};
for (sint32 i = 0; i < numWrittenElements; i++)
{
src->addFmt("{}", components[i]);
}
src->add(");" _CRLF);
return;
}
if (emulateCompare) if (emulateCompare)
{ {
cemu_assert_debug(!isGather); cemu_assert_debug(!isGather);
@ -2630,20 +2646,28 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo
// todo - mip index parameter? // todo - mip index parameter?
auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; if (shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255)
{
if (texDim == Latte::E_DIM::DIM_1D) // TODO: use the render target size
src->addFmt(" = int4(tex{}.get_width(), 1, 1, 1).", texInstruction->textureFetch.textureIndex); src->addFmt(" = int4(1920, 1080, 1, 1).");
else if (texDim == Latte::E_DIM::DIM_1D_ARRAY) }
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_array_size(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA)
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), tex{}.get_array_size(), 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else else
{ {
cemu_assert_debug(false); auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex];
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
if (texDim == Latte::E_DIM::DIM_1D)
src->addFmt(" = int4(tex{}.get_width(), 1, 1, 1).", texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_1D_ARRAY)
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_array_size(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA)
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), tex{}.get_array_size(), 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else
{
cemu_assert_debug(false);
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
}
} }
for(sint32 f=0; f<4; f++) for(sint32 f=0; f<4; f++)

View file

@ -448,6 +448,8 @@ namespace LatteDecompiler
static void _emitTextureDefinitions(LatteDecompilerShaderContext* shaderContext) static void _emitTextureDefinitions(LatteDecompilerShaderContext* shaderContext)
{ {
bool renderTargetIndexUsed[LATTE_NUM_COLOR_TARGET] = {false};
auto src = shaderContext->shaderSource; auto src = shaderContext->shaderSource;
// texture sampler definition // texture sampler definition
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++) for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
@ -455,44 +457,56 @@ namespace LatteDecompiler
if (!shaderContext->output->textureUnitMask[i]) if (!shaderContext->output->textureUnitMask[i])
continue; continue;
src->add(", "); uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[i];
if (renderTargetIndex == 255)
// Only 2D and 2D array textures can be used with comparison samplers
if (shaderContext->shader->textureUsesDepthCompare[i] && IsValidDepthTextureType(shaderContext->shader->textureUnitDim[i]))
src->add("depth");
else
src->add("texture");
if (shaderContext->shader->textureIsIntegerFormat[i])
{ {
// integer samplers src->add(", ");
if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D)
src->add("1d<uint>"); // Only 2D and 2D array textures can be used with comparison samplers
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA) if (shaderContext->shader->textureUsesDepthCompare[i] && IsValidDepthTextureType(shaderContext->shader->textureUnitDim[i]))
src->add("2d<uint>"); src->add("depth");
else else
cemu_assert_unimplemented(); src->add("texture");
if (shaderContext->shader->textureIsIntegerFormat[i])
{
// integer samplers
if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D)
src->add("1d<uint>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA)
src->add("2d<uint>");
else
cemu_assert_unimplemented();
}
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA)
src->add("2d<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D)
src->add("1d<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_ARRAY)
src->add("2d_array<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_CUBEMAP)
src->add("cube_array<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_3D)
src->add("3d<float>");
else
{
cemu_assert_unimplemented();
}
uint32 binding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i];
//uint32 textureBinding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] % 31;
//uint32 samplerBinding = textureBinding % 16;
src->addFmt(" tex{} [[texture({})]]", i, binding);
src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding);
} }
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA)
src->add("2d<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D)
src->add("1d<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_ARRAY)
src->add("2d_array<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_CUBEMAP)
src->add("cube_array<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_3D)
src->add("3d<float>");
else else
{ {
cemu_assert_unimplemented(); if (!renderTargetIndexUsed[renderTargetIndex])
{
src->addFmt(", {} col{} [[color({})]]", GetDataTypeStr(GetColorBufferDataType(renderTargetIndex, *shaderContext->contextRegistersNew)), renderTargetIndex, renderTargetIndex);
renderTargetIndexUsed[renderTargetIndex] = true;
}
} }
uint32 binding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i];
//uint32 textureBinding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] % 31;
//uint32 samplerBinding = textureBinding % 16;
src->addFmt(" tex{} [[texture({})]]", i, binding);
src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding);
} }
} }

View file

@ -55,7 +55,7 @@ inline const char* GetDataTypeStr(MetalDataType dataType)
return "float4"; return "float4";
default: default:
cemu_assert_suspicious(); cemu_assert_suspicious();
return ""; return "INVALID";
} }
} }