implement framebuffer fetch

This commit is contained in:
Samuliak 2025-01-08 16:09:15 +01:00
parent 68aa40518d
commit e4068856bc
No known key found for this signature in database
6 changed files with 184 additions and 98 deletions

View file

@ -504,6 +504,26 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
uint64 vsHash2 = 0; uint64 vsHash2 = 0;
_calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2); _calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2);
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL); uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
vsHash += tmp;
auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
// TODO: include always in the hash in case of geometry shader or rect shader
if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS)
{
vsHash += 13ULL;
}
else if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS)
{
// required for Vulkan since we have to write the pointsize in the shader
vsHash += 71ULL;
}
vsHash += (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] ? 21 : 0);
// halfZ
if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
vsHash += 0x1537;
if (g_renderer->GetType() == RendererAPI::Metal) if (g_renderer->GetType() == RendererAPI::Metal)
{ {
if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually) if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually)
@ -544,25 +564,6 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
} }
} }
uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
vsHash += tmp;
auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
// TODO: include always in the hash in case of geometry shader or rect shader
if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS)
{
vsHash += 13ULL;
}
else if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS)
{
// required for Vulkan since we have to write the pointsize in the shader
vsHash += 71ULL;
}
vsHash += (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] ? 21 : 0);
// halfZ
if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
vsHash += 0x1537;
_shaderBaseHash_vs = vsHash; _shaderBaseHash_vs = vsHash;
} }
@ -589,19 +590,6 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b
// get vertex shader // get vertex shader
uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL); uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL);
#if ENABLE_METAL
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
psHash += (uint64)dataType;
psHash = std::rotl<uint64>(psHash, 7);
}
}
#endif
_shaderBaseHash_ps = psHash; _shaderBaseHash_ps = psHash;
} }
@ -635,6 +623,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
auxHashTex += 0x333; auxHashTex += 0x333;
} }
} }
return auxHash + auxHashTex; return auxHash + auxHashTex;
} }
@ -668,6 +657,28 @@ uint64 LatteSHRC_CalcPSAuxHash(LatteDecompilerShader* pixelShader, uint32* conte
auxHash = (auxHash << 3) | (auxHash >> 61); auxHash = (auxHash << 3) | (auxHash >> 61);
auxHash += (uint64)dim; auxHash += (uint64)dim;
} }
// Textures as render targets
for (uint32 i = 0; i < pixelShader->textureUnitListCount; i++)
{
uint8 t = pixelShader->textureUnitList[i];
auxHash = std::rotl<uint64>(auxHash, 11);
auxHash += (uint64)pixelShader->textureRenderTargetIndex[t];
}
#if ENABLE_METAL
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
auxHash = std::rotl<uint64>(auxHash, 7);
auxHash += (uint64)dataType;
}
}
#endif
return auxHash; return auxHash;
} }

View file

@ -183,6 +183,7 @@ struct LatteDecompilerShader
std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2; std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2;
uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined
bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{}; bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{};
uint8 textureRenderTargetIndex[LATTE_NUM_MAX_TEX_UNITS] = {255};
// analyzer stage (pixel outputs) // analyzer stage (pixel outputs)
uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments) uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments)

View file

@ -850,6 +850,42 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
shader->textureUnitList[shader->textureUnitListCount] = i; shader->textureUnitList[shader->textureUnitListCount] = i;
shader->textureUnitListCount++; shader->textureUnitListCount++;
} }
shader->textureRenderTargetIndex[i] = 255;
}
// check if textures are used as render targets
if (shader->shaderType == LatteConst::ShaderType::Pixel)
{
for (sint32 i = 0; i < shader->textureUnitListCount; i++)
{
sint32 textureIndex = shader->textureUnitList[i];
const auto& texRegister = texRegs[textureIndex];
// get physical address of texture data
MPTR physAddr = (texRegister.word2.get_BASE_ADDRESS() << 8);
if (physAddr == MPTR_NULL)
continue; // invalid data
for (sint32 j = 0; j < LATTE_NUM_COLOR_TARGET; j++)
{
uint32* colorBufferRegBase = shaderContext->contextRegisters + (mmCB_COLOR0_BASE + j);
uint32 regColorBufferBase = colorBufferRegBase[mmCB_COLOR0_BASE - mmCB_COLOR0_BASE] & 0xFFFFFF00; // the low 8 bits are ignored? How to Survive seems to rely on this
uint32 regColorSize = colorBufferRegBase[mmCB_COLOR0_SIZE - mmCB_COLOR0_BASE];
uint32 regColorInfo = colorBufferRegBase[mmCB_COLOR0_INFO - mmCB_COLOR0_BASE];
uint32 regColorView = colorBufferRegBase[mmCB_COLOR0_VIEW - mmCB_COLOR0_BASE];
// decode color buffer reg info
Latte::E_HWTILEMODE colorBufferTileMode = (Latte::E_HWTILEMODE)((regColorInfo >> 8) & 0xF);
uint32 numberType = (regColorInfo >> 12) & 7;
Latte::E_GX2SURFFMT colorBufferFormat = LatteMRT::GetColorBufferFormat(j, *shaderContext->contextRegistersNew);
MPTR colorBufferPhysMem = regColorBufferBase;
if (physAddr == colorBufferPhysMem)
{
shader->textureRenderTargetIndex[i] = j;
break;
}
}
}
} }
// for geometry shaders check the copy shader for stream writes // for geometry shaders check the copy shader for stream writes
if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false) if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false)

View file

@ -2260,6 +2260,22 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
return; return;
} }
// Do a framebuffer fetch if possible
// TODO: filter out more?
uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex];
if (renderTargetIndex != 255)
{
src->addFmt("col{}.", renderTargetIndex);
// TODO: clean up
std::string components[] = {"x", "y", "z", "w"};
for (sint32 i = 0; i < numWrittenElements; i++)
{
src->addFmt("{}", components[i]);
}
src->add(");" _CRLF);
return;
}
if (emulateCompare) if (emulateCompare)
{ {
cemu_assert_debug(!isGather); cemu_assert_debug(!isGather);
@ -2630,6 +2646,13 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo
// todo - mip index parameter? // todo - mip index parameter?
if (shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255)
{
// TODO: use the render target size
src->addFmt(" = int4(1920, 1080, 1, 1).");
}
else
{
auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex];
if (texDim == Latte::E_DIM::DIM_1D) if (texDim == Latte::E_DIM::DIM_1D)
@ -2645,6 +2668,7 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo
cemu_assert_debug(false); cemu_assert_debug(false);
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
} }
}
for(sint32 f=0; f<4; f++) for(sint32 f=0; f<4; f++)
{ {

View file

@ -448,6 +448,8 @@ namespace LatteDecompiler
static void _emitTextureDefinitions(LatteDecompilerShaderContext* shaderContext) static void _emitTextureDefinitions(LatteDecompilerShaderContext* shaderContext)
{ {
bool renderTargetIndexUsed[LATTE_NUM_COLOR_TARGET] = {false};
auto src = shaderContext->shaderSource; auto src = shaderContext->shaderSource;
// texture sampler definition // texture sampler definition
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++) for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
@ -455,6 +457,9 @@ namespace LatteDecompiler
if (!shaderContext->output->textureUnitMask[i]) if (!shaderContext->output->textureUnitMask[i])
continue; continue;
uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[i];
if (renderTargetIndex == 255)
{
src->add(", "); src->add(", ");
// Only 2D and 2D array textures can be used with comparison samplers // Only 2D and 2D array textures can be used with comparison samplers
@ -494,6 +499,15 @@ namespace LatteDecompiler
src->addFmt(" tex{} [[texture({})]]", i, binding); src->addFmt(" tex{} [[texture({})]]", i, binding);
src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding); src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding);
} }
else
{
if (!renderTargetIndexUsed[renderTargetIndex])
{
src->addFmt(", {} col{} [[color({})]]", GetDataTypeStr(GetColorBufferDataType(renderTargetIndex, *shaderContext->contextRegistersNew)), renderTargetIndex, renderTargetIndex);
renderTargetIndexUsed[renderTargetIndex] = true;
}
}
}
} }
static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually) static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually)

View file

@ -55,7 +55,7 @@ inline const char* GetDataTypeStr(MetalDataType dataType)
return "float4"; return "float4";
default: default:
cemu_assert_suspicious(); cemu_assert_suspicious();
return ""; return "INVALID";
} }
} }