fetch vertices manually if needed

This commit is contained in:
Samuliak 2024-10-01 17:38:14 +02:00
parent a3bfde80b0
commit 94e8ed5a46
8 changed files with 215 additions and 149 deletions

View file

@ -8,8 +8,12 @@
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/ISA/LatteInstructions.h"
#include "HW/Latte/Renderer/Renderer.h"
#include "util/containers/LookupTableL3.h"
#include "util/helpers/fspinlock.h"
#if BOOST_OS_MACOS
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#endif
#include <openssl/sha.h> /* SHA1_DIGEST_LENGTH */
#include <openssl/evp.h> /* EVP_Digest */
@ -71,7 +75,7 @@ uint32 LatteShaderRecompiler_getAttributeAlignment(LatteParsedFetchShaderAttribu
return 4;
}
void LatteShader_calculateFSKey(LatteFetchShader* fetchShader)
void LatteShader_calculateFSKey(LatteFetchShader* fetchShader, uint32* contextRegister)
{
uint64 key = 0;
for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++)
@ -104,11 +108,25 @@ void LatteShader_calculateFSKey(LatteFetchShader* fetchShader)
key = std::rotl<uint64>(key, 8);
key += (uint64)attrib->semanticId;
key = std::rotl<uint64>(key, 8);
key += (uint64)(attrib->offset & 3);
key = std::rotl<uint64>(key, 2);
if (g_renderer->GetType() == RendererAPI::Metal)
key += (uint64)attrib->offset;
else
key += (uint64)(attrib->offset & 3);
key = std::rotl<uint64>(key, 7);
}
}
// todo - also hash invalid buffer groups?
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (sint32 g = 0; g < fetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = fetchShader->bufferGroups[g];
key += (uint64)group.attributeBufferIndex;
key = std::rotl<uint64>(key, 5);
}
}
fetchShader->key = key;
}
@ -146,8 +164,8 @@ void LatteFetchShader::CalculateFetchShaderVkHash()
this->vkPipelineHashFragment = h;
}
void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister)
{uint64 key = 0;
void LatteFetchShader::CheckIfVerticesNeedManualFetchMtl(uint32* contextRegister)
{
for (sint32 g = 0; g < bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = bufferGroups[g];
@ -155,12 +173,16 @@ void LatteFetchShader::CalculateFetchShaderMtlObjectShaderHash(uint32* contextRe
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
key += (uint64)bufferIndex;
key = std::rotl<uint64>(key, 5);
key += (uint64)bufferStride;
key = std::rotl<uint64>(key, 5);
if (bufferStride % 4 != 0)
mtlFetchVertexManually = true;
for (sint32 f = 0; f < group.attribCount; f++)
{
auto& attr = group.attrib[f];
if (attr.offset + GetMtlVertexFormatSize(attr.format) > bufferStride)
mtlFetchVertexManually = true;
}
}
mtlShaderHashObject = key;
}
void _fetchShaderDecompiler_parseInstruction_VTX_SEMANTIC(LatteFetchShader* parsedFetchShader, uint32* contextRegister, const LatteClauseInstruction_VTX* instr)
@ -343,9 +365,9 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
{
// empty fetch shader, seen in Minecraft
// these only make sense when vertex shader does not call FS?
LatteShader_calculateFSKey(newFetchShader);
LatteShader_calculateFSKey(newFetchShader, contextRegister);
newFetchShader->CalculateFetchShaderVkHash();
newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister);
newFetchShader->CheckIfVerticesNeedManualFetchMtl(contextRegister);
return newFetchShader;
}
@ -403,9 +425,9 @@ LatteFetchShader* LatteShaderRecompiler_createFetchShader(LatteFetchShader::Cach
}
bufferGroup.vboStride = vboOffset;
}
LatteShader_calculateFSKey(newFetchShader);
LatteShader_calculateFSKey(newFetchShader, contextRegister);
newFetchShader->CalculateFetchShaderVkHash();
newFetchShader->CalculateFetchShaderMtlObjectShaderHash(contextRegister);
newFetchShader->CheckIfVerticesNeedManualFetchMtl(contextRegister);
// register in cache
// it's possible that during multi-threaded shader cache loading, two identical (same hash) fetch shaders get created simultaneously

View file

@ -47,16 +47,15 @@ struct LatteFetchShader
uint64 vkPipelineHashFragment{}; // hash of all fetch shader state that influences the Vulkan graphics pipeline
// Metal
uint64 mtlShaderHashObject{};
bool mtlFetchVertexManually{};
// cache info
CacheHash m_cacheHash{};
bool m_isRegistered{}; // if true, fetch shader is referenced by cache (RegisterInCache() succeeded)
void CalculateFetchShaderVkHash();
void CalculateFetchShaderMtlObjectShaderHash(uint32* contextRegister);
void CheckIfVerticesNeedManualFetchMtl(uint32* contextRegister);
uint64 getVkPipelineHashFragment() const { return vkPipelineHashFragment; };

View file

@ -503,11 +503,21 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
if (g_renderer->GetType() == RendererAPI::Metal)
{
if (usesGeometryShader)
if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually)
{
vsHash += _activeFetchShader->mtlShaderHashObject;
for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g];
uint32 bufferIndex = group.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
vsHash += (uint64)bufferStride;
vsHash = std::rotl<uint64>(vsHash, 7);
}
}
else
if (!usesGeometryShader)
{
// Rasterization
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
@ -524,6 +534,10 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
if (rasterizationEnabled)
vsHash += 51ULL;
// Vertex fetch
if (_activeFetchShader->mtlFetchVertexManually)
vsHash += 349ULL;
}
}
@ -531,6 +545,7 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
vsHash += tmp;
auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
// TODO: always include this in the hash when a geometry shader or rect shader is used
if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS)
{
vsHash += 13ULL;