#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Core/LatteShaderAssembly.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
#include "Cafe/HW/Latte/ISA/LatteReg.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
#include "Cafe/GraphicPack/GraphicPack2.h"
#include "util/helpers/StringParser.h"
#include "config/ActiveSettings.h"
#include "util/Zir/EmitterGLSL/ZpIREmitGLSL.h"
#include "util/Zir/Core/ZpIRDebug.h"
#include "util/containers/flat_hash_map.hpp"

// Remembers the most recently hashed program (pointer + size) together with
// its two hash values, so that hashing the same program again can be skipped.
struct _ShaderHashCache
{
	uint64 prevHash1;
	uint64 prevHash2;
	uint32* prevProgramCode;
	uint32 prevProgramSize;
};

_ShaderHashCache hashCacheVS = { 0 };
_ShaderHashCache hashCacheGS = { 0 };
_ShaderHashCache hashCachePS = { 0 };

LatteFetchShader* _activeFetchShader = nullptr;
LatteDecompilerShader* _activeVertexShader = nullptr;
LatteDecompilerShader* _activeGeometryShader = nullptr;
LatteDecompilerShader* _activePixelShader = nullptr;

// runtime shader cache
// Maps a shader base hash to the head of a singly linked chain (via
// LatteDecompilerShader::next) of shaders sharing that base hash.
using SHRC_CACHE_TYPE = ska::flat_hash_map<uint64, LatteDecompilerShader*>;

SHRC_CACHE_TYPE sVertexShaders(512);
SHRC_CACHE_TYPE sGeometryShaders(512);
SHRC_CACHE_TYPE sPixelShaders(512);

uint64 _shaderBaseHash_vs;
uint64 _shaderBaseHash_gs;
uint64 _shaderBaseHash_ps;

std::atomic_int g_compiled_shaders_total = 0;
std::atomic_int g_compiled_shaders_async = 0;

// Accessors for the currently active shaders.
LatteFetchShader* LatteSHRC_GetActiveFetchShader()
{
	return _activeFetchShader;
}

LatteDecompilerShader* LatteSHRC_GetActiveVertexShader()
{
	return _activeVertexShader;
}

LatteDecompilerShader* LatteSHRC_GetActiveGeometryShader()
{
	return _activeGeometryShader;
}

LatteDecompilerShader* LatteSHRC_GetActivePixelShader()
{
	return _activePixelShader;
}

// Returns the runtime cache that stores shaders of the given type.
// Any type other than Vertex/Geometry is treated as Pixel (debug-asserted).
inline ska::flat_hash_map<uint64, LatteDecompilerShader*>& LatteSHRC_GetCacheByType(LatteConst::ShaderType shaderType)
{
	if (shaderType == LatteConst::ShaderType::Vertex)
		return sVertexShaders;
	else if (shaderType == LatteConst::ShaderType::Geometry)
		return sGeometryShaders;
	cemu_assert_debug(shaderType == LatteConst::ShaderType::Pixel);
	return sPixelShaders;
}

// calculate hash from shader binary
// this algorithm could be more efficient since we could leverage the fact that the size is always aligned to 8 byte
// but since this is baked into the shader names used for gfx packs and shader caches we can't really change this
//
// programCode: shader binary, read as 32-bit words
// programSize: size in bytes; only programSize / 4 whole words are hashed
// outputHash1: additive hash, rotated left by 3 after every word
// outputHash2: xor hash, rotated right by 7 after every word
void _calcShaderHashGeneric(uint32* programCode, uint32 programSize, uint64& outputHash1, uint64& outputHash2)
{
	outputHash1 = 0;
	outputHash2 = 0;
	for (uint32 i = 0; i < programSize / 4; i++)
	{
		uint32 temp = programCode[i];
		outputHash1 += (uint64)temp;
		outputHash2 ^= (uint64)temp;
		outputHash1 = (outputHash1 << 3) | (outputHash1 >> 61);
		outputHash2 = (outputHash2 >> 7) | (outputHash2 << 57);
	}
}

// Hashes a shader program, reusing the values stored in hashCache when the
// program pointer and size are unchanged since the previous call.
// A null programCode clears the cache entry and yields two zero hashes.
void _calculateShaderProgramHash(uint32* programCode, uint32 programSize, _ShaderHashCache* hashCache, uint64* outputHash1, uint64* outputHash2)
{
	uint64 progHash1 = 0;
	uint64 progHash2 = 0;
	if (!programCode)
	{
		hashCache->prevProgramCode = nullptr;
		hashCache->prevProgramSize = 0;
		hashCache->prevHash1 = 0;
		hashCache->prevHash2 = 0;
	}
	else if (hashCache->prevProgramCode != programCode || hashCache->prevProgramSize != programSize)
	{
		// different program than last time -> recalculate and remember
		_calcShaderHashGeneric(programCode, programSize, progHash1, progHash2);
		hashCache->prevProgramCode = programCode;
		hashCache->prevProgramSize = programSize;
		hashCache->prevHash1 = progHash1;
		hashCache->prevHash2 = progHash2;
	}
	else
	{
		// same pointer and size as last time -> reuse cached hashes
		progHash1 = hashCache->prevHash1;
		progHash2 = hashCache->prevHash2;
	}
	*outputHash1 = progHash1;
	*outputHash2 = progHash2;
}

// Forgets the remembered program pointer/size of all three hash caches so the
// next hash request for a non-null program is recalculated.
// The stored prevHash1/prevHash2 values are left as they are.
void LatteSHRC_ResetCachedShaderHash()
{
	hashCacheVS.prevProgramCode = nullptr;
	hashCacheVS.prevProgramSize = 0;
	hashCacheGS.prevProgramCode = nullptr;
	hashCacheGS.prevProgramSize = 0;
	hashCachePS.prevProgramCode = nullptr;
	hashCachePS.prevProgramSize = 0;
}

LatteShaderPSInputTable _activePSImportTable;

// Returns the pixel shader input table describing the current PS imports.
LatteShaderPSInputTable* LatteSHRC_GetPSInputTable()
{
	return &_activePSImportTable;
}
bool LatteSHRC_RemoveFromCache(LatteDecompilerShader* shader) { bool removed = false; auto& cache = LatteSHRC_GetCacheByType(shader->shaderType); // remove from hashtable auto baseIt = cache.find(shader->baseHash); if (baseIt == cache.end()) { cemu_assert_suspicious(); // deleting from runtime cache but shader is not present? } else if (baseIt->second == shader) { if (baseIt->second->next) cache.emplace(shader->baseHash, baseIt->second->next); else cache.erase(baseIt); removed = true; } else { // remove from chain LatteDecompilerShader* shaderChain = baseIt->second; while (shaderChain->next) { if (shaderChain->next == shader) { shaderChain->next = shaderChain->next->next; removed = true; break; } } } return removed; } void LatteSHRC_RemoveFromCacheByHash(uint64 shader_base_hash, uint64 shader_aux_hash, LatteConst::ShaderType type) { LatteDecompilerShader* shader = nullptr; if (type == LatteConst::ShaderType::Vertex) shader = LatteSHRC_FindVertexShader(shader_base_hash, shader_aux_hash); else if (type == LatteConst::ShaderType::Geometry) shader = LatteSHRC_FindGeometryShader(shader_base_hash, shader_aux_hash); else if (type == LatteConst::ShaderType::Pixel) shader = LatteSHRC_FindPixelShader(shader_base_hash, shader_aux_hash); if (shader) LatteSHRC_RemoveFromCache(shader); } void LatteShader_free(LatteDecompilerShader* shader) { LatteSHRC_RemoveFromCache(shader); if (shader->shader) delete shader->shader; shader->shader = nullptr; delete shader; } // both vertex and geometry/pixel shader depend on PS inputs // we prepare the PS import info in advance void LatteShader_UpdatePSInputs(uint32* contextRegisters) { // PS control uint32 psControl0 = contextRegisters[mmSPI_PS_IN_CONTROL_0]; uint32 spi0_positionEnable = (psControl0 >> 8) & 1; uint32 spi0_positionCentroid = (psControl0 >> 9) & 1; cemu_assert_debug(spi0_positionCentroid == 0); // controls gl_FragCoord uint32 spi0_positionAddr = (psControl0 >> 10) & 0x1F; // controls gl_FragCoord uint32 spi0_paramGen = 
(psControl0 >> 15) & 0xF; // used for gl_PointCoords uint32 spi0_paramGenAddr = (psControl0 >> 19) & 0x7F; sint32 importIndex = 0; //cemu_assert_debug(((psControl0>>26)&3) == 1); // BARYC_SAMPLE_CNTL //cemu_assert_debug((psControl0&(1 << 28)) == 0); // PERSP_GRADIENT_ENA //cemu_assert_debug((psControl0&(1 << 29)) == 0); // LINEAR_GRADIENT_ENA // if LINEAR_GRADIENT_ENA_bit is enabled, the pixel shader accesses gl_ClipSize? // VS/GS parameters uint32 numPSInputs = contextRegisters[mmSPI_PS_IN_CONTROL_0] & 0x3F; uint64 key = 0; if (spi0_positionEnable) { key += (uint64)spi0_positionAddr + 1; } // parameter gen if (spi0_paramGen != 0) { key += std::rotr(spi0_paramGen, 7); key += std::rotr(spi0_paramGenAddr, 3); _activePSImportTable.paramGen = spi0_paramGen; _activePSImportTable.paramGenGPR = spi0_paramGenAddr; } else { _activePSImportTable.paramGen = 0; } // semantic imports from vertex shader #ifdef CEMU_DEBUG_ASSERT uint8 semanticMask[256 / 8] = { 0 }; #endif cemu_assert_debug(numPSInputs <= GPU7_PS_MAX_INPUTS); numPSInputs = std::min(numPSInputs, GPU7_PS_MAX_INPUTS); for (uint32 f = 0; f < numPSInputs; f++) { uint32 psInputControl = contextRegisters[mmSPI_PS_INPUT_CNTL_0 + f]; uint32 psSemanticId = (psInputControl & 0xFF); uint8 defaultValue = (psInputControl>>8)&3; // default: // 0 -> 0.0 0.0 0.0 0.0 // 1 -> 0.0 0.0 0.0 1.0 // 2 -> 1.0 1.0 1.0 0.0 // 3 -> 1.0 1.0 1.0 1.0 cemu_assert_debug(defaultValue <= 1); uint32 uknBits = psInputControl & ~((0xFF)|(0x3<<8) | (1 << 10) | (1 << 12)); uknBits &= ~0x800; // FLAT_SHADE //cemu_assert_debug(uknBits == 0); //cemu_assert_debug(((psInputControl >> 11) & 1) == 0); // centroid //cemu_assert_debug(((psInputControl >> 17) & 1) == 0); // point sprite coord cemu_assert_debug(psSemanticId != 0xFF); key += (uint64)psInputControl; key = std::rotl(key, 7); if (spi0_positionEnable && f == spi0_positionAddr) { _activePSImportTable.import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION; _activePSImportTable.import[f].isFlat = 
false; _activePSImportTable.import[f].isNoPerspective = false; key += (uint64)0x33; } else { #ifdef CEMU_DEBUG_ASSERT if (semanticMask[psSemanticId >> 3] & (1 << (psSemanticId & 7))) { forceLogDebug_printf("SemanticId already used"); } semanticMask[psSemanticId >> 3] |= (1 << (psSemanticId & 7)); #endif _activePSImportTable.import[f].semanticId = psSemanticId; _activePSImportTable.import[f].isFlat = (psInputControl&(1 << 10)) != 0; _activePSImportTable.import[f].isNoPerspective = (psInputControl&(1 << 12)) != 0; } } _activePSImportTable.key = key; _activePSImportTable.count = numPSInputs; } void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync) { if (shader->hasError ) { forceLog_printf("Unable to compile shader %I64x", shader->baseHash); return; } GraphicPack2::GP_SHADER_TYPE gpShaderType; RendererShader::ShaderType shaderType; if (shader->shaderType == LatteConst::ShaderType::Vertex) { shaderType = RendererShader::ShaderType::kVertex; gpShaderType = GraphicPack2::GP_SHADER_TYPE::VERTEX; } else if (shader->shaderType == LatteConst::ShaderType::Geometry) { shaderType = RendererShader::ShaderType::kGeometry; gpShaderType = GraphicPack2::GP_SHADER_TYPE::GEOMETRY; } else if (shader->shaderType == LatteConst::ShaderType::Pixel) { shaderType = RendererShader::ShaderType::kFragment; gpShaderType = GraphicPack2::GP_SHADER_TYPE::PIXEL; } // check if a custom shader is present std::string shaderSrc; const std::string* customShaderSrc = GraphicPack2::FindCustomShaderSource(shader->baseHash, shader->auxHash, gpShaderType, g_renderer->GetType() == RendererAPI::Vulkan); if (customShaderSrc) { shaderSrc.assign(*customShaderSrc); shader->isCustomShader = true; } else shaderSrc.assign(shader->strBuf_shaderSource->c_str()); if (shaderType == RendererShader::ShaderType::kVertex && (shader->baseHash == 0x15bc7edf9de2ed30 || shader->baseHash == 0x83a697d61a3b9202 || shader->baseHash == 0x97bc44a5028381c6 || shader->baseHash == 0x24838b84d15a1da1)) { 
forceLogDebug_printf("Filtered shader to avoid AMD crash"); shader->shader = nullptr; shader->hasError = true; return; } // create shader shader->shader = g_renderer->shader_create(shaderType, shader->baseHash, shader->auxHash, shaderSrc, true, shader->isCustomShader); if (shader->shader == nullptr) shader->hasError = true; // after renderer shader creation we can throw away any intermediate info LatteShader_CleanupAfterCompile(shader); } void LatteShader_FinishCompilation(LatteDecompilerShader* shader) { if (shader->hasError) { forceLogDebug_printf("LatteShader_finishCompilation(): Skipped because of error in shader %llx", shader->baseHash); return; } shader->shader->WaitForCompiled(); LatteShader_prepareSeparableUniforms(shader); LatteShader_CleanupAfterCompile(shader); } void LatteSHRC_RegisterShader(LatteDecompilerShader* shader, uint64 baseHash, uint64 auxHash) { auto& cache = LatteSHRC_GetCacheByType(shader->shaderType); shader->baseHash = baseHash; shader->auxHash = auxHash; auto it = cache.find(baseHash); if (it == cache.end()) { shader->next = nullptr; cache.emplace(shader->baseHash, shader); } else { shader->next = it->second->next; it->second->next = shader; } } LatteDecompilerShader* LatteSHRC_GetFromChain(LatteDecompilerShader* baseShader, uint64 baseHash, uint64 auxHash) { while (baseShader && baseShader->auxHash != auxHash) baseShader = baseShader->next; return baseShader; } LatteDecompilerShader* LatteSHRC_Get(SHRC_CACHE_TYPE& cache, uint64 baseHash, uint64 auxHash) { auto it = cache.find(baseHash); if (it == cache.end()) return nullptr; LatteDecompilerShader* baseShader = it->second; if (!baseShader) return nullptr; while (baseShader && baseShader->auxHash != auxHash) baseShader = baseShader->next; return baseShader; } LatteDecompilerShader* LatteSHRC_FindVertexShader(uint64 baseHash, uint64 auxHash) { return LatteSHRC_Get(sVertexShaders, baseHash, auxHash); } LatteDecompilerShader* LatteSHRC_FindGeometryShader(uint64 baseHash, uint64 auxHash) { 
return LatteSHRC_Get(sGeometryShaders, baseHash, auxHash); } LatteDecompilerShader* LatteSHRC_FindPixelShader(uint64 baseHash, uint64 auxHash) { return LatteSHRC_Get(sPixelShaders, baseHash, auxHash); } // update the currently active fetch shader void LatteShaderSHRC_UpdateFetchShader() { _activeFetchShader = LatteFetchShader::FindByGPUState(); } void LatteShader_CleanupAfterCompile(LatteDecompilerShader* shader) { if (shader->strBuf_shaderSource) { delete shader->strBuf_shaderSource; shader->strBuf_shaderSource = nullptr; } } void LatteShader_DumpShader(uint64 baseHash, uint64 auxHash, LatteDecompilerShader* shader) { if (!ActiveSettings::DumpShadersEnabled()) return; const char* suffix = ""; if (shader->shaderType == LatteConst::ShaderType::Vertex) suffix = "vs"; else if (shader->shaderType == LatteConst::ShaderType::Geometry) suffix = "gs"; else if (shader->shaderType == LatteConst::ShaderType::Pixel) suffix = "ps"; fs::path dumpPath = "dump/shaders"; dumpPath /= fmt::format("{:016x}_{:016x}_{}.txt", baseHash, auxHash, suffix); FileStream* fs = FileStream::createFile2(dumpPath); if (fs) { if (shader->strBuf_shaderSource) fs->writeData(shader->strBuf_shaderSource->c_str(), shader->strBuf_shaderSource->getLen()); delete fs; } } void LatteShader_DumpRawShader(uint64 baseHash, uint64 auxHash, uint32 type, uint8* programCode, uint32 programLen) { if (!ActiveSettings::DumpShadersEnabled()) return; const char* suffix = ""; if (type == SHADER_DUMP_TYPE_FETCH) suffix = "fs"; else if (type == SHADER_DUMP_TYPE_VERTEX) suffix = "vs"; else if (type == SHADER_DUMP_TYPE_GEOMETRY) suffix = "gs"; else if (type == SHADER_DUMP_TYPE_PIXEL) suffix = "ps"; else if (type == SHADER_DUMP_TYPE_COPY) suffix = "copy"; else if (type == SHADER_DUMP_TYPE_COMPUTE) suffix = "compute"; fs::path dumpPath = "dump/shaders"; dumpPath /= fmt::format("{:016x}_{:016x}_{}.bin", baseHash, auxHash, suffix); FileStream* fs = FileStream::createFile2(dumpPath); if (fs) { fs->writeData(programCode, 
programLen); delete fs; } } void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader) { uint32* vsProgramCode = (uint32*)vertexShaderPtr; // update hash from vertex shader data uint64 vsHash1 = 0; uint64 vsHash2 = 0; _calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2); uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL); uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F; vsHash += tmp; auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE(); if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS) { vsHash += 13ULL; } else if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS) { // required for Vulkan since we have to write the pointsize in the shader vsHash += 71ULL; } vsHash += (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] ? 21 : 0); // halfZ if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF()) vsHash += 0x1537; _shaderBaseHash_vs = vsHash; } void LatteSHRC_UpdateGSBaseHash(uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize) { // update hash from geometry shader data uint64 gsHash1 = 0; uint64 gsHash2 = 0; _calculateShaderProgramHash((uint32*)geometryShaderPtr, geometryShaderSize, &hashCacheVS, &gsHash1, &gsHash2); // get geometry shader uint64 gsHash = gsHash1 + gsHash2; gsHash += (uint64)_activeVertexShader->ringParameterCount; gsHash += (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] ? 
21 : 0); _shaderBaseHash_gs = gsHash; } void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader) { uint32* psProgramCode = (uint32*)pixelShaderPtr; // update hash from pixel shader data uint64 psHash1 = 0; uint64 psHash2 = 0; _calculateShaderProgramHash(psProgramCode, pixelShaderSize, &hashCachePS, &psHash1, &psHash2); // get vertex shader uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL); _shaderBaseHash_ps = psHash; } uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* contextRegisters) { // todo - include texture types in aux hash similar to how it is already done in pixel shader // or maybe there is a way to figure out the proper texture types? uint64 auxHash = 0; if(vertexShader->hasStreamoutBufferWrite) { // hash stride for streamout buffers for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) { if(!vertexShader->streamoutBufferWriteMask2[i]) continue; uint32 bufferStride = contextRegisters[mmVGT_STRMOUT_VTX_STRIDE_0 + i * 4]; auxHash = std::rotl(auxHash, 7); auxHash += (uint64)bufferStride; } } // textures can affect the shader. 
Either by their type (2D, 3D, cubemap) or by their format (float vs integer) uint64 auxHashTex = 0; for (uint8 i = 0; i < vertexShader->textureUnitListCount; i++) { uint8 t = vertexShader->textureUnitList[i]; uint32 word4 = contextRegisters[Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_VS + t * 7 + 4]; if ((word4 & 0x300) == 0x100) { // integer format auxHashTex = std::rotl(auxHashTex, 7); auxHashTex += 0x333; } } return auxHash + auxHashTex; } uint64 LatteSHRC_CalcGSAuxHash(LatteDecompilerShader* geometryShader) { // todo - include texture types in aux hash similar to how it is already done in pixel shader return 0; } uint64 LatteSHRC_CalcPSAuxHash(LatteDecompilerShader* pixelShader, uint32* contextRegisters) { uint64 auxHash = 0; // CB_SHADER_MASK can remap pixel shader outputs auxHash = (auxHash >> 3) | (auxHash << 61); auxHash += (uint64)contextRegisters[mmCB_SHADER_MASK]; // alpha test uint8 alphaTestFunc = contextRegisters[Latte::REGADDR::SX_ALPHA_TEST_CONTROL] & 0x7; uint8 alphaTestEnable = (contextRegisters[Latte::REGADDR::SX_ALPHA_TEST_CONTROL] >> 3) & 1; if (alphaTestEnable) { auxHash += (uint64)alphaTestFunc; auxHash = (auxHash >> 3) | (auxHash << 61); auxHash += 1; } // texture types (2D, 3D, cubemap etc.) 
affect the shader too for (uint8 i = 0; i < pixelShader->textureUnitListCount; i++) { uint8 t = pixelShader->textureUnitList[i]; uint32 word0 = contextRegisters[Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_PS + t * 7 + 0]; uint32 dim = (word0 & 7); auxHash = (auxHash << 3) | (auxHash >> 61); auxHash += (uint64)dim; } return auxHash; } LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompilerOutput_t& decompilerOutput, uint64 baseHash, bool calculateAuxHash, uint64 optionalAuxHash, uint32* contextRegister) { LatteDecompilerShader* shader = decompilerOutput.shader; shader->baseHash = baseHash; // copy resource mapping if(g_renderer->GetType() == RendererAPI::Vulkan) shader->resourceMapping = decompilerOutput.resourceMappingVK; else shader->resourceMapping = decompilerOutput.resourceMappingGL; // copy texture info shader->textureUnitMask2 = decompilerOutput.textureUnitMask; // copy streamout info shader->streamoutBufferWriteMask2 = decompilerOutput.streamoutBufferWriteMask; shader->hasStreamoutBufferWrite = decompilerOutput.streamoutBufferWriteMask.any(); // copy uniform offsets // for OpenGL these are retrieved in _prepareSeparableUniforms() if (g_renderer->GetType() == RendererAPI::Vulkan) { shader->uniform.loc_remapped = decompilerOutput.uniformOffsetsVK.offset_remapped; shader->uniform.loc_uniformRegister = decompilerOutput.uniformOffsetsVK.offset_uniformRegister; shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsVK.count_uniformRegister; shader->uniform.loc_windowSpaceToClipSpaceTransform = decompilerOutput.uniformOffsetsVK.offset_windowSpaceToClipSpaceTransform; shader->uniform.loc_alphaTestRef = decompilerOutput.uniformOffsetsVK.offset_alphaTestRef; shader->uniform.loc_pointSize = decompilerOutput.uniformOffsetsVK.offset_pointSize; shader->uniform.loc_fragCoordScale = decompilerOutput.uniformOffsetsVK.offset_fragCoordScale; for (sint32 t = 0; t < LATTE_NUM_MAX_TEX_UNITS; t++) { if 
(decompilerOutput.uniformOffsetsVK.offset_texScale[t] >= 0) { LatteUniformTextureScaleEntry_t entry = { 0 }; entry.texUnit = t; entry.uniformLocation = decompilerOutput.uniformOffsetsVK.offset_texScale[t]; shader->uniform.list_ufTexRescale.push_back(entry); } } shader->uniform.loc_verticesPerInstance = decompilerOutput.uniformOffsetsVK.offset_verticesPerInstance; for (sint32 t = 0; t < LATTE_NUM_STREAMOUT_BUFFER; t++) shader->uniform.loc_streamoutBufferBase[t] = decompilerOutput.uniformOffsetsVK.offset_streamoutBufferBase[t]; shader->uniform.uniformRangeSize = decompilerOutput.uniformOffsetsVK.offset_endOfBlock; } else { shader->uniform.count_uniformRegister = decompilerOutput.uniformOffsetsVK.count_uniformRegister; } // calculate aux hash if (calculateAuxHash) { if (decompilerOutput.shaderType == LatteConst::ShaderType::Vertex) { uint64 vsAuxHash = LatteSHRC_CalcVSAuxHash(shader, contextRegister); shader->auxHash = vsAuxHash; } else if (decompilerOutput.shaderType == LatteConst::ShaderType::Geometry) { uint64 gsAuxHash = LatteSHRC_CalcGSAuxHash(shader); shader->auxHash = gsAuxHash; } else if (decompilerOutput.shaderType == LatteConst::ShaderType::Pixel) { uint64 psAuxHash = LatteSHRC_CalcPSAuxHash(shader, contextRegister); shader->auxHash = psAuxHash; } else cemu_assert_debug(false); } else { shader->auxHash = optionalAuxHash; } return shader; } #include "Cafe/HW/Latte/Transcompiler/LatteTC.h" #include "Cafe/HW/Latte/ShaderInfo/ShaderInfo.h" LatteDecompilerShader* LatteShader_compileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader) { /* Analyze shader to gather general information about inputs/outputs */ Latte::ShaderDescription shaderDescription; if (!shaderDescription.analyzeShaderCode(vertexShaderPtr, vertexShaderSize, LatteConst::ShaderType::Vertex)) { assert_dbg(); return nullptr; } /* Create context dependent IO info for this shader */ 
//Latte::ShaderInstanceInfo assert_dbg(); // todo - Use ShaderInstanceInfo when generating the GLSL (GLSL::Emit() should take a 'GLSLInfoSource' class which has a bunch of virtual methods for retrieving uniform names etc. We then override this class and plug in logic using ShaderInstanceInfo /* Translate R600Plus to GLSL */ ZpIR::DebugPrinter irDebugPrinter; LatteTCGenIR genIR; genIR.setVertexShaderContext(fetchShader, LatteGPUState.contextRegister + mmSQ_VTX_SEMANTIC_0); auto irObj = genIR.transcompileLatteToIR(vertexShaderPtr, vertexShaderSize, LatteTCGenIR::VERTEX); // debug output (before register allocation) irDebugPrinter.setShowPhysicalRegisters(false); irDebugPrinter.debugPrint(irObj); // register allocation ZirPass::RegisterAllocatorForGLSL ra(irObj); ra.applyPass(); // debug output (after register allocation) irDebugPrinter.setShowPhysicalRegisters(true); irDebugPrinter.setPhysicalRegisterNameSource(ZirPass::RegisterAllocatorForGLSL::DebugPrintHelper_getPhysRegisterName); irDebugPrinter.debugPrint(irObj); // gen GLSL StringBuf glslSourceBuffer(64 * 1024); // emit GLSL header assert_dbg(); // todo // emit main ZirEmitter::GLSL emitter; emitter.Emit(irObj, &glslSourceBuffer); // debug copy to string std::string dbg; dbg.insert(0, glslSourceBuffer.c_str(), glslSourceBuffer.getLen()); assert_dbg(); return nullptr; } // compile new vertex shader (relies partially on current state) LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader) { // new decompiler //LatteShader_compileSeparableVertexShader(baseHash, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, fetchShader); // legacy decompiler LatteDecompilerOutput_t decompilerOutput{}; LatteFetchShader* fetchShaderList[1]; fetchShaderList[0] = fetchShader; LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, 
vertexShaderPtr, vertexShaderSize, fetchShaderList, 1, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &decompilerOutput); LatteDecompilerShader* vertexShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister); vsAuxHash = vertexShader->auxHash; if (vertexShader->hasError == false) { uint8* fsProgramCode = (uint8*)memory_getPointerFromPhysicalOffset(LatteGPUState.contextRegister[mmSQ_PGM_START_FS + 0] << 8); uint32 fsProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_FS + 1] << 3; LatteShaderCache_writeSeparableVertexShader(vertexShader->baseHash, vertexShader->auxHash, fsProgramCode, fsProgramSize, vertexShaderPtr, vertexShaderSize, LatteGPUState.contextRegister, usesGeometryShader); } LatteShader_DumpShader(vertexShader->baseHash, vertexShader->auxHash, vertexShader); LatteShader_DumpRawShader(vertexShader->baseHash, vertexShader->auxHash, SHADER_DUMP_TYPE_VERTEX, vertexShaderPtr, vertexShaderSize); LatteShader_CreateRendererShader(vertexShader, false); performanceMonitor.numCompiledVS++; if (g_renderer->GetType() == RendererAPI::OpenGL) { if (vertexShader->shader) vertexShader->shader->PreponeCompilation(true); LatteShader_FinishCompilation(vertexShader); } LatteSHRC_RegisterShader(vertexShader, vertexShader->baseHash, vertexShader->auxHash); return vertexShader; } LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHash, uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize) { LatteDecompilerOutput_t decompilerOutput{}; LatteFetchShader* fetchShaderList[1]; fetchShaderList[0] = _activeFetchShader; LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount, &decompilerOutput); 
LatteDecompilerShader* geometryShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister); if (geometryShader->hasError == false) { LatteShaderCache_writeSeparableGeometryShader(geometryShader->baseHash, geometryShader->auxHash, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextRegister, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount); } LatteShader_DumpShader(geometryShader->baseHash, geometryShader->auxHash, geometryShader); LatteShader_DumpRawShader(geometryShader->baseHash, geometryShader->auxHash, SHADER_DUMP_TYPE_GEOMETRY, geometryShaderPtr, geometryShaderSize); LatteShader_DumpRawShader(geometryShader->baseHash, geometryShader->auxHash, SHADER_DUMP_TYPE_COPY, geometryCopyShader, geometryCopyShaderSize); LatteShader_CreateRendererShader(geometryShader, false); performanceMonitor.numCompiledGS++; if (g_renderer->GetType() == RendererAPI::OpenGL) { if (geometryShader->shader) geometryShader->shader->PreponeCompilation(true); LatteShader_FinishCompilation(geometryShader); } LatteSHRC_RegisterShader(geometryShader, geometryShader->baseHash, geometryShader->auxHash); return geometryShader; } LatteDecompilerShader* LatteShader_CompileSeparablePixelShader(uint64 baseHash, uint64& psAuxHash, uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader) { LatteDecompilerOutput_t decompilerOutput{}; LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &decompilerOutput); LatteDecompilerShader* pixelShader = LatteShader_CreateShaderFromDecompilerOutput(decompilerOutput, baseHash, true, 0, LatteGPUState.contextRegister); psAuxHash = pixelShader->auxHash; LatteShader_DumpShader(_shaderBaseHash_ps, psAuxHash, pixelShader); LatteShader_DumpRawShader(_shaderBaseHash_ps, psAuxHash, 
SHADER_DUMP_TYPE_PIXEL, pixelShaderPtr, pixelShaderSize); LatteShader_CreateRendererShader(pixelShader, false); performanceMonitor.numCompiledPS++; if (pixelShader->hasError == false) { LatteShaderCache_writeSeparablePixelShader(_shaderBaseHash_ps, psAuxHash, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextRegister, usesGeometryShader); } if (g_renderer->GetType() == RendererAPI::OpenGL) { if (pixelShader->shader) pixelShader->shader->PreponeCompilation(true); LatteShader_FinishCompilation(pixelShader); } LatteSHRC_RegisterShader(pixelShader, _shaderBaseHash_ps, psAuxHash); return pixelShader; } void LatteSHRC_UpdateVertexShader(uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader) { LatteSHRC_UpdateVSBaseHash(vertexShaderPtr, vertexShaderSize, usesGeometryShader); uint64 vsAuxHash = 0; auto itBaseShader = sVertexShaders.find(_shaderBaseHash_vs); LatteDecompilerShader* vertexShader = nullptr; if (itBaseShader != sVertexShaders.end()) { vsAuxHash = LatteSHRC_CalcVSAuxHash(itBaseShader->second, LatteGPUState.contextRegister); vertexShader = LatteSHRC_GetFromChain(itBaseShader->second, _shaderBaseHash_vs, vsAuxHash); } if (!vertexShader) vertexShader = LatteShader_CompileSeparableVertexShader(_shaderBaseHash_vs, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, _activeFetchShader); if (vertexShader->hasError) { LatteGPUState.activeShaderHasError = true; return; } g_renderer->shader_bind(vertexShader->shader); _activeVertexShader = vertexShader; } void LatteSHRC_UpdateGeometryShader(bool usesGeometryShader, uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize) { if (usesGeometryShader == false || _activeVertexShader == nullptr) { g_renderer->shader_unbind(RendererShader::ShaderType::kGeometry); _shaderBaseHash_gs = 0; _activeGeometryShader = nullptr; return; } LatteSHRC_UpdateGSBaseHash(geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize); 
auto itBaseShader = sGeometryShaders.find(_shaderBaseHash_gs); LatteDecompilerShader* geometryShader; if (itBaseShader != sGeometryShaders.end()) { // geometry shader already known geometryShader = itBaseShader->second; cemu_assert_debug(LatteSHRC_CalcGSAuxHash(geometryShader) == 0); } else { // decompile geometry shader geometryShader = LatteShader_CompileSeparableGeometryShader(_shaderBaseHash_gs, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize); } if (geometryShader->hasError) { LatteGPUState.activeShaderHasError = true; return; } g_renderer->shader_bind(geometryShader->shader); _activeGeometryShader = geometryShader; } void LatteSHRC_UpdatePixelShader(uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader) { if (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] != 0 && g_renderer->GetType() == RendererAPI::OpenGL) { if (_activePixelShader) { g_renderer->shader_unbind(RendererShader::ShaderType::kFragment); _activePixelShader = nullptr; } return; } LatteSHRC_UpdatePSBaseHash(pixelShaderPtr, pixelShaderSize, usesGeometryShader); uint64 psAuxHash = 0; auto itBaseShader = sPixelShaders.find(_shaderBaseHash_ps); LatteDecompilerShader* pixelShader = nullptr; if (itBaseShader != sPixelShaders.end()) { psAuxHash = LatteSHRC_CalcPSAuxHash(itBaseShader->second, LatteGPUState.contextRegister); pixelShader = LatteSHRC_GetFromChain(itBaseShader->second, _shaderBaseHash_ps, psAuxHash); } if (!pixelShader) pixelShader = LatteShader_CompileSeparablePixelShader(_shaderBaseHash_ps, psAuxHash, pixelShaderPtr, pixelShaderSize, usesGeometryShader); if (pixelShader->hasError) { LatteGPUState.activeShaderHasError = true; return; } g_renderer->shader_bind(pixelShader->shader); _activePixelShader = pixelShader; } void LatteSHRC_UpdateActiveShaders() { // check if geometry shader is used auto gsMode = LatteGPUState.contextNew.VGT_GS_MODE.get_MODE(); cemu_assert_debug(LatteGPUState.contextNew.VGT_GS_MODE.get_ES_PASSTHRU() == false); // 
todo: Support for ES passthrough and cut mode in mmVGT_GS_MODE bool geometryShaderUsed = false; if (gsMode == Latte::LATTE_VGT_GS_MODE::E_MODE::OFF) { geometryShaderUsed = false; } else if (gsMode == Latte::LATTE_VGT_GS_MODE::E_MODE::SCENARIO_G) { // could also be compute shader? geometryShaderUsed = true; } else { cemu_assert_debug(false); } // get shader programs uint8* psProgramCode = (uint8*)memory_getPointerFromPhysicalOffset((LatteGPUState.contextRegister[mmSQ_PGM_START_PS] & 0xFFFFFF) << 8); uint32 psProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_PS + 1] << 3; uint8* gsProgramCode = (uint8*)memory_getPointerFromPhysicalOffset((LatteGPUState.contextRegister[mmSQ_PGM_START_GS] & 0xFFFFFF) << 8); uint32 gsProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_GS + 1] << 3; uint8* vsProgramCode; uint32 vsProgramSize; uint8* copyProgramCode = NULL; uint32 copyProgramSize = 0; if (geometryShaderUsed) { vsProgramCode = (uint8*)memory_getPointerFromPhysicalOffset((LatteGPUState.contextRegister[mmSQ_PGM_START_ES] & 0xFFFFFF) << 8); vsProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_ES + 1] << 3; copyProgramCode = (uint8*)memory_getPointerFromPhysicalOffset((LatteGPUState.contextRegister[mmSQ_PGM_START_VS] & 0xFFFFFF) << 8); if (LatteGPUState.contextRegister[mmSQ_PGM_START_VS] == 0) { copyProgramCode = NULL; debug_printf("copyProgram is NULL but used. 
Might be because of unsupported vertex/geometry mode?"); } copyProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_VS + 1] << 3; } else { if (LatteGPUState.contextRegister[mmSQ_PGM_START_VS] == 0) { debug_printf("No vertex shader program set\n"); LatteGPUState.activeShaderHasError = true; return; } vsProgramCode = (uint8*)memory_getPointerFromPhysicalOffset((LatteGPUState.contextRegister[mmSQ_PGM_START_VS] & 0xFFFFFF) << 8); vsProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_VS + 1] << 3; } // set new shaders LatteGPUState.activeShaderHasError = false; LatteShader_UpdatePSInputs(LatteGPUState.contextRegister); LatteShaderSHRC_UpdateFetchShader(); LatteSHRC_UpdateVertexShader(vsProgramCode, vsProgramSize, geometryShaderUsed); if (LatteGPUState.activeShaderHasError) return; LatteSHRC_UpdateGeometryShader(geometryShaderUsed, gsProgramCode, gsProgramSize, copyProgramCode, copyProgramSize); if (LatteGPUState.activeShaderHasError) return; LatteSHRC_UpdatePixelShader(psProgramCode, psProgramSize, geometryShaderUsed); if (LatteGPUState.activeShaderHasError) return; } // returns the sampler base index for the given shader type sint32 LatteDecompiler_getTextureSamplerBaseIndex(LatteConst::ShaderType shaderType) { uint32 samplerId = LATTE_DECOMPILER_SAMPLER_NONE; if (shaderType == LatteConst::ShaderType::Vertex) return Latte::SAMPLER_BASE_INDEX_VERTEX; else if (shaderType == LatteConst::ShaderType::Pixel) return Latte::SAMPLER_BASE_INDEX_PIXEL; else if (shaderType == LatteConst::ShaderType::Geometry) return Latte::SAMPLER_BASE_INDEX_GEOMETRY; else cemu_assert_suspicious(); return 0; } void LatteSHRC_Init() { cemu_assert_debug(sVertexShaders.empty()); cemu_assert_debug(sGeometryShaders.empty()); cemu_assert_debug(sPixelShaders.empty()); }