Cemu/src/Cafe/HW/Latte/Core/LatteShader.cpp
Exzap 3bceb39966
Remove PUBLIC_RELEASE flag and tie asserts to debug config (#287)
Removes the -DPUBLIC_RELEASE flag. Cemu's debug asserts are now only enabled if the build configuration is Debug. Similarly, on Windows the console is only shown for Debug builds.
2022-09-24 08:43:27 +02:00

988 lines
36 KiB
C++

#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Core/LatteShaderAssembly.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
#include "Cafe/HW/Latte/ISA/LatteReg.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
#include "Cafe/GraphicPack/GraphicPack2.h"
#include "util/helpers/StringParser.h"
#include "config/ActiveSettings.h"
#include "util/Zir/EmitterGLSL/ZpIREmitGLSL.h"
#include "util/Zir/Core/ZpIRDebug.h"
#include "util/containers/flat_hash_map.hpp"
// Memo of the most recently hashed shader program for one shader stage.
// _calculateShaderProgramHash() reuses prevHash1/prevHash2 when it is called again
// with the same program pointer and the same program size.
struct _ShaderHashCache
{
// first hash value stored for the memoized program
uint64 prevHash1;
// second hash value stored for the memoized program
uint64 prevHash2;
// program pointer the stored hashes were computed for (null when nothing is memoized)
uint32* prevProgramCode;
// program size, as passed by the caller, that the stored hashes were computed for
uint32 prevProgramSize;
};
// program hash memos, one instance per shader stage name (all fields start zeroed)
_ShaderHashCache hashCacheVS = { 0 };
_ShaderHashCache hashCacheGS = { 0 };
_ShaderHashCache hashCachePS = { 0 };
// currently selected fetch/vertex/geometry/pixel shader, assigned by the update functions further down in this file
LatteFetchShader* _activeFetchShader = nullptr;
LatteDecompilerShader* _activeVertexShader = nullptr;
LatteDecompilerShader* _activeGeometryShader = nullptr;
LatteDecompilerShader* _activePixelShader = nullptr;
// runtime shader cache
// each map is keyed by the shader base hash; the mapped value is the head of a chain of shaders
// linked through LatteDecompilerShader::next which share the base hash but differ in aux hash
using SHRC_CACHE_TYPE = ska::flat_hash_map<uint64, LatteDecompilerShader*>;
// NOTE(review): 512 is the constructor argument, presumably the initial bucket count - confirm against flat_hash_map
SHRC_CACHE_TYPE sVertexShaders(512);
SHRC_CACHE_TYPE sGeometryShaders(512);
SHRC_CACHE_TYPE sPixelShaders(512);
// base hashes of the current vertex/geometry/pixel program, written by the LatteSHRC_Update*BaseHash functions
uint64 _shaderBaseHash_vs;
uint64 _shaderBaseHash_gs;
uint64 _shaderBaseHash_ps;
// shader compile counters; they are not written by any code visible in this part of the file
std::atomic_int g_compiled_shaders_total = 0;
std::atomic_int g_compiled_shaders_async = 0;
// Accessor for the fetch shader most recently selected by LatteShaderSHRC_UpdateFetchShader().
// May be null if no fetch shader has been selected yet.
LatteFetchShader* LatteSHRC_GetActiveFetchShader()
{
	LatteFetchShader* const activeFetchShader = _activeFetchShader;
	return activeFetchShader;
}
// Accessor for the vertex shader most recently bound by LatteSHRC_UpdateVertexShader().
// Starts out as null.
LatteDecompilerShader* LatteSHRC_GetActiveVertexShader()
{
	LatteDecompilerShader* const activeShader = _activeVertexShader;
	return activeShader;
}
// Accessor for the active geometry shader.
// Null while no geometry shader is in use (see LatteSHRC_UpdateGeometryShader()).
LatteDecompilerShader* LatteSHRC_GetActiveGeometryShader()
{
	LatteDecompilerShader* const activeShader = _activeGeometryShader;
	return activeShader;
}
// Accessor for the active pixel shader.
// Null while no pixel shader is bound (see LatteSHRC_UpdatePixelShader()).
LatteDecompilerShader* LatteSHRC_GetActivePixelShader()
{
	LatteDecompilerShader* const activeShader = _activePixelShader;
	return activeShader;
}
// Selects the runtime cache that holds shaders of the given type.
// Vertex and Geometry map to their own caches. Every other value is expected to be Pixel
// (verified by a debug assert) and yields the pixel shader cache.
inline ska::flat_hash_map<uint64, LatteDecompilerShader*>& LatteSHRC_GetCacheByType(LatteConst::ShaderType shaderType)
{
	switch (shaderType)
	{
	case LatteConst::ShaderType::Vertex:
		return sVertexShaders;
	case LatteConst::ShaderType::Geometry:
		return sGeometryShaders;
	default:
		break;
	}
	cemu_assert_debug(shaderType == LatteConst::ShaderType::Pixel);
	return sPixelShaders;
}
// calculate hash from shader binary
// this algorithm could be more efficient since we could leverage the fact that the size is always aligned to 8 byte
// but since this is baked into the shader names used for gfx packs and shader caches we can't really change this
//
// programCode: 32-bit words of the shader program
// programSize: size in bytes; only complete 32-bit words are hashed (programSize / 4 of them)
// outputHash1: receives the add-then-rotate-left-by-3 hash
// outputHash2: receives the xor-then-rotate-right-by-7 hash
void _calcShaderHashGeneric(uint32* programCode, uint32 programSize, uint64& outputHash1, uint64& outputHash2)
{
	outputHash1 = 0;
	outputHash2 = 0;
	const uint32 wordCount = programSize / 4;
	const uint32* const wordsEnd = programCode + wordCount;
	for (const uint32* wordPtr = programCode; wordPtr != wordsEnd; ++wordPtr)
	{
		const uint32 word = *wordPtr;
		// mix the word into both accumulators before rotating either of them
		outputHash1 += (uint64)word;
		outputHash2 ^= (uint64)word;
		// 64-bit rotate left by 3
		outputHash1 = (outputHash1 >> 61) | (outputHash1 << 3);
		// 64-bit rotate right by 7
		outputHash2 = (outputHash2 << 57) | (outputHash2 >> 7);
	}
}
// Returns the two program hashes for programCode, using hashCache as a one-entry memo.
// - null programCode: the memo is cleared and both outputs are 0
// - same pointer and size as the memoized program: the stored hashes are returned without rehashing
// - otherwise: the program is hashed with _calcShaderHashGeneric() and the memo is replaced
// The memo compares pointer and size only, not the program contents.
void _calculateShaderProgramHash(uint32* programCode, uint32 programSize, _ShaderHashCache* hashCache, uint64* outputHash1, uint64* outputHash2)
{
	if (programCode == nullptr)
	{
		hashCache->prevProgramCode = nullptr;
		hashCache->prevProgramSize = 0;
		hashCache->prevHash1 = 0;
		hashCache->prevHash2 = 0;
		*outputHash1 = 0;
		*outputHash2 = 0;
		return;
	}
	const bool memoMatches = (hashCache->prevProgramCode == programCode) && (hashCache->prevProgramSize == programSize);
	if (memoMatches)
	{
		*outputHash1 = hashCache->prevHash1;
		*outputHash2 = hashCache->prevHash2;
		return;
	}
	uint64 freshHash1 = 0;
	uint64 freshHash2 = 0;
	_calcShaderHashGeneric(programCode, programSize, freshHash1, freshHash2);
	hashCache->prevProgramCode = programCode;
	hashCache->prevProgramSize = programSize;
	hashCache->prevHash1 = freshHash1;
	hashCache->prevHash2 = freshHash2;
	*outputHash1 = freshHash1;
	*outputHash2 = freshHash2;
}
void LatteSHRC_ResetCachedShaderHash()
{
hashCacheVS.prevProgramCode = 0;
hashCacheVS.prevProgramSize = 0;
hashCacheGS.prevProgramCode = 0;
hashCacheGS.prevProgramSize = 0;
hashCachePS.prevProgramCode = 0;
hashCachePS.prevProgramSize = 0;
}
// pixel shader input table, refreshed by LatteShader_UpdatePSInputs()
LatteShaderPSInputTable _activePSImportTable;

// Returns a pointer to the file-level pixel shader input table (never null).
LatteShaderPSInputTable* LatteSHRC_GetPSInputTable()
{
	LatteShaderPSInputTable* const table = &_activePSImportTable;
	return table;
}
// Unlinks a shader from the runtime cache that matches its shader type.
// The cache maps a base hash to the head of a chain linked through LatteDecompilerShader::next.
// shader: shader to unlink; its shaderType and baseHash select the cache and the chain
// Returns true if the shader was found and unlinked, false otherwise.
// On success shader->next is reset to null. The shader object itself is not freed.
bool LatteSHRC_RemoveFromCache(LatteDecompilerShader* shader)
{
	auto& cache = LatteSHRC_GetCacheByType(shader->shaderType);
	auto baseIt = cache.find(shader->baseHash);
	if (baseIt == cache.end())
	{
		cemu_assert_suspicious(); // deleting from runtime cache but shader is not present?
		return false;
	}
	if (baseIt->second == shader)
	{
		// shader is the head of the chain
		if (shader->next)
		{
			// promote the successor by overwriting the mapped value in place
			// (emplace() leaves an existing key untouched, so it cannot be used to replace the head)
			baseIt->second = shader->next;
		}
		else
		{
			cache.erase(baseIt);
		}
		shader->next = nullptr;
		return true;
	}
	// shader is not the head: walk the chain and unlink it from its predecessor
	for (LatteDecompilerShader* link = baseIt->second; link && link->next; link = link->next)
	{
		if (link->next == shader)
		{
			link->next = shader->next;
			shader->next = nullptr;
			return true;
		}
	}
	return false;
}
// Looks up a shader by base hash, aux hash and type and unlinks it from the runtime cache.
// Does nothing when no matching shader is cached or when the type is not Vertex/Geometry/Pixel.
void LatteSHRC_RemoveFromCacheByHash(uint64 shader_base_hash, uint64 shader_aux_hash, LatteConst::ShaderType type)
{
	LatteDecompilerShader* match = nullptr;
	switch (type)
	{
	case LatteConst::ShaderType::Vertex:
		match = LatteSHRC_FindVertexShader(shader_base_hash, shader_aux_hash);
		break;
	case LatteConst::ShaderType::Geometry:
		match = LatteSHRC_FindGeometryShader(shader_base_hash, shader_aux_hash);
		break;
	case LatteConst::ShaderType::Pixel:
		match = LatteSHRC_FindPixelShader(shader_base_hash, shader_aux_hash);
		break;
	default:
		break;
	}
	if (match == nullptr)
		return;
	LatteSHRC_RemoveFromCache(match);
}
// Unlinks the shader from the runtime cache, then destroys its renderer shader object
// (if any) and finally the shader itself. The pointer must not be used afterwards.
void LatteShader_free(LatteDecompilerShader* shader)
{
	LatteSHRC_RemoveFromCache(shader);
	// delete on a null pointer is a no-op, so no separate null check is required
	delete shader->shader;
	shader->shader = nullptr;
	delete shader;
}
// both vertex and geometry/pixel shader depend on PS inputs
// we prepare the PS import info in advance
//
// Rebuilds _activePSImportTable from the register array passed in:
// - paramGen / paramGenGPR from SPI_PS_IN_CONTROL_0
// - one import entry (semanticId, isFlat, isNoPerspective) per pixel shader input from SPI_PS_INPUT_CNTL_0 + n
// - count (number of inputs, clamped to GPU7_PS_MAX_INPUTS)
// - key, a 64-bit value mixed from the same register values; it is added into the
//   vertex and pixel shader base hashes by the base hash functions in this file
// contextRegisters: register array indexed with the mm* register indices
void LatteShader_UpdatePSInputs(uint32* contextRegisters)
{
// PS control
uint32 psControl0 = contextRegisters[mmSPI_PS_IN_CONTROL_0];
uint32 spi0_positionEnable = (psControl0 >> 8) & 1;
uint32 spi0_positionCentroid = (psControl0 >> 9) & 1;
cemu_assert_debug(spi0_positionCentroid == 0); // controls gl_FragCoord
uint32 spi0_positionAddr = (psControl0 >> 10) & 0x1F; // controls gl_FragCoord
uint32 spi0_paramGen = (psControl0 >> 15) & 0xF; // used for gl_PointCoords
uint32 spi0_paramGenAddr = (psControl0 >> 19) & 0x7F;
// importIndex is not referenced again in this function
sint32 importIndex = 0;
//cemu_assert_debug(((psControl0>>26)&3) == 1); // BARYC_SAMPLE_CNTL
//cemu_assert_debug((psControl0&(1 << 28)) == 0); // PERSP_GRADIENT_ENA
//cemu_assert_debug((psControl0&(1 << 29)) == 0); // LINEAR_GRADIENT_ENA
// if LINEAR_GRADIENT_ENA_bit is enabled, the pixel shader accesses gl_ClipSize?
// VS/GS parameters
// number of pixel shader inputs: low 6 bits of the same control register
uint32 numPSInputs = contextRegisters[mmSPI_PS_IN_CONTROL_0] & 0x3F;
uint64 key = 0;
if (spi0_positionEnable)
{
key += (uint64)spi0_positionAddr + 1;
}
// parameter gen
if (spi0_paramGen != 0)
{
key += std::rotr<uint64>(spi0_paramGen, 7);
key += std::rotr<uint64>(spi0_paramGenAddr, 3);
_activePSImportTable.paramGen = spi0_paramGen;
_activePSImportTable.paramGenGPR = spi0_paramGenAddr;
}
else
{
// only paramGen is reset here; paramGenGPR keeps whatever value it had before
_activePSImportTable.paramGen = 0;
}
// semantic imports from vertex shader
#ifdef CEMU_DEBUG_ASSERT
// one bit per possible semantic id (0..255), used to detect ids that occur more than once
uint8 semanticMask[256 / 8] = { 0 };
#endif
cemu_assert_debug(numPSInputs <= GPU7_PS_MAX_INPUTS);
// clamp so that the import[] writes below stay within GPU7_PS_MAX_INPUTS entries
numPSInputs = std::min<uint32>(numPSInputs, GPU7_PS_MAX_INPUTS);
for (uint32 f = 0; f < numPSInputs; f++)
{
uint32 psInputControl = contextRegisters[mmSPI_PS_INPUT_CNTL_0 + f];
uint32 psSemanticId = (psInputControl & 0xFF);
uint8 defaultValue = (psInputControl>>8)&3;
// default:
// 0 -> 0.0 0.0 0.0 0.0
// 1 -> 0.0 0.0 0.0 1.0
// 2 -> 1.0 1.0 1.0 0.0
// 3 -> 1.0 1.0 1.0 1.0
cemu_assert_debug(defaultValue <= 1);
// uknBits is only consumed by the commented-out assert below
uint32 uknBits = psInputControl & ~((0xFF)|(0x3<<8) | (1 << 10) | (1 << 12));
uknBits &= ~0x800; // FLAT_SHADE
//cemu_assert_debug(uknBits == 0);
//cemu_assert_debug(((psInputControl >> 11) & 1) == 0); // centroid
//cemu_assert_debug(((psInputControl >> 17) & 1) == 0); // point sprite coord
cemu_assert_debug(psSemanticId != 0xFF);
// the full raw control word of every input is mixed into the key, then the key is rotated left by 7
key += (uint64)psInputControl;
key = std::rotl<uint64>(key, 7);
if (spi0_positionEnable && f == spi0_positionAddr)
{
// this input slot carries the position: mark it with the dedicated import index
_activePSImportTable.import[f].semanticId = LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION;
_activePSImportTable.import[f].isFlat = false;
_activePSImportTable.import[f].isNoPerspective = false;
key += (uint64)0x33;
}
else
{
#ifdef CEMU_DEBUG_ASSERT
if (semanticMask[psSemanticId >> 3] & (1 << (psSemanticId & 7)))
{
forceLogDebug_printf("SemanticId already used");
}
semanticMask[psSemanticId >> 3] |= (1 << (psSemanticId & 7));
#endif
// regular import: bit 10 of the control word selects isFlat, bit 12 selects isNoPerspective
_activePSImportTable.import[f].semanticId = psSemanticId;
_activePSImportTable.import[f].isFlat = (psInputControl&(1 << 10)) != 0;
_activePSImportTable.import[f].isNoPerspective = (psInputControl&(1 << 12)) != 0;
}
}
_activePSImportTable.key = key;
_activePSImportTable.count = numPSInputs;
}
// Creates the renderer-side shader object for a decompiled shader.
// shader: decompiled shader; on success shader->shader receives the renderer shader object.
//         shader->hasError is set when creation fails, when the shader is on the AMD filter list
//         or when the shader type is not Vertex/Geometry/Pixel.
// compileAsync: not consulted by this function (the shader_create call below passes a constant).
// If a graphic pack provides custom source for this base/aux hash, that source replaces the
// decompiled source and shader->isCustomShader is set.
// The intermediate source buffer is released after the renderer shader has been created.
void LatteShader_CreateRendererShader(LatteDecompilerShader* shader, bool compileAsync)
{
	if (shader->hasError)
	{
		// %llx instead of the MSVC-only %I64x so the hash is formatted correctly on every platform
		forceLog_printf("Unable to compile shader %llx", shader->baseHash);
		return;
	}
	GraphicPack2::GP_SHADER_TYPE gpShaderType;
	RendererShader::ShaderType shaderType;
	switch (shader->shaderType)
	{
	case LatteConst::ShaderType::Vertex:
		shaderType = RendererShader::ShaderType::kVertex;
		gpShaderType = GraphicPack2::GP_SHADER_TYPE::VERTEX;
		break;
	case LatteConst::ShaderType::Geometry:
		shaderType = RendererShader::ShaderType::kGeometry;
		gpShaderType = GraphicPack2::GP_SHADER_TYPE::GEOMETRY;
		break;
	case LatteConst::ShaderType::Pixel:
		shaderType = RendererShader::ShaderType::kFragment;
		gpShaderType = GraphicPack2::GP_SHADER_TYPE::PIXEL;
		break;
	default:
		// any other type would leave both locals uninitialized; treat it as a failed shader instead
		cemu_assert_suspicious();
		shader->hasError = true;
		return;
	}
	// check if a custom shader is present
	std::string shaderSrc;
	const std::string* customShaderSrc = GraphicPack2::FindCustomShaderSource(shader->baseHash, shader->auxHash, gpShaderType, g_renderer->GetType() == RendererAPI::Vulkan);
	if (customShaderSrc)
	{
		shaderSrc.assign(*customShaderSrc);
		shader->isCustomShader = true;
	}
	else
	{
		shaderSrc.assign(shader->strBuf_shaderSource->c_str());
	}
	// hard-coded list of vertex shader base hashes which are rejected (see log message)
	if (shaderType == RendererShader::ShaderType::kVertex &&
		(shader->baseHash == 0x15bc7edf9de2ed30 || shader->baseHash == 0x83a697d61a3b9202 ||
		 shader->baseHash == 0x97bc44a5028381c6 || shader->baseHash == 0x24838b84d15a1da1))
	{
		forceLogDebug_printf("Filtered shader to avoid AMD crash");
		shader->shader = nullptr;
		shader->hasError = true;
		return;
	}
	// create shader
	shader->shader = g_renderer->shader_create(shaderType, shader->baseHash, shader->auxHash, shaderSrc, true, shader->isCustomShader);
	if (shader->shader == nullptr)
		shader->hasError = true;
	// after renderer shader creation we can throw away any intermediate info
	LatteShader_CleanupAfterCompile(shader);
}
// Blocks until the renderer shader has finished compiling, then prepares its separable
// uniforms and releases the intermediate source buffer.
// Shaders flagged with hasError are skipped (a debug log line is written instead).
void LatteShader_FinishCompilation(LatteDecompilerShader* shader)
{
	if (!shader->hasError)
	{
		shader->shader->WaitForCompiled();
		LatteShader_prepareSeparableUniforms(shader);
		LatteShader_CleanupAfterCompile(shader);
		return;
	}
	forceLogDebug_printf("LatteShader_finishCompilation(): Skipped because of error in shader %llx", shader->baseHash);
}
// Stores baseHash/auxHash in the shader and adds it to the runtime cache of its type.
// If the base hash is new, the shader becomes the head of a fresh chain.
// Otherwise it is inserted directly behind the existing chain head.
void LatteSHRC_RegisterShader(LatteDecompilerShader* shader, uint64 baseHash, uint64 auxHash)
{
	shader->baseHash = baseHash;
	shader->auxHash = auxHash;
	auto& cache = LatteSHRC_GetCacheByType(shader->shaderType);
	auto existing = cache.find(baseHash);
	if (existing != cache.end())
	{
		LatteDecompilerShader* const head = existing->second;
		shader->next = head->next;
		head->next = shader;
		return;
	}
	shader->next = nullptr;
	cache.emplace(baseHash, shader);
}
// Walks a shader chain starting at baseShader and returns the first entry whose aux hash
// equals auxHash, or null if there is none (or baseShader is null).
// baseHash is accepted for symmetry with the callers but is not used for the search.
LatteDecompilerShader* LatteSHRC_GetFromChain(LatteDecompilerShader* baseShader, uint64 baseHash, uint64 auxHash)
{
	for (LatteDecompilerShader* link = baseShader; link != nullptr; link = link->next)
	{
		if (link->auxHash == auxHash)
			return link;
	}
	return nullptr;
}
// Finds the shader with the given base and aux hash in the given cache.
// Returns null when the base hash is unknown or no chain entry has a matching aux hash.
LatteDecompilerShader* LatteSHRC_Get(SHRC_CACHE_TYPE& cache, uint64 baseHash, uint64 auxHash)
{
	auto chainIt = cache.find(baseHash);
	if (chainIt == cache.end())
		return nullptr;
	for (LatteDecompilerShader* link = chainIt->second; link != nullptr; link = link->next)
	{
		if (link->auxHash == auxHash)
			return link;
	}
	return nullptr;
}
// Looks up a vertex shader in the runtime cache; null when not cached.
LatteDecompilerShader* LatteSHRC_FindVertexShader(uint64 baseHash, uint64 auxHash)
{
	LatteDecompilerShader* const match = LatteSHRC_Get(sVertexShaders, baseHash, auxHash);
	return match;
}
// Looks up a geometry shader in the runtime cache; null when not cached.
LatteDecompilerShader* LatteSHRC_FindGeometryShader(uint64 baseHash, uint64 auxHash)
{
	LatteDecompilerShader* const match = LatteSHRC_Get(sGeometryShaders, baseHash, auxHash);
	return match;
}
// Looks up a pixel shader in the runtime cache; null when not cached.
LatteDecompilerShader* LatteSHRC_FindPixelShader(uint64 baseHash, uint64 auxHash)
{
	LatteDecompilerShader* const match = LatteSHRC_Get(sPixelShaders, baseHash, auxHash);
	return match;
}
// update the currently active fetch shader
void LatteShaderSHRC_UpdateFetchShader()
{
_activeFetchShader = LatteFetchShader::FindByGPUState();
}
// Releases the shader's intermediate source buffer, if it still has one.
// Safe to call repeatedly; later calls find the pointer already null.
void LatteShader_CleanupAfterCompile(LatteDecompilerShader* shader)
{
	if (shader->strBuf_shaderSource == nullptr)
		return;
	delete shader->strBuf_shaderSource;
	shader->strBuf_shaderSource = nullptr;
}
// Writes the decompiled source of a shader to dump/shaders/<baseHash>_<auxHash>_<vs|gs|ps>.txt.
// Does nothing unless shader dumping is enabled in the active settings.
// If the shader no longer has a source buffer, the file is still created but left empty.
void LatteShader_DumpShader(uint64 baseHash, uint64 auxHash, LatteDecompilerShader* shader)
{
	if (!ActiveSettings::DumpShadersEnabled())
		return;
	const char* suffix;
	switch (shader->shaderType)
	{
	case LatteConst::ShaderType::Vertex:
		suffix = "vs";
		break;
	case LatteConst::ShaderType::Geometry:
		suffix = "gs";
		break;
	case LatteConst::ShaderType::Pixel:
		suffix = "ps";
		break;
	default:
		suffix = "";
		break;
	}
	fs::path dumpPath = "dump/shaders";
	dumpPath /= fmt::format("{:016x}_{:016x}_{}.txt", baseHash, auxHash, suffix);
	FileStream* dumpFile = FileStream::createFile2(dumpPath);
	if (dumpFile == nullptr)
		return;
	if (shader->strBuf_shaderSource)
		dumpFile->writeData(shader->strBuf_shaderSource->c_str(), shader->strBuf_shaderSource->getLen());
	delete dumpFile;
}
// Writes a raw shader program to dump/shaders/<baseHash>_<auxHash>_<suffix>.bin.
// Does nothing unless shader dumping is enabled in the active settings.
// type: one of the SHADER_DUMP_TYPE_* values; unknown values produce an empty suffix
// programCode / programLen: bytes written to the file
void LatteShader_DumpRawShader(uint64 baseHash, uint64 auxHash, uint32 type, uint8* programCode, uint32 programLen)
{
	if (!ActiveSettings::DumpShadersEnabled())
		return;
	const auto suffixForType = [](uint32 dumpType) -> const char*
	{
		if (dumpType == SHADER_DUMP_TYPE_FETCH)
			return "fs";
		if (dumpType == SHADER_DUMP_TYPE_VERTEX)
			return "vs";
		if (dumpType == SHADER_DUMP_TYPE_GEOMETRY)
			return "gs";
		if (dumpType == SHADER_DUMP_TYPE_PIXEL)
			return "ps";
		if (dumpType == SHADER_DUMP_TYPE_COPY)
			return "copy";
		if (dumpType == SHADER_DUMP_TYPE_COMPUTE)
			return "compute";
		return "";
	};
	const char* suffix = suffixForType(type);
	fs::path dumpPath = "dump/shaders";
	dumpPath /= fmt::format("{:016x}_{:016x}_{}.bin", baseHash, auxHash, suffix);
	FileStream* dumpFile = FileStream::createFile2(dumpPath);
	if (dumpFile == nullptr)
		return;
	dumpFile->writeData(programCode, programLen);
	delete dumpFile;
}
// Computes the vertex shader base hash and stores it in _shaderBaseHash_vs.
// The hash is a wrapping 64-bit sum of: both program hashes, the active fetch shader key,
// the PS import table key and a set of constants selected by GPU state
// (geometry shader usage, PA_CL_VTE_CNTL, primitive type, streamout enable, DX clip space).
// _activeFetchShader is dereferenced without a null check.
void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader)
{
	// hash of the program binary (memoized in hashCacheVS)
	uint64 programHash1 = 0;
	uint64 programHash2 = 0;
	_calculateShaderProgramHash((uint32*)vertexShaderPtr, vertexShaderSize, &hashCacheVS, &programHash1, &programHash2);
	uint64 baseHash = programHash1 + programHash2;
	baseHash += _activeFetchShader->key;
	baseHash += _activePSImportTable.key;
	if (usesGeometryShader)
		baseHash += 0x1111ULL;
	const uint32 vteCntlMix = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
	baseHash += vteCntlMix;
	const auto primitiveMode = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
	if (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS)
		baseHash += 13ULL;
	else if (primitiveMode == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS)
		baseHash += 71ULL; // required for Vulkan since we have to write the pointsize in the shader
	if (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN])
		baseHash += 21;
	// halfZ
	if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
		baseHash += 0x1537;
	_shaderBaseHash_vs = baseHash;
}
// Computes the geometry shader base hash and stores it in _shaderBaseHash_gs.
// The hash is a wrapping 64-bit sum of both program hashes, the ring parameter count of the
// active vertex shader and a constant when streamout is enabled.
// geometryCopyShader / geometryCopyShaderSize are not part of the hash.
// _activeVertexShader is dereferenced without a null check.
void LatteSHRC_UpdateGSBaseHash(uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize)
{
	uint64 programHash1 = 0;
	uint64 programHash2 = 0;
	// NOTE(review): the memo used here is hashCacheVS, not hashCacheGS. The returned hashes are
	// still those of the geometry program, but hashCacheGS is never filled by code visible in
	// this file, so the hashCacheGS.prevHash1 term read by LatteSHRC_UpdatePSBaseHash() keeps its
	// initial value. Switching memos would change existing pixel shader base hashes - confirm
	// before touching this.
	_calculateShaderProgramHash((uint32*)geometryShaderPtr, geometryShaderSize, &hashCacheVS, &programHash1, &programHash2);
	uint64 baseHash = programHash1 + programHash2;
	baseHash += (uint64)_activeVertexShader->ringParameterCount;
	if (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN])
		baseHash += 21;
	_shaderBaseHash_gs = baseHash;
}
// Computes the pixel shader base hash and stores it in _shaderBaseHash_ps.
// The hash is a wrapping 64-bit sum of both program hashes and the PS import table key,
// plus hashCacheGS.prevHash1 when a geometry shader is in use.
void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader)
{
	// hash of the program binary (memoized in hashCachePS)
	uint64 programHash1 = 0;
	uint64 programHash2 = 0;
	_calculateShaderProgramHash((uint32*)pixelShaderPtr, pixelShaderSize, &hashCachePS, &programHash1, &programHash2);
	uint64 baseHash = programHash1 + programHash2 + _activePSImportTable.key;
	if (usesGeometryShader)
		baseHash += hashCacheGS.prevHash1;
	_shaderBaseHash_ps = baseHash;
}
// Computes the vertex shader aux hash from state that is not covered by the base hash:
// - the stride register of every streamout buffer the shader writes to
// - a marker for every texture unit used by the shader whose resource word 4 matches (word4 & 0x300) == 0x100
// Returns the wrapping sum of both partial hashes.
uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* contextRegisters)
{
	// todo - include texture types in aux hash similar to how it is already done in pixel shader
	// or maybe there is a way to figure out the proper texture types?
	uint64 streamoutHash = 0;
	if (vertexShader->hasStreamoutBufferWrite)
	{
		// hash stride for streamout buffers
		for (uint32 bufferIndex = 0; bufferIndex < LATTE_NUM_STREAMOUT_BUFFER; bufferIndex++)
		{
			if (vertexShader->streamoutBufferWriteMask2[bufferIndex])
			{
				const uint32 stride = contextRegisters[mmVGT_STRMOUT_VTX_STRIDE_0 + bufferIndex * 4];
				streamoutHash = std::rotl<uint64>(streamoutHash, 7);
				streamoutHash += (uint64)stride;
			}
		}
	}
	// textures can affect the shader. Either by their type (2D, 3D, cubemap) or by their format (float vs integer)
	uint64 textureHash = 0;
	for (uint8 listIndex = 0; listIndex < vertexShader->textureUnitListCount; listIndex++)
	{
		const uint8 textureUnit = vertexShader->textureUnitList[listIndex];
		const uint32 resourceWord4 = contextRegisters[Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_VS + textureUnit * 7 + 4];
		const bool isIntegerFormat = (resourceWord4 & 0x300) == 0x100;
		if (!isIntegerFormat)
			continue;
		textureHash = std::rotl<uint64>(textureHash, 7);
		textureHash += 0x333;
	}
	return streamoutHash + textureHash;
}
// Aux hash for geometry shaders. Currently always 0; the shader argument is not inspected.
uint64 LatteSHRC_CalcGSAuxHash(LatteDecompilerShader* geometryShader)
{
	// todo - include texture types in aux hash similar to how it is already done in pixel shader
	const uint64 auxHash = 0;
	return auxHash;
}
// Computes the pixel shader aux hash from state that is not covered by the base hash:
// CB_SHADER_MASK, the alpha test function (only while alpha test is enabled) and the
// low 3 bits of resource word 0 of every texture unit used by the shader.
uint64 LatteSHRC_CalcPSAuxHash(LatteDecompilerShader* pixelShader, uint32* contextRegisters)
{
	// CB_SHADER_MASK can remap pixel shader outputs
	// (the hash starts at zero, so the initial rotate of the empty hash has no effect)
	uint64 auxHash = (uint64)contextRegisters[mmCB_SHADER_MASK];
	// alpha test
	const uint32 alphaTestControl = contextRegisters[Latte::REGADDR::SX_ALPHA_TEST_CONTROL];
	const uint8 alphaTestFunc = alphaTestControl & 0x7;
	const uint8 alphaTestEnable = (alphaTestControl >> 3) & 1;
	if (alphaTestEnable)
	{
		auxHash += (uint64)alphaTestFunc;
		auxHash = (auxHash << 61) | (auxHash >> 3);
		auxHash += 1;
	}
	// texture types (2D, 3D, cubemap etc.) affect the shader too
	for (uint8 listIndex = 0; listIndex < pixelShader->textureUnitListCount; listIndex++)
	{
		const uint8 textureUnit = pixelShader->textureUnitList[listIndex];
		const uint32 resourceWord0 = contextRegisters[Latte::REGADDR::SQ_TEX_RESOURCE_WORD0_N_PS + textureUnit * 7 + 0];
		const uint32 dim = resourceWord0 & 7;
		auxHash = (auxHash >> 61) | (auxHash << 3);
		auxHash += (uint64)dim;
	}
	return auxHash;
}
// Finalizes the shader object contained in a decompiler output and returns it.
// Copies the resource mapping for the active renderer API, the texture unit mask, the
// streamout write mask and the uniform layout into the shader, then assigns the hashes.
// baseHash: stored as the shader's base hash
// calculateAuxHash: when true the aux hash is computed from contextRegister for the shader type,
//                   otherwise optionalAuxHash is stored unchanged
// Returns decompilerOutput.shader.
LatteDecompilerShader* LatteShader_CreateShaderFromDecompilerOutput(LatteDecompilerOutput_t& decompilerOutput, uint64 baseHash, bool calculateAuxHash, uint64 optionalAuxHash, uint32* contextRegister)
{
	LatteDecompilerShader* const result = decompilerOutput.shader;
	result->baseHash = baseHash;
	// copy resource mapping
	if (g_renderer->GetType() == RendererAPI::Vulkan)
		result->resourceMapping = decompilerOutput.resourceMappingVK;
	else
		result->resourceMapping = decompilerOutput.resourceMappingGL;
	// copy texture info
	result->textureUnitMask2 = decompilerOutput.textureUnitMask;
	// copy streamout info
	result->streamoutBufferWriteMask2 = decompilerOutput.streamoutBufferWriteMask;
	result->hasStreamoutBufferWrite = decompilerOutput.streamoutBufferWriteMask.any();
	// copy uniform offsets
	// for OpenGL these are retrieved in _prepareSeparableUniforms()
	auto& uniforms = result->uniform;
	const auto& vkOffsets = decompilerOutput.uniformOffsetsVK;
	if (g_renderer->GetType() == RendererAPI::Vulkan)
	{
		uniforms.loc_remapped = vkOffsets.offset_remapped;
		uniforms.loc_uniformRegister = vkOffsets.offset_uniformRegister;
		uniforms.count_uniformRegister = vkOffsets.count_uniformRegister;
		uniforms.loc_windowSpaceToClipSpaceTransform = vkOffsets.offset_windowSpaceToClipSpaceTransform;
		uniforms.loc_alphaTestRef = vkOffsets.offset_alphaTestRef;
		uniforms.loc_pointSize = vkOffsets.offset_pointSize;
		uniforms.loc_fragCoordScale = vkOffsets.offset_fragCoordScale;
		// only texture units with a non-negative scale offset get a rescale entry
		for (sint32 texUnit = 0; texUnit < LATTE_NUM_MAX_TEX_UNITS; texUnit++)
		{
			if (vkOffsets.offset_texScale[texUnit] < 0)
				continue;
			LatteUniformTextureScaleEntry_t entry = { 0 };
			entry.texUnit = texUnit;
			entry.uniformLocation = vkOffsets.offset_texScale[texUnit];
			uniforms.list_ufTexRescale.push_back(entry);
		}
		uniforms.loc_verticesPerInstance = vkOffsets.offset_verticesPerInstance;
		for (sint32 bufferIndex = 0; bufferIndex < LATTE_NUM_STREAMOUT_BUFFER; bufferIndex++)
			uniforms.loc_streamoutBufferBase[bufferIndex] = vkOffsets.offset_streamoutBufferBase[bufferIndex];
		uniforms.uniformRangeSize = vkOffsets.offset_endOfBlock;
	}
	else
	{
		// the register count is taken from the VK offset block for the non-Vulkan path as well
		uniforms.count_uniformRegister = vkOffsets.count_uniformRegister;
	}
	// aux hash supplied by the caller
	if (!calculateAuxHash)
	{
		result->auxHash = optionalAuxHash;
		return result;
	}
	// calculate aux hash
	if (decompilerOutput.shaderType == LatteConst::ShaderType::Vertex)
		result->auxHash = LatteSHRC_CalcVSAuxHash(result, contextRegister);
	else if (decompilerOutput.shaderType == LatteConst::ShaderType::Geometry)
		result->auxHash = LatteSHRC_CalcGSAuxHash(result);
	else if (decompilerOutput.shaderType == LatteConst::ShaderType::Pixel)
		result->auxHash = LatteSHRC_CalcPSAuxHash(result, contextRegister);
	else
		cemu_assert_debug(false);
	return result;
}
#include "Cafe/HW/Latte/Transcompiler/LatteTC.h"
#include "Cafe/HW/Latte/ShaderInfo/ShaderInfo.h"
// Work-in-progress vertex shader path built on the Latte -> ZpIR -> GLSL transcompiler.
// It analyzes the program, translates it to IR, runs register allocation, emits GLSL into a
// local buffer and then returns nullptr on every path; no shader object is created.
// The only reference to it in this file is a commented-out call in LatteShader_CompileSeparableVertexShader().
// NOTE(review): assert_dbg() is called unconditionally at several points; its exact behavior is defined elsewhere - confirm before enabling this path.
// baseHash, vsAuxHash and usesGeometryShader are currently not used by the body.
LatteDecompilerShader* LatteShader_compileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
{
/* Analyze shader to gather general information about inputs/outputs */
Latte::ShaderDescription shaderDescription;
if (!shaderDescription.analyzeShaderCode(vertexShaderPtr, vertexShaderSize, LatteConst::ShaderType::Vertex))
{
assert_dbg();
return nullptr;
}
/* Create context dependent IO info for this shader */
//Latte::ShaderInstanceInfo
assert_dbg();
// todo - Use ShaderInstanceInfo when generating the GLSL (GLSL::Emit() should take a 'GLSLInfoSource' class which has a bunch of virtual methods for retrieving uniform names etc. We then override this class and plug in logic using ShaderInstanceInfo
/* Translate R600Plus to GLSL */
ZpIR::DebugPrinter irDebugPrinter;
LatteTCGenIR genIR;
genIR.setVertexShaderContext(fetchShader, LatteGPUState.contextRegister + mmSQ_VTX_SEMANTIC_0);
auto irObj = genIR.transcompileLatteToIR(vertexShaderPtr, vertexShaderSize, LatteTCGenIR::VERTEX);
// debug output (before register allocation)
irDebugPrinter.setShowPhysicalRegisters(false);
irDebugPrinter.debugPrint(irObj);
// register allocation
ZirPass::RegisterAllocatorForGLSL ra(irObj);
ra.applyPass();
// debug output (after register allocation)
irDebugPrinter.setShowPhysicalRegisters(true);
irDebugPrinter.setPhysicalRegisterNameSource(ZirPass::RegisterAllocatorForGLSL::DebugPrintHelper_getPhysRegisterName);
irDebugPrinter.debugPrint(irObj);
// gen GLSL
StringBuf glslSourceBuffer(64 * 1024);
// emit GLSL header
assert_dbg(); // todo
// emit main
ZirEmitter::GLSL emitter;
emitter.Emit(irObj, &glslSourceBuffer);
// debug copy to string
// (the copy is local and discarded when the function returns)
std::string dbg;
dbg.insert(0, glslSourceBuffer.c_str(), glslSourceBuffer.getLen());
assert_dbg();
return nullptr;
}
// compile new vertex shader (relies partially on current state)
// Decompiles the program with the legacy decompiler, writes it to the shader cache (unless the
// decompiler reported an error), optionally dumps it, creates the renderer shader and registers
// the result in the runtime cache.
// vsAuxHash: receives the aux hash calculated for the new shader
// Returns the new shader; callers must check hasError before using it.
LatteDecompilerShader* LatteShader_CompileSeparableVertexShader(uint64 baseHash, uint64& vsAuxHash, uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader, LatteFetchShader* fetchShader)
{
	// new decompiler
	//LatteShader_compileSeparableVertexShader(baseHash, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, fetchShader);
	// legacy decompiler
	LatteDecompilerOutput_t output{};
	LatteFetchShader* fetchShaders[1] = { fetchShader };
	LatteDecompiler_DecompileVertexShader(_shaderBaseHash_vs, LatteGPUState.contextRegister, vertexShaderPtr, vertexShaderSize, fetchShaders, 1, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &output);
	LatteDecompilerShader* const compiled = LatteShader_CreateShaderFromDecompilerOutput(output, baseHash, true, 0, LatteGPUState.contextRegister);
	vsAuxHash = compiled->auxHash;
	if (!compiled->hasError)
	{
		// the fetch shader program is stored alongside the vertex program in the shader cache
		uint8* fetchProgramCode = (uint8*)memory_getPointerFromPhysicalOffset(LatteGPUState.contextRegister[mmSQ_PGM_START_FS + 0] << 8);
		uint32 fetchProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_FS + 1] << 3;
		LatteShaderCache_writeSeparableVertexShader(compiled->baseHash, compiled->auxHash, fetchProgramCode, fetchProgramSize, vertexShaderPtr, vertexShaderSize, LatteGPUState.contextRegister, usesGeometryShader);
	}
	LatteShader_DumpShader(compiled->baseHash, compiled->auxHash, compiled);
	LatteShader_DumpRawShader(compiled->baseHash, compiled->auxHash, SHADER_DUMP_TYPE_VERTEX, vertexShaderPtr, vertexShaderSize);
	LatteShader_CreateRendererShader(compiled, false);
	performanceMonitor.numCompiledVS++;
	// on OpenGL the compilation is finished right away
	if (g_renderer->GetType() == RendererAPI::OpenGL)
	{
		if (compiled->shader)
			compiled->shader->PreponeCompilation(true);
		LatteShader_FinishCompilation(compiled);
	}
	LatteSHRC_RegisterShader(compiled, compiled->baseHash, compiled->auxHash);
	return compiled;
}
// Compiles a new geometry shader (relies partially on current state, including _activeVertexShader).
// Decompiles the program, writes it to the shader cache (unless the decompiler reported an
// error), optionally dumps the geometry and copy programs, creates the renderer shader and
// registers the result in the runtime cache.
// Returns the new shader; callers must check hasError before using it.
LatteDecompilerShader* LatteShader_CompileSeparableGeometryShader(uint64 baseHash, uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize)
{
	LatteDecompilerOutput_t output{};
	// prepared like in the vertex shader path, but not handed to the geometry decompiler call below
	LatteFetchShader* fetchShaderList[1];
	fetchShaderList[0] = _activeFetchShader;
	LatteDecompiler_DecompileGeometryShader(_shaderBaseHash_gs, LatteGPUState.contextRegister, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount, &output);
	LatteDecompilerShader* const compiled = LatteShader_CreateShaderFromDecompilerOutput(output, baseHash, true, 0, LatteGPUState.contextRegister);
	if (!compiled->hasError)
	{
		LatteShaderCache_writeSeparableGeometryShader(compiled->baseHash, compiled->auxHash, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize, LatteGPUState.contextRegister, LatteGPUState.contextNew.GetSpecialStateValues(), _activeVertexShader->ringParameterCount);
	}
	LatteShader_DumpShader(compiled->baseHash, compiled->auxHash, compiled);
	LatteShader_DumpRawShader(compiled->baseHash, compiled->auxHash, SHADER_DUMP_TYPE_GEOMETRY, geometryShaderPtr, geometryShaderSize);
	LatteShader_DumpRawShader(compiled->baseHash, compiled->auxHash, SHADER_DUMP_TYPE_COPY, geometryCopyShader, geometryCopyShaderSize);
	LatteShader_CreateRendererShader(compiled, false);
	performanceMonitor.numCompiledGS++;
	// on OpenGL the compilation is finished right away
	if (g_renderer->GetType() == RendererAPI::OpenGL)
	{
		if (compiled->shader)
			compiled->shader->PreponeCompilation(true);
		LatteShader_FinishCompilation(compiled);
	}
	LatteSHRC_RegisterShader(compiled, compiled->baseHash, compiled->auxHash);
	return compiled;
}
// Compiles a new pixel shader (relies partially on current state).
// Decompiles the program, optionally dumps it, creates the renderer shader, writes the program
// to the shader cache (unless an error was flagged) and registers the result in the runtime cache.
// Dumping, cache writing and registration use _shaderBaseHash_ps rather than the baseHash argument.
// psAuxHash: receives the aux hash calculated for the new shader
// Returns the new shader; callers must check hasError before using it.
LatteDecompilerShader* LatteShader_CompileSeparablePixelShader(uint64 baseHash, uint64& psAuxHash, uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader)
{
	LatteDecompilerOutput_t output{};
	LatteDecompiler_DecompilePixelShader(baseHash, LatteGPUState.contextRegister, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextNew.GetSpecialStateValues(), usesGeometryShader, &output);
	LatteDecompilerShader* const compiled = LatteShader_CreateShaderFromDecompilerOutput(output, baseHash, true, 0, LatteGPUState.contextRegister);
	psAuxHash = compiled->auxHash;
	LatteShader_DumpShader(_shaderBaseHash_ps, psAuxHash, compiled);
	LatteShader_DumpRawShader(_shaderBaseHash_ps, psAuxHash, SHADER_DUMP_TYPE_PIXEL, pixelShaderPtr, pixelShaderSize);
	LatteShader_CreateRendererShader(compiled, false);
	performanceMonitor.numCompiledPS++;
	// the cache write happens after renderer shader creation, so creation failures are not cached
	if (!compiled->hasError)
	{
		LatteShaderCache_writeSeparablePixelShader(_shaderBaseHash_ps, psAuxHash, pixelShaderPtr, pixelShaderSize, LatteGPUState.contextRegister, usesGeometryShader);
	}
	// on OpenGL the compilation is finished right away
	if (g_renderer->GetType() == RendererAPI::OpenGL)
	{
		if (compiled->shader)
			compiled->shader->PreponeCompilation(true);
		LatteShader_FinishCompilation(compiled);
	}
	LatteSHRC_RegisterShader(compiled, _shaderBaseHash_ps, psAuxHash);
	return compiled;
}
// Selects the vertex shader for the current GPU state.
// Recomputes the base hash, looks for a cached shader with matching aux hash and compiles a
// new one if none exists. On success the shader is bound and becomes the active vertex shader.
// If the shader is flagged with an error, LatteGPUState.activeShaderHasError is set and the
// previously active vertex shader is left unchanged.
void LatteSHRC_UpdateVertexShader(uint8* vertexShaderPtr, uint32 vertexShaderSize, bool usesGeometryShader)
{
	LatteSHRC_UpdateVSBaseHash(vertexShaderPtr, vertexShaderSize, usesGeometryShader);
	uint64 vsAuxHash = 0;
	LatteDecompilerShader* selected = nullptr;
	auto chainIt = sVertexShaders.find(_shaderBaseHash_vs);
	if (chainIt != sVertexShaders.end())
	{
		// the aux hash is calculated using the chain head, then the chain is searched for it
		LatteDecompilerShader* const chainHead = chainIt->second;
		vsAuxHash = LatteSHRC_CalcVSAuxHash(chainHead, LatteGPUState.contextRegister);
		selected = LatteSHRC_GetFromChain(chainHead, _shaderBaseHash_vs, vsAuxHash);
	}
	if (selected == nullptr)
		selected = LatteShader_CompileSeparableVertexShader(_shaderBaseHash_vs, vsAuxHash, vertexShaderPtr, vertexShaderSize, usesGeometryShader, _activeFetchShader);
	if (selected->hasError)
	{
		LatteGPUState.activeShaderHasError = true;
		return;
	}
	g_renderer->shader_bind(selected->shader);
	_activeVertexShader = selected;
}
void LatteSHRC_UpdateGeometryShader(bool usesGeometryShader, uint8* geometryShaderPtr, uint32 geometryShaderSize, uint8* geometryCopyShader, uint32 geometryCopyShaderSize)
{
if (usesGeometryShader == false || _activeVertexShader == nullptr)
{
g_renderer->shader_unbind(RendererShader::ShaderType::kGeometry);
_shaderBaseHash_gs = 0;
_activeGeometryShader = nullptr;
return;
}
LatteSHRC_UpdateGSBaseHash(geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize);
auto itBaseShader = sGeometryShaders.find(_shaderBaseHash_gs);
LatteDecompilerShader* geometryShader;
if (itBaseShader != sGeometryShaders.end())
{
// geometry shader already known
geometryShader = itBaseShader->second;
cemu_assert_debug(LatteSHRC_CalcGSAuxHash(geometryShader) == 0);
}
else
{
// decompile geometry shader
geometryShader = LatteShader_CompileSeparableGeometryShader(_shaderBaseHash_gs, geometryShaderPtr, geometryShaderSize, geometryCopyShader, geometryCopyShaderSize);
}
if (geometryShader->hasError)
{
LatteGPUState.activeShaderHasError = true;
return;
}
g_renderer->shader_bind(geometryShader->shader);
_activeGeometryShader = geometryShader;
}
// Selects the pixel shader for the current GPU state.
// On OpenGL with streamout enabled no pixel shader is used: a bound one is unbound and the
// function returns.
// Otherwise the base hash is recomputed, a cached shader with matching aux hash is looked up
// and a new one is compiled if none exists. Shaders flagged with an error set
// LatteGPUState.activeShaderHasError instead of being bound.
void LatteSHRC_UpdatePixelShader(uint8* pixelShaderPtr, uint32 pixelShaderSize, bool usesGeometryShader)
{
	const bool streamoutActive = LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] != 0;
	if (streamoutActive && g_renderer->GetType() == RendererAPI::OpenGL)
	{
		if (_activePixelShader)
		{
			g_renderer->shader_unbind(RendererShader::ShaderType::kFragment);
			_activePixelShader = nullptr;
		}
		return;
	}
	LatteSHRC_UpdatePSBaseHash(pixelShaderPtr, pixelShaderSize, usesGeometryShader);
	uint64 psAuxHash = 0;
	LatteDecompilerShader* selected = nullptr;
	auto chainIt = sPixelShaders.find(_shaderBaseHash_ps);
	if (chainIt != sPixelShaders.end())
	{
		// the aux hash is calculated using the chain head, then the chain is searched for it
		LatteDecompilerShader* const chainHead = chainIt->second;
		psAuxHash = LatteSHRC_CalcPSAuxHash(chainHead, LatteGPUState.contextRegister);
		selected = LatteSHRC_GetFromChain(chainHead, _shaderBaseHash_ps, psAuxHash);
	}
	if (selected == nullptr)
		selected = LatteShader_CompileSeparablePixelShader(_shaderBaseHash_ps, psAuxHash, pixelShaderPtr, pixelShaderSize, usesGeometryShader);
	if (selected->hasError)
	{
		LatteGPUState.activeShaderHasError = true;
		return;
	}
	g_renderer->shader_bind(selected->shader);
	_activePixelShader = selected;
}
void LatteSHRC_UpdateActiveShaders()
{
// check if geometry shader is used
auto gsMode = LatteGPUState.contextNew.VGT_GS_MODE.get_MODE();
cemu_assert_debug(LatteGPUState.contextNew.VGT_GS_MODE.get_ES_PASSTHRU() == false);
// todo: Support for ES passthrough and cut mode in mmVGT_GS_MODE
bool geometryShaderUsed = false;
if (gsMode == Latte::LATTE_VGT_GS_MODE::E_MODE::OFF)
{
geometryShaderUsed = false;
}
else if (gsMode == Latte::LATTE_VGT_GS_MODE::E_MODE::SCENARIO_G)
{
// could also be compute shader?
geometryShaderUsed = true;
}
else
{
cemu_assert_debug(false);
}
// get shader programs
uint8* psProgramCode = (uint8*)memory_getPointerFromPhysicalOffset((LatteGPUState.contextRegister[mmSQ_PGM_START_PS] & 0xFFFFFF) << 8);
uint32 psProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_PS + 1] << 3;
uint8* gsProgramCode = (uint8*)memory_getPointerFromPhysicalOffset((LatteGPUState.contextRegister[mmSQ_PGM_START_GS] & 0xFFFFFF) << 8);
uint32 gsProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_GS + 1] << 3;
uint8* vsProgramCode;
uint32 vsProgramSize;
uint8* copyProgramCode = NULL;
uint32 copyProgramSize = 0;
if (geometryShaderUsed)
{
vsProgramCode = (uint8*)memory_getPointerFromPhysicalOffset((LatteGPUState.contextRegister[mmSQ_PGM_START_ES] & 0xFFFFFF) << 8);
vsProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_ES + 1] << 3;
copyProgramCode = (uint8*)memory_getPointerFromPhysicalOffset((LatteGPUState.contextRegister[mmSQ_PGM_START_VS] & 0xFFFFFF) << 8);
if (LatteGPUState.contextRegister[mmSQ_PGM_START_VS] == 0)
{
copyProgramCode = NULL;
debug_printf("copyProgram is NULL but used. Might be because of unsupported vertex/geometry mode?");
}
copyProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_VS + 1] << 3;
}
else
{
if (LatteGPUState.contextRegister[mmSQ_PGM_START_VS] == 0)
{
debug_printf("No vertex shader program set\n");
LatteGPUState.activeShaderHasError = true;
return;
}
vsProgramCode = (uint8*)memory_getPointerFromPhysicalOffset((LatteGPUState.contextRegister[mmSQ_PGM_START_VS] & 0xFFFFFF) << 8);
vsProgramSize = LatteGPUState.contextRegister[mmSQ_PGM_START_VS + 1] << 3;
}
// set new shaders
LatteGPUState.activeShaderHasError = false;
LatteShader_UpdatePSInputs(LatteGPUState.contextRegister);
LatteShaderSHRC_UpdateFetchShader();
LatteSHRC_UpdateVertexShader(vsProgramCode, vsProgramSize, geometryShaderUsed);
if (LatteGPUState.activeShaderHasError)
return;
LatteSHRC_UpdateGeometryShader(geometryShaderUsed, gsProgramCode, gsProgramSize, copyProgramCode, copyProgramSize);
if (LatteGPUState.activeShaderHasError)
return;
LatteSHRC_UpdatePixelShader(psProgramCode, psProgramSize, geometryShaderUsed);
if (LatteGPUState.activeShaderHasError)
return;
}
// returns the sampler base index for the given shader type
// unknown shader types are flagged via cemu_assert_suspicious() and yield 0
sint32 LatteDecompiler_getTextureSamplerBaseIndex(LatteConst::ShaderType shaderType)
{
	switch (shaderType)
	{
	case LatteConst::ShaderType::Vertex:
		return Latte::SAMPLER_BASE_INDEX_VERTEX;
	case LatteConst::ShaderType::Pixel:
		return Latte::SAMPLER_BASE_INDEX_PIXEL;
	case LatteConst::ShaderType::Geometry:
		return Latte::SAMPLER_BASE_INDEX_GEOMETRY;
	default:
		break;
	}
	cemu_assert_suspicious();
	return 0;
}
// Initialization hook for the runtime shader cache.
// Only verifies (in debug builds) that all three runtime caches are still empty.
void LatteSHRC_Init()
{
	cemu_assert_debug(sPixelShaders.empty());
	cemu_assert_debug(sGeometryShaders.empty());
	cemu_assert_debug(sVertexShaders.empty());
}