#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Core/LatteShaderAssembly.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
#include "Cafe/HW/Latte/Core/Latte.h"
#include "Cafe/HW/Latte/Core/LatteDraw.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "config/ActiveSettings.h"
#include "util/helpers/StringBuf.h"

// NOTE(review): the names of these two headers were lost (only a bare `#include` remained).
// small_vector.hpp is required by ALUClauseTemporariesState below; static_vector.hpp is presumed - confirm.
#include <boost/container/small_vector.hpp>
#include <boost/container/static_vector.hpp>

#define _CRLF	"\r\n"

void LatteDecompiler_emitAttributeDecodeGLSL(LatteDecompilerShader* shaderContext, StringBuf* src, LatteParsedFetchShaderAttribute_t* attrib);

/*
 * Variable names:
 * R0-R127 temp
 * Most variables are multi-typed and the respective type is appended to the name
 * Type suffixes are: f (float), i (32bit int), ui (unsigned 32bit int)
 * Examples: R13ui.x, tempf.z
 */

// local prototypes
void _emitTypeConversionPrefix(LatteDecompilerShaderContext* shaderContext, sint32 sourceType, sint32 destinationType);
void _emitTypeConversionSuffix(LatteDecompilerShaderContext* shaderContext, sint32 sourceType, sint32 destinationType);
void LatteDecompiler_emitClauseCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, bool isSubroutine);

// Returns the uniform block interface name for the vertex, pixel or geometry stage.
// Any other stage triggers cemu_assert_unimplemented() and yields nullptr.
const char* _getShaderUniformBlockInterfaceName(LatteConst::ShaderType mode)
{
	switch (mode)
	{
	case LatteConst::ShaderType::Vertex:
		return "uniformBlockVS";
	case LatteConst::ShaderType::Pixel:
		return "uniformBlockPS";
	case LatteConst::ShaderType::Geometry:
		return "uniformBlockGS";
	default:
		break;
	}
	cemu_assert_unimplemented();
	return nullptr;
}

// Returns the uniform block variable name prefix for the vertex, pixel or geometry stage.
// Any other stage triggers cemu_assert_unimplemented() and yields nullptr.
const char* _getShaderUniformBlockVariableName(LatteConst::ShaderType mode)
{
	switch (mode)
	{
	case LatteConst::ShaderType::Vertex:
		return "uf_blockVS";
	case LatteConst::ShaderType::Pixel:
		return "uf_blockPS";
	case LatteConst::ShaderType::Geometry:
		return "uf_blockGS";
	default:
		break;
	}
	cemu_assert_unimplemented();
	return nullptr;
}

// Returns the texture unit variable name prefix for the vertex, pixel or geometry stage.
// Any other stage triggers cemu_assert_unimplemented() and yields nullptr.
const char* _getTextureUnitVariablePrefixName(LatteConst::ShaderType mode)
{
	switch (mode)
	{
	case LatteConst::ShaderType::Vertex:
		return "textureUnitVS";
	case LatteConst::ShaderType::Pixel:
		return "textureUnitPS";
	case LatteConst::ShaderType::Geometry:
		return "textureUnitGS";
	default: // same fall-through handling as the two sibling lookups above
		break;
	}
	cemu_assert_unimplemented();
	return nullptr;
}

// Maps channel 0..3 to "x".."w"; every other value yields the literal "UNDEFINED"
const char* _getElementStrByIndex(uint32 channel)
{
	switch (channel)
	{
	case 0:
		return "x";
	case 1:
		return "y";
	case 2:
		return "z";
	case 3:
		return "w";
	}
	return "UNDEFINED";
}

// ring of scratch strings handed out by _getTempString()
char _tempGenString[64][256];
uint32 _tempGenStringIndex = 0;

// Returns the next scratch buffer of the ring (256 bytes). A buffer is reused after 64 further calls,
// so the returned pointer must be consumed before then.
char* _getTempString()
{
	char* str = _tempGenString[_tempGenStringIndex];
	_tempGenStringIndex = (_tempGenStringIndex + 1) % 64;
	return str;
}

// Builds the name of entry 'index' of the active mask stack. Subroutines use a stack variable
// whose name contains the subroutine's cfAddr. The result lives in a _getTempString() buffer.
static char* _getActiveMaskVarName(LatteDecompilerShaderContext* shaderContext, sint32 index)
{
	char* varName = _getTempString();
	if (shaderContext->isSubroutine)
		sprintf(varName, "activeMaskStackSub%04x[%d]", shaderContext->subroutineInfo->cfAddr, index);
	else
		sprintf(varName, "activeMaskStack[%d]", index);
	return varName;
}

// Same as _getActiveMaskVarName but for the "activeMaskStackC" variant of the stack variable
static char* _getActiveMaskCVarName(LatteDecompilerShaderContext* shaderContext, sint32 index)
{
	char* varName = _getTempString();
	if (shaderContext->isSubroutine)
		sprintf(varName, "activeMaskStackCSub%04x[%d]", shaderContext->subroutineInfo->cfAddr, index);
	else
		sprintf(varName, "activeMaskStackC[%d]", index);
	return varName;
}

// Builds the variable name of GPR 'index' using the default data type of the shader.
// Without array GPRs this is "R<index>i" / "R<index>f", with array GPRs "Ri[<index>]" / "Rf[<index>]".
// destRelIndexMode >= 0 selects a relative access ("Ri[<index>+ARi.x]"), it is only used for array GPRs.
// The result lives in a _getTempString() buffer.
static char* _getRegisterVarName(LatteDecompilerShaderContext* shaderContext, uint32 index, sint32 destRelIndexMode=-1)
{
	auto type = shaderContext->typeTracker.defaultDataType;
	char* tempStr = _getTempString();
	tempStr[0] = '\0'; // never hand out stale ring buffer content if the data type is not handled below
	if (shaderContext->typeTracker.useArrayGPRs == false)
	{
		if (type == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
			sprintf(tempStr, "R%ui", index);
		else if (type == LATTE_DECOMPILER_DTYPE_FLOAT)
			sprintf(tempStr, "R%uf", index);
	}
	else
	{
		char destRelOffset[32] = ""; // stays empty (instead of uninitialized) if the index mode is not recognized
		if (destRelIndexMode >= 0)
		{
			if (destRelIndexMode == GPU7_INDEX_AR_X)
				strcpy(destRelOffset, "ARi.x");
			else if (destRelIndexMode == GPU7_INDEX_AR_Y)
				strcpy(destRelOffset, "ARi.y");
			else if (destRelIndexMode == GPU7_INDEX_AR_Z)
				strcpy(destRelOffset, "ARi.z");
			else if (destRelIndexMode == GPU7_INDEX_AR_W)
				strcpy(destRelOffset, "ARi.w");
			else
				debugBreakpoint();
			if (type == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
			{
				sprintf(tempStr, "Ri[%u+%s]", index, destRelOffset);
			}
			else if (type == LATTE_DECOMPILER_DTYPE_FLOAT)
			{
				sprintf(tempStr, "Rf[%u+%s]", index, destRelOffset);
			}
		}
		else
		{
			if (type == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
			{
				sprintf(tempStr, "Ri[%u]", index);
			}
			else if (type == LATTE_DECOMPILER_DTYPE_FLOAT)
			{
				sprintf(tempStr, "Rf[%u]", index);
			}
		}
	}
	return tempStr;
}

// appends the type suffix i/ui/f
static void _appendRegisterTypeSuffix(StringBuf* src, sint32 dataType)
{
	if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
		src->add("i");
	else if (dataType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT)
		src->add("ui");
	else if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT)
		src->add("f");
	else
		cemu_assert_unimplemented();
}

// appends x/y/z/w
static void _appendChannel(StringBuf* src, sint32 channelIndex)
{
	cemu_assert_debug(channelIndex >= 0 && channelIndex <= 3);
	switch (channelIndex)
	{
	case 0:
		src->add("x");
		return;
	case 1:
		src->add("y");
		return;
	case 2:
		src->add("z");
		return;
	case 3:
		src->add("w");
		return;
	}
}

// appends .x/.y/.z/.w
static void _appendChannelAccess(StringBuf* src, sint32 channelIndex)
{
	cemu_assert_debug(channelIndex >= 0 && channelIndex <= 3);
	switch (channelIndex)
	{
	case 0:
		src->add(".x");
		return;
	case 1:
		src->add(".y");
		return;
	case 2:
		src->add(".z");
		return;
	case 3:
		src->add(".w");
		return;
	}
}

// Appends the name of a PV/PS temporary: "PS<n><type>" for aluUnit 4, otherwise "PV<n><type><channel>"
// where <n> is the lowest bit of groupIndex and <type> is the suffix of the default data type
static void _appendPVPS(LatteDecompilerShaderContext* shaderContext, StringBuf* src, uint32 groupIndex, uint8 aluUnit)
{
	cemu_assert_debug(aluUnit < 5);
	if (aluUnit == 4)
	{
		src->addFmt("PS{}", (groupIndex & 1));
		_appendRegisterTypeSuffix(src, shaderContext->typeTracker.defaultDataType);
		return;
	}
	src->addFmt("PV{}", (groupIndex & 1));
	_appendRegisterTypeSuffix(src, shaderContext->typeTracker.defaultDataType);
	_appendChannel(src, aluUnit);
}

// Formats f with fmt's "{:#}" specifier and appends a '0' if the text ends with '.'
std::string _FormatFloatAsGLSLConstant(float f)
{
	char floatAsStr[64];
	size_t floatAsStrLen = fmt::format_to_n(floatAsStr, sizeof(floatAsStr), "{:#}", f).size;
	// format_to_n reports the untruncated length, so validate it before it is used as an index
	cemu_assert(floatAsStrLen < sizeof(floatAsStr) - 1);
	if (floatAsStrLen > 0 && floatAsStr[floatAsStrLen - 1] == '.')
	{
		floatAsStr[floatAsStrLen] = '0';
		floatAsStrLen++;
	}
	cemu_assert(floatAsStrLen < 50); // constant suspiciously long?
	floatAsStr[floatAsStrLen] = '\0';
	cemu_assert_debug(floatAsStrLen >= 3); // shortest possible form is "0.0"
	return floatAsStr;
}

// tracks PV/PS and register backups
struct ALUClauseTemporariesState
{
	// describes where the value of one PV channel (or PS) of the previous group can be read from
	struct PVPSAlias
	{
		enum class LOCATION_TYPE : uint8
		{
			LOCATION_NONE,
			LOCATION_GPR,
			LOCATION_PVPS,
		};

		LOCATION_TYPE location{ LOCATION_TYPE::LOCATION_NONE };
		uint8 index; // GPR index or temporary index
		uint8 aluUnit; // GPR channel x,y,z,w for LOCATION_GPR. ALU unit (4 for PS) for LOCATION_PVPS

		void SetLocationGPR(uint8 gprIndex, uint8 channel)
		{
			cemu_assert_debug(channel < 4);
			this->location = LOCATION_TYPE::LOCATION_GPR;
			this->index = gprIndex;
			this->aluUnit = channel;
		}

		void SetLocationPSPVTemporary(uint8 aluUnit, uint32 groupIndex)
		{
			cemu_assert_debug(aluUnit < 5);
			this->location = LOCATION_TYPE::LOCATION_PVPS;
			this->index = groupIndex & 1;
			this->aluUnit = aluUnit;
		}
	};

	// a GPR channel which is copied into the variable backupReg<backupVarIndex> before the group is emitted
	struct GPRTemporary
	{
		GPRTemporary(uint8 gprIndex, uint8 channel, uint8 backupVarIndex) : gprIndex(gprIndex), channel(channel), backupVarIndex(backupVarIndex) {}

		uint8 gprIndex;
		uint8 channel;
		uint8 backupVarIndex;
	};

	// Records for each ALU unit written by the given group where its result can be read back as PV/PS
	void TrackGroupOutputPVPS(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstr, size_t numInstr)
	{
		// unset current
		for (auto& it : m_pvps)
			it.location = PVPSAlias::LOCATION_TYPE::LOCATION_NONE;
		for (size_t i = 0; i < numInstr; i++)
		{
			LatteDecompilerALUInstruction& inst = aluInstr[i];
			if (!inst.isOP3 && inst.opcode == ALU_OP2_INST_NOP)
				continue; // skip NOP instruction
			if (inst.writeMask == 0)
			{
				// map to temporary
				m_pvps[inst.aluUnit].SetLocationPSPVTemporary(inst.aluUnit, aluInstr->instructionGroupIndex);
			}
			else
			{
				// map to GPR
				if (inst.destRel == 0) // is PV/PS set for indexed writes?
					m_pvps[inst.aluUnit].SetLocationGPR(inst.destGpr, inst.destElem);
			}
		}
	}

	bool HasPVPS(uint8 aluUnitIndex) const
	{
		cemu_assert_debug(aluUnitIndex < 5);
		return m_pvps[aluUnitIndex].location != PVPSAlias::LOCATION_TYPE::LOCATION_NONE;
	}

	// Emits the expression which reads PV/PS of the given ALU unit: the aliased GPR channel, its backup
	// variable if one was created, or the PV/PS temporary written by the previous group
	void EmitPVPSAccess(LatteDecompilerShaderContext* shaderContext, uint8 aluUnitIndex, uint32 currentGroupIndex) const
	{
		switch (m_pvps[aluUnitIndex].location)
		{
		case PVPSAlias::LOCATION_TYPE::LOCATION_GPR:
		{
			sint32 temporaryIndex = GetTemporaryForGPR(m_pvps[aluUnitIndex].index, m_pvps[aluUnitIndex].aluUnit);
			if (temporaryIndex < 0)
			{
				shaderContext->shaderSource->add(_getRegisterVarName(shaderContext, m_pvps[aluUnitIndex].index, -1));
				_appendChannelAccess(shaderContext->shaderSource, m_pvps[aluUnitIndex].aluUnit);
			}
			else
			{
				// use temporary instead of GPR
				shaderContext->shaderSource->addFmt("backupReg{}", temporaryIndex);
				_appendRegisterTypeSuffix(shaderContext->shaderSource, shaderContext->typeTracker.defaultDataType);
			}
			break;
		}
		case PVPSAlias::LOCATION_TYPE::LOCATION_PVPS:
			_appendPVPS(shaderContext, shaderContext->shaderSource, currentGroupIndex - 1, m_pvps[aluUnitIndex].aluUnit);
			break;
		default:
			cemuLog_log(LogType::Force, "Shader {:016x} accesses PV/PS without writing to it", shaderContext->shaderBaseHash);
			cemu_assert_suspicious();
			break;
		}
	}

	/*
	 * Check for GPR channels which are modified before they are read within the same group
	 * These registers need to be copied to a temporary
	 */
	// NOTE(review): the element type of the span was lost, restored from how the elements are used below - confirm
	void CreateGPRTemporaries(LatteDecompilerShaderContext* shaderContext, std::span<LatteDecompilerALUInstruction> aluInstructions)
	{
		// one bit per GPR channel, set once an instruction of this group has written the channel
		uint8 registerChannelWriteMask[(LATTE_NUM_GPR * 4 + 7) / 8] = { 0 };

		m_gprTemporaries.clear();
		for (auto& aluInstruction : aluInstructions)
		{
			// ignore NOP instructions
			if (aluInstruction.isOP3 == false && aluInstruction.opcode == ALU_OP2_INST_NOP)
				continue;
			cemu_assert_debug(aluInstruction.destElem <= 3);
			// check if any previously written register is read
			for (sint32 f = 0; f < 3; f++)
			{
				uint32 readGPRIndex;
				uint32 readGPRChannel;
				if (GPU7_ALU_SRC_IS_GPR(aluInstruction.sourceOperand[f].sel))
				{
					readGPRIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction.sourceOperand[f].sel);
					cemu_assert_debug(aluInstruction.sourceOperand[f].chan <= 3);
					readGPRChannel = aluInstruction.sourceOperand[f].chan;
				}
				else if (GPU7_ALU_SRC_IS_PV(aluInstruction.sourceOperand[f].sel) || GPU7_ALU_SRC_IS_PS(aluInstruction.sourceOperand[f].sel))
				{
					uint8 aluUnitIndex = 0;
					if (GPU7_ALU_SRC_IS_PV(aluInstruction.sourceOperand[f].sel))
						aluUnitIndex = aluInstruction.sourceOperand[f].chan;
					else
						aluUnitIndex = 4;
					// if aliased to a GPR, then consider it a GPR read
					if (m_pvps[aluUnitIndex].location != PVPSAlias::LOCATION_TYPE::LOCATION_GPR)
						continue;
					readGPRIndex = m_pvps[aluUnitIndex].index;
					readGPRChannel = m_pvps[aluUnitIndex].aluUnit;
				}
				else
					continue;
				// track GPR read
				// the channel of the GPR which is actually read has to be tested. For PV/PS aliases this is
				// the channel stored in the alias and not the channel field of the source operand
				const uint32 readBitIndex = readGPRIndex * 4 + readGPRChannel;
				if ((registerChannelWriteMask[readBitIndex / 8] & (1 << (readBitIndex % 8))) != 0)
				{
					// register is overwritten by previous instruction, a temporary variable is required
					if (GetTemporaryForGPR(readGPRIndex, readGPRChannel) < 0)
						m_gprTemporaries.emplace_back(readGPRIndex, readGPRChannel, m_gprTemporaries.size());
				}
			}
			// track write
			if (aluInstruction.writeMask != 0)
			{
				const uint32 writeBitIndex = aluInstruction.destGpr * 4 + aluInstruction.destElem;
				registerChannelWriteMask[writeBitIndex / 8] |= (1 << (writeBitIndex % 8));
			}
		}
		// output code to move GPRs into temporaries
		StringBuf* src = shaderContext->shaderSource;
		for (auto& it : m_gprTemporaries)
		{
			src->addFmt("backupReg{}", it.backupVarIndex);
			_appendRegisterTypeSuffix(src, shaderContext->typeTracker.defaultDataType);
			src->add(" = ");
			src->add(_getRegisterVarName(shaderContext, it.gprIndex));
			_appendChannelAccess(src, it.channel);
			src->add(";" _CRLF);
		}
	}

	// returns -1 if none present
	sint32 GetTemporaryForGPR(uint8 gprIndex, uint8 channel) const
	{
		for (auto& it : m_gprTemporaries)
		{
			if (it.gprIndex == gprIndex && it.channel == channel)
				return (sint32)it.backupVarIndex;
		}
		return -1;
	}

private:
	PVPSAlias m_pvps[5]{}; // index 0-3 are the PV channels x,y,z,w and index 4 is PS
	// NOTE(review): the template arguments were lost; the element type follows from the usage above,
	// the inline capacity of 8 is presumed - confirm
	boost::container::small_vector<GPRTemporary, 8> m_gprTemporaries;
};

// Returns the semantic id of vertex shader output parameter 'index', or 0xFF if the table
// returned by LatteSHRC_GetPSInputTable() has no import with that semantic id
sint32 _getVertexShaderOutParamSemanticId(uint32* contextRegisters, sint32 index) // deprecated - move to LatteShaderPSInputTable
{
	uint32 vsSemanticId = (contextRegisters[mmSPI_VS_OUT_ID_0 + (index / 4)] >> (8 * (index % 4))) & 0xFF;
	// check if export exists since exports are generated based on PS inputs
	LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();
	for (sint32 i = 0; i < psInputTable->count; i++)
	{
		if (psInputTable->import[i].semanticId == vsSemanticId)
			return vsSemanticId;
	}
	return 0xFF;
}

// returns the data type in which the operand is stored, currently always the default data type
sint32 _getInputRegisterDataType(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex)
{
	return shaderContext->typeTracker.defaultDataType;
}

// returns the data type in which the instruction result is stored, currently always the default data type
sint32 _getALUInstructionOutputDataType(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction)
{
	return shaderContext->typeTracker.defaultDataType;
}

// returns true if the ALU instruction is a OP2 reduction instruction
bool _isReductionInstruction(LatteDecompilerALUInstruction* aluInstruction)
{
	return aluInstruction->isOP3 == false && (aluInstruction->opcode == ALU_OP2_INST_DOT4 || aluInstruction->opcode == ALU_OP2_INST_DOT4_IEEE || aluInstruction->opcode == ALU_OP2_INST_CUBE);
}

/*
 * Writes the name of the output variable and channel
 * E.g. R5f.x or tempf.x if writeMask is 0
 */
void _emitInstructionOutputVariableName(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction)
{
	auto src = shaderContext->shaderSource;
	sint32 outputDataType = _getALUInstructionOutputDataType(shaderContext, aluInstruction);
	if( aluInstruction->writeMask == 0 )
	{
		// does not output to GPR
		if( !_isReductionInstruction(aluInstruction) )
		{
			// output to PV/PS
			_appendPVPS(shaderContext, src, aluInstruction->instructionGroupIndex, aluInstruction->aluUnit);
			return;
		}
		else
		{
			// output to temp
			src->add("temp");
			_appendRegisterTypeSuffix(src, outputDataType);
		}
		_appendChannelAccess(src, aluInstruction->aluUnit);
	}
	else
	{
		// output to GPR. Aliasing to PV/PS happens at the end of the group
		src->add(_getRegisterVarName(shaderContext, aluInstruction->destGpr, aluInstruction->destRel==0?-1:aluInstruction->indexMode));
		_appendChannelAccess(src, aluInstruction->destElem);
	}
}

// writes the name of the PV/PS variable which belongs to the ALU unit and group of the instruction
void _emitInstructionPVPSOutputVariableName(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction)
{
	_appendPVPS(shaderContext, shaderContext->shaderSource, aluInstruction->instructionGroupIndex, aluInstruction->aluUnit);
}

// Emits an access to up to four channels of a GPR (e.g. R5f.xyz). A channel value of -1 is skipped.
// If dataType is >= 0 the access is wrapped in a conversion from the default data type to dataType.
void _emitRegisterAccessCode(LatteDecompilerShaderContext* shaderContext, sint32 gprIndex, sint32 channel0, sint32 channel1, sint32 channel2, sint32 channel3, sint32 dataType = -1)
{
	StringBuf* src = shaderContext->shaderSource;
	sint32 registerElementDataType = shaderContext->typeTracker.defaultDataType;
	cemu_assert_debug(gprIndex >= 0 && gprIndex <= 127);
	if (dataType >= 0)
	{
		_emitTypeConversionPrefix(shaderContext, registerElementDataType, dataType);
	}
	if (shaderContext->typeTracker.useArrayGPRs)
		src->add("R");
	else
		src->addFmt("R{}", gprIndex);
	_appendRegisterTypeSuffix(src, registerElementDataType);
	if (shaderContext->typeTracker.useArrayGPRs)
		src->addFmt("[{}]", gprIndex);

	src->add(".");

	sint32 channelArray[4];
	channelArray[0] = channel0;
	channelArray[1] = channel1;
	channelArray[2] = channel2;
	channelArray[3] = channel3;

	for (sint32 i = 0; i < 4; i++)
	{
		if (channelArray[i] >= 0 && channelArray[i] <= 3)
			src->add(_getElementStrByIndex(channelArray[i]));
		else if (channelArray[i] == -1)
		{
			// channel not used
		}
		else
		{
			cemu_assert_unimplemented();
		}
	}
	if (dataType >= 0)
		_emitTypeConversionSuffix(shaderContext, registerElementDataType, dataType);
}

// optimized variant of _emitRegisterAccessCode for raw one channel reads
void _emitRegisterChannelAccessCode(LatteDecompilerShaderContext* shaderContext, sint32 gprIndex, sint32 channel, sint32 dataType)
{
	cemu_assert_debug(gprIndex >= 0 && gprIndex <= 127);
	cemu_assert_debug(channel >= 0 && channel < 4);
	StringBuf* src = shaderContext->shaderSource;
	sint32 registerElementDataType = shaderContext->typeTracker.defaultDataType;
	_emitTypeConversionPrefix(shaderContext, registerElementDataType, dataType);
	if (shaderContext->typeTracker.useArrayGPRs)
		src->add("R");
	else
		src->addFmt("R{}", gprIndex);
	_appendRegisterTypeSuffix(src, registerElementDataType);
	if (shaderContext->typeTracker.useArrayGPRs)
		src->addFmt("[{}]", gprIndex);
	src->add(".");
	src->add(_getElementStrByIndex(channel));
	_emitTypeConversionSuffix(shaderContext, registerElementDataType, dataType);
}

// Emits the read of a GPR source operand without type conversion. If a backup variable was created
// for the register channel, the backup variable is read instead.
void _emitALURegisterInputAccessCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex)
{
	StringBuf* src = shaderContext->shaderSource;
	sint32 currentRegisterElementType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
	cemu_assert_debug(GPU7_ALU_SRC_IS_GPR(aluInstruction->sourceOperand[operandIndex].sel));
	sint32 gprIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction->sourceOperand[operandIndex].sel);
	sint32 temporaryIndex = shaderContext->aluPVPSState->GetTemporaryForGPR(gprIndex, aluInstruction->sourceOperand[operandIndex].chan);
	if(temporaryIndex >= 0)
	{
		// access via backup variable
		src->addFmt("backupReg{}", temporaryIndex);
		_appendRegisterTypeSuffix(src, currentRegisterElementType);
	}
	else
	{
		// access via register variable
		_emitRegisterAccessCode(shaderContext, gprIndex, aluInstruction->sourceOperand[operandIndex].chan, -1, -1, -1);
	}
}

// emits the read of PV (aluUnitIndex 0-3) or PS (aluUnitIndex 4) without type conversion
void _emitPVPSAccessCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex, uint8 aluUnitIndex)
{
	cemu_assert_debug(aluInstruction->instructionGroupIndex > 0); // PV/PS is uninitialized for group 0
	// PV/PS vars are currently always using the default type (shaderContext->typeTracker.defaultDataType)
	shaderContext->aluPVPSState->EmitPVPSAccess(shaderContext, aluUnitIndex, aluInstruction->instructionGroupIndex);
}

/*
 * Emits the expression used for calculating the index for uniform access
 * For static access, this is a number
 * For dynamic access, this is AR.* + base
 */
void _emitUniformAccessIndexCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex)
{
	StringBuf* src = shaderContext->shaderSource;
	bool isUniformRegister = GPU7_ALU_SRC_IS_CFILE(aluInstruction->sourceOperand[operandIndex].sel);
	sint32 uniformOffset = 0; // index into array, for relative accesses this is the base offset
	if( isUniformRegister )
	{
		uniformOffset = GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction->sourceOperand[operandIndex].sel);
	}
	else
	{
		if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction->sourceOperand[operandIndex].sel) )
		{
			uniformOffset = GPU7_ALU_SRC_GET_CBANK0_INDEX(aluInstruction->sourceOperand[operandIndex].sel) + aluInstruction->cfInstruction->cBank0AddrBase;
		}
		else
		{
			uniformOffset = GPU7_ALU_SRC_GET_CBANK1_INDEX(aluInstruction->sourceOperand[operandIndex].sel) + aluInstruction->cfInstruction->cBank1AddrBase;
		}
	}
	if( aluInstruction->sourceOperand[operandIndex].rel != 0 )
	{
		if (aluInstruction->indexMode == GPU7_INDEX_AR_X)
			src->addFmt("ARi.x+{}", uniformOffset);
		else if (aluInstruction->indexMode == GPU7_INDEX_AR_Y)
			src->addFmt("ARi.y+{}", uniformOffset);
		else if (aluInstruction->indexMode == GPU7_INDEX_AR_Z)
			src->addFmt("ARi.z+{}", uniformOffset);
		else if (aluInstruction->indexMode == GPU7_INDEX_AR_W)
			src->addFmt("ARi.w+{}", uniformOffset);
		else
			cemu_assert_unimplemented();
	}
	else
	{
		src->addFmt("{}", uniformOffset);
	}
}

// Emits the read of a uniform source operand (uniform register or uniform buffer) converted to requiredType.
// The emitted expression depends on the uniform mode of the shader (remapped, full cfile or full cbank).
void _emitUniformAccessCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex, sint32 requiredType)
{
	StringBuf* src = shaderContext->shaderSource;
	if(shaderContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED )
	{
		// uniform registers or buffers are accessed statically with predictable offsets
		// find entry in remapped uniform
		if( aluInstruction->sourceOperand[operandIndex].rel != 0 )
			debugBreakpoint();
		bool isUniformRegister = GPU7_ALU_SRC_IS_CFILE(aluInstruction->sourceOperand[operandIndex].sel);
		sint32 uniformOffset = 0; // index into array
		sint32 uniformBufferIndex = 0;
		if( isUniformRegister )
		{
			uniformOffset = GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction->sourceOperand[operandIndex].sel);
			uniformBufferIndex = 0;
		}
		else
		{
			if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction->sourceOperand[operandIndex].sel) )
			{
				uniformOffset = GPU7_ALU_SRC_GET_CBANK0_INDEX(aluInstruction->sourceOperand[operandIndex].sel) + aluInstruction->cfInstruction->cBank0AddrBase;
				uniformBufferIndex = aluInstruction->cfInstruction->cBank0Index;
			}
			else
			{
				uniformOffset = GPU7_ALU_SRC_GET_CBANK1_INDEX(aluInstruction->sourceOperand[operandIndex].sel) + aluInstruction->cfInstruction->cBank1AddrBase;
				uniformBufferIndex = aluInstruction->cfInstruction->cBank1Index;
			}
		}
		// NOTE(review): the else branch below also runs when only one of "entry is a register" and
		// "operand is a register" is true, in that case the entry is compared by kcacheBankId - confirm this is intended
		LatteDecompilerRemappedUniformEntry_t* remappedUniformEntry = NULL;
		for(size_t i=0; i< shaderContext->shader->list_remappedUniformEntries.size(); i++)
		{
			LatteDecompilerRemappedUniformEntry_t* remappedUniformEntryItr = shaderContext->shader->list_remappedUniformEntries.data() + i;
			if( remappedUniformEntryItr->isRegister && isUniformRegister )
			{
				if( remappedUniformEntryItr->index == uniformOffset )
				{
					remappedUniformEntry = remappedUniformEntryItr;
					break;
				}
			}
			else
			{
				if( remappedUniformEntryItr->kcacheBankId == uniformBufferIndex && remappedUniformEntryItr->index == uniformOffset )
				{
					remappedUniformEntry = remappedUniformEntryItr;
					break;
				}
			}
		}
		// NOTE(review): a missing entry is only checked by this debug assert, the pointer is dereferenced below
		cemu_assert_debug(remappedUniformEntry);
		_emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
		if(shaderContext->shader->shaderType == LatteConst::ShaderType::Vertex )
			src->addFmt("uf_remappedVS[{}]", remappedUniformEntry->mappedIndex);
		else if(shaderContext->shader->shaderType == LatteConst::ShaderType::Pixel )
			src->addFmt("uf_remappedPS[{}]", remappedUniformEntry->mappedIndex);
		else if(shaderContext->shader->shaderType == LatteConst::ShaderType::Geometry )
			src->addFmt("uf_remappedGS[{}]", remappedUniformEntry->mappedIndex);
		else
			debugBreakpoint();
		_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
		_emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
	}
	else if( shaderContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE )
	{
		// uniform registers are accessed with unpredictable (dynamic) offset
		_emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
		if(shaderContext->shader->shaderType == LatteConst::ShaderType::Vertex )
			src->add("uf_uniformRegisterVS[");
		else if (shaderContext->shader->shaderType == LatteConst::ShaderType::Pixel)
			src->add("uf_uniformRegisterPS[");
		else if(shaderContext->shader->shaderType == LatteConst::ShaderType::Geometry )
			src->add("uf_uniformRegisterGS[");
		else
			debugBreakpoint();
		_emitUniformAccessIndexCode(shaderContext, aluInstruction, operandIndex);
		src->add("]");

		_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
		_emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
	}
	else if( shaderContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK )
	{
		// uniform buffers are available as a whole
		bool isUniformRegister = GPU7_ALU_SRC_IS_CFILE(aluInstruction->sourceOperand[operandIndex].sel);
		if( isUniformRegister )
			debugBreakpoint();
		sint32 uniformBufferIndex = 0;
		if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction->sourceOperand[operandIndex].sel) )
		{
			uniformBufferIndex = aluInstruction->cfInstruction->cBank0Index;
		}
		else
		{
			uniformBufferIndex = aluInstruction->cfInstruction->cBank1Index;
		}
		_emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
		src->addFmt("{}{}[", _getShaderUniformBlockVariableName(shaderContext->shader->shaderType), uniformBufferIndex);
		_emitUniformAccessIndexCode(shaderContext, aluInstruction, operandIndex);
		src->addFmt("]");

		_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
		_emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
	}
	else
		debugBreakpoint();
}

// Generates (slow) code to read an indexed GPR
void _emitCodeToReadRelativeGPR(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex, sint32 requiredType)
{
	StringBuf* src = shaderContext->shaderSource;
	uint32 gprBaseIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction->sourceOperand[operandIndex].sel);
	cemu_assert_debug(aluInstruction->sourceOperand[operandIndex].rel != 0);

	if( shaderContext->typeTracker.useArrayGPRs )
	{
		// array GPRs can be indexed directly
		_emitTypeConversionPrefix(shaderContext, shaderContext->typeTracker.defaultDataType, requiredType);
		src->add(_getRegisterVarName(shaderContext, gprBaseIndex, aluInstruction->indexMode));
		_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
		_emitTypeConversionSuffix(shaderContext, shaderContext->typeTracker.defaultDataType, requiredType);
		return;
	}

	char indexAccessCode[64] = ""; // stays empty (instead of uninitialized) if the index mode is not recognized
	if (aluInstruction->indexMode == GPU7_INDEX_AR_X)
		sprintf(indexAccessCode, "ARi.x");
	else if (aluInstruction->indexMode == GPU7_INDEX_AR_Y)
		sprintf(indexAccessCode, "ARi.y");
	else if (aluInstruction->indexMode == GPU7_INDEX_AR_Z)
		sprintf(indexAccessCode, "ARi.z");
	else if (aluInstruction->indexMode == GPU7_INDEX_AR_W)
		sprintf(indexAccessCode, "ARi.w");
	else
		cemu_assert_unimplemented();

	if( LATTE_DECOMPILER_DTYPE_SIGNED_INT != requiredType )
		_emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);

	// generated code looks like this:
	// result = ((lookupIndex==0)?GPR5:(lookupIndex==1)?GPR6:(lookupIndex==2)?GPR7:...:(lookupIndex==122)?GPR127:0)
	src->add("(");
	// NOTE(review): the loop condition and the start of the use mask test were lost,
	// restored from the surrounding code (upper bound LATTE_NUM_GPR) - confirm
	for(sint32 i=gprBaseIndex; i<LATTE_NUM_GPR; i++)
	{
		// skip registers which are not marked in the GPR use mask
		if( (shaderContext->analyzer.gprUseMask[i / 8] & (1 << (i % 8))) == 0 )
			continue;
		src->addFmt("({}=={})?", indexAccessCode, i-gprBaseIndex);
		// code to access gpr
		src->add(_getRegisterVarName(shaderContext, i));
		_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
		src->add(":");
	}
	src->add("0)");
	if( LATTE_DECOMPILER_DTYPE_SIGNED_INT != requiredType )
		_emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
}

// Emits the expression which reads source operand 'operandIndex' (0-2) of the instruction as requiredType.
// The abs and neg modifiers of the operand are applied on the float interpretation of the value.
void _emitOperandInputCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex, sint32 requiredType)
{
	StringBuf* src = shaderContext->shaderSource;
	if( operandIndex < 0 || operandIndex >= 3 )
		debugBreakpoint();
	sint32 requiredTypeOut = requiredType;
	if( requiredType != LATTE_DECOMPILER_DTYPE_FLOAT && (aluInstruction->sourceOperand[operandIndex].abs != 0 || aluInstruction->sourceOperand[operandIndex].neg != 0) )
	{
		// we need to apply float operations on the input but it's not read as a float
		// force internal required type to float and then cast it back to whatever type is actually required
		requiredType = LATTE_DECOMPILER_DTYPE_FLOAT;
	}

	if( requiredTypeOut != requiredType )
		_emitTypeConversionPrefix(shaderContext, requiredType, requiredTypeOut);

	if( aluInstruction->sourceOperand[operandIndex].neg != 0 )
		src->add("-(");
	if( aluInstruction->sourceOperand[operandIndex].abs != 0 )
		src->add("abs(");

	if( GPU7_ALU_SRC_IS_GPR(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		if( aluInstruction->sourceOperand[operandIndex].rel != 0 )
		{
			_emitCodeToReadRelativeGPR(shaderContext, aluInstruction, operandIndex, requiredType);
		}
		else
		{
			if( requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
			{
				// signed int 32bit
				sint32 currentRegisterElementType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
				// write code for register input
				_emitTypeConversionPrefix(shaderContext, currentRegisterElementType, requiredType);
				_emitALURegisterInputAccessCode(shaderContext, aluInstruction, operandIndex);
				_emitTypeConversionSuffix(shaderContext, currentRegisterElementType, requiredType);
			}
			else if( requiredType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT )
			{
				// unsigned int 32bit
				sint32 currentRegisterElementType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
				if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
				{
					// need to convert from int to uint
					src->add("uint(");
				}
				else if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT )
				{
					// no extra work necessary
				}
				else
					debugBreakpoint();
				// write code for register input
				_emitALURegisterInputAccessCode(shaderContext, aluInstruction, operandIndex);
				if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
				{
					src->add(")");
				}
			}
			else if( requiredType == LATTE_DECOMPILER_DTYPE_FLOAT )
			{
				// float 32bit
				sint32 currentRegisterElementType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
				if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
				{
					// need to convert (not cast) from int bits to float
					src->add("intBitsToFloat(");
				}
				else if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_FLOAT )
				{
					// no extra work necessary
				}
				else
					debugBreakpoint();
				// write code for register input
				_emitALURegisterInputAccessCode(shaderContext, aluInstruction, operandIndex);
				if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
				{
					src->add(")");
				}
			}
			else
				debugBreakpoint();
		}
	}
	else if( GPU7_ALU_SRC_IS_CONST_0F(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		if(requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT || requiredType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT)
			src->add("0");
		else if( requiredType == LATTE_DECOMPILER_DTYPE_FLOAT )
			src->add("0.0");
	}
	else if( GPU7_ALU_SRC_IS_CONST_1F(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		_emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
		src->add("1.0");
		_emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
	}
	else if( GPU7_ALU_SRC_IS_CONST_0_5F(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		_emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
		src->add("0.5");
		_emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
	}
	else if( GPU7_ALU_SRC_IS_CONST_1I(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		if (requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
			src->add("int(1)");
		else if (requiredType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT)
			src->add("uint(1)");
		else
			cemu_assert_suspicious();
	}
	else if( GPU7_ALU_SRC_IS_CONST_M1I(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		if( requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
			src->add("int(-1)");
		else
			cemu_assert_suspicious();
	}
	else if( GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		if( requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
			src->addFmt("0x{:x}", aluInstruction->literalData.w[aluInstruction->sourceOperand[operandIndex].chan]);
		else if( requiredType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT )
			src->addFmt("uint(0x{:x})", aluInstruction->literalData.w[aluInstruction->sourceOperand[operandIndex].chan]);
		else if (requiredType == LATTE_DECOMPILER_DTYPE_FLOAT)
		{
			uint32 constVal = aluInstruction->literalData.w[aluInstruction->sourceOperand[operandIndex].chan];
			sint32 exponent = (constVal >> 23) & 0xFF;
			exponent -= 127;
			// a decimal constant is only emitted if the low 8 bits of the literal are zero and the
			// unbiased exponent is within -10..10. Every other literal is emitted as raw bits
			if ((constVal & 0xFF) == 0 && exponent >= -10 && exponent <= 10)
			{
				// reinterpret the bits via memcpy, reading them through a float pointer violates strict aliasing
				float constValF;
				memcpy(&constValF, &constVal, sizeof(constValF));
				src->add(_FormatFloatAsGLSLConstant(constValF));
			}
			else
				src->addFmt("intBitsToFloat(0x{:08x})", constVal);
		}
	}
	else if( GPU7_ALU_SRC_IS_CFILE(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		_emitUniformAccessCode(shaderContext, aluInstruction, operandIndex, requiredType);
	}
	else if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction->sourceOperand[operandIndex].sel) ||
		GPU7_ALU_SRC_IS_CBANK1(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		_emitUniformAccessCode(shaderContext, aluInstruction, operandIndex, requiredType);
	}
	else if( GPU7_ALU_SRC_IS_PV(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		sint32 currentPVDataType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
		_emitTypeConversionPrefix(shaderContext, currentPVDataType, requiredType);
		_emitPVPSAccessCode(shaderContext, aluInstruction, operandIndex, aluInstruction->sourceOperand[operandIndex].chan);
		_emitTypeConversionSuffix(shaderContext, currentPVDataType, requiredType);
	}
	else if( GPU7_ALU_SRC_IS_PS(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		sint32 currentPSDataType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
		_emitTypeConversionPrefix(shaderContext, currentPSDataType, requiredType);
		_emitPVPSAccessCode(shaderContext, aluInstruction, operandIndex, 4);
		_emitTypeConversionSuffix(shaderContext, currentPSDataType, requiredType);
	}
	else
	{
		cemuLog_log(LogType::Force, "Unsupported shader ALU operand sel {:#x}\n", aluInstruction->sourceOperand[operandIndex].sel);
		debugBreakpoint();
	}

	if( aluInstruction->sourceOperand[operandIndex].abs != 0 )
		src->add(")");
	if( aluInstruction->sourceOperand[operandIndex].neg != 0 )
		src->add(")");

	if( requiredTypeOut != requiredType )
		_emitTypeConversionSuffix(shaderContext, requiredType, requiredTypeOut);
}

// Emits the opening part of a conversion from sourceType to destinationType, nothing if both are equal.
// Has to be paired with _emitTypeConversionSuffix which emits the closing parenthesis.
void _emitTypeConversionPrefix(LatteDecompilerShaderContext* shaderContext, sint32 sourceType, sint32 destinationType)
{
	if( sourceType == destinationType )
		return;
	StringBuf* src = shaderContext->shaderSource;
	if (sourceType == LATTE_DECOMPILER_DTYPE_FLOAT && destinationType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
		src->add("floatBitsToInt(");
	else if (sourceType == LATTE_DECOMPILER_DTYPE_FLOAT && destinationType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT)
		src->add("floatBitsToUint(");
	else if( sourceType == LATTE_DECOMPILER_DTYPE_SIGNED_INT && destinationType == LATTE_DECOMPILER_DTYPE_FLOAT )
		src->add("intBitsToFloat(");
	else if( sourceType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT && destinationType == LATTE_DECOMPILER_DTYPE_FLOAT )
		src->add("uintBitsToFloat("); // without this case the suffix emitted a ")" which had no matching "("
	else if( sourceType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT && destinationType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
		src->add("int(");
	else if( sourceType == LATTE_DECOMPILER_DTYPE_SIGNED_INT && destinationType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT )
		src->add("uint(");
	else
		cemu_assert_debug(false);
}

// emits the closing parenthesis for _emitTypeConversionPrefix, nothing if both types are equal
void _emitTypeConversionSuffix(LatteDecompilerShaderContext* shaderContext, sint32 sourceType, sint32 destinationType)
{
	if( sourceType == destinationType )
		return;
	StringBuf* src = shaderContext->shaderSource;
	src->add(")");
}

// Emits "<output> = <operand0><operandStr><operand1>;" where both operands are read as TDataType
// and the result is converted to the output data type of the instruction
// NOTE(review): the template parameter list was lost, restored from how TDataType is used - confirm
template<sint32 TDataType>
void _emitALUOperationBinary(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, const char* operandStr)
{
	StringBuf* src = shaderContext->shaderSource;
	sint32 outputType = _getALUInstructionOutputDataType(shaderContext, aluInstruction);
	_emitInstructionOutputVariableName(shaderContext, aluInstruction);
	src->add(" = ");
	_emitTypeConversionPrefix(shaderContext, TDataType, outputType);
	_emitOperandInputCode(shaderContext, aluInstruction, 0, TDataType);
	src->add((char*)operandStr);
	_emitOperandInputCode(shaderContext, aluInstruction, 1, TDataType);
	_emitTypeConversionSuffix(shaderContext, TDataType, outputType);
	src->add(";" _CRLF);
}

// returns true if both operands are GPR operands with identical sel, chan, abs, neg and rel fields
static bool _isSameGPROperand(LatteDecompilerALUInstruction* aluInstruction, sint32 opIndexA, sint32 opIndexB)
{
	if (aluInstruction->sourceOperand[opIndexA].sel != aluInstruction->sourceOperand[opIndexB].sel)
		return false;
	if (!GPU7_ALU_SRC_IS_GPR(aluInstruction->sourceOperand[opIndexA].sel))
		return false;
	if (aluInstruction->sourceOperand[opIndexA].chan != aluInstruction->sourceOperand[opIndexB].chan)
		return false;
	if (aluInstruction->sourceOperand[opIndexA].abs != aluInstruction->sourceOperand[opIndexB].abs)
		return false;
	if (aluInstruction->sourceOperand[opIndexA].neg != aluInstruction->sourceOperand[opIndexB].neg)
		return false;
	if (aluInstruction->sourceOperand[opIndexA].rel != aluInstruction->sourceOperand[opIndexB].rel)
		return false;
	return true;
}

// returns true if the operand has the abs or neg modifier set
static bool _operandHasModifiers(LatteDecompilerALUInstruction* aluInstruction, sint32 opIndex)
{
	return aluInstruction->sourceOperand[opIndex].abs != 0 || aluInstruction->sourceOperand[opIndex].neg != 0;
}

void _emitALUOP2InstructionCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, LatteDecompilerALUInstruction* aluInstruction)
{
	StringBuf* src = shaderContext->shaderSource;
	sint32 outputType = _getALUInstructionOutputDataType(shaderContext, aluInstruction); // data type of output
	if( aluInstruction->opcode == ALU_OP2_INST_MOV )
	{
		bool requiresFloatMove = false;
		requiresFloatMove = aluInstruction->sourceOperand[0].abs != 0 || aluInstruction->sourceOperand[0].neg != 0;
		if( requiresFloatMove )
		{
			// abs/neg operations are applied to source operand, do float based move
			_emitInstructionOutputVariableName(shaderContext, aluInstruction);
			src->add(" = ");
			_emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
			_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
			_emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
			src->add(";" _CRLF);
		}
		else
		{
			_emitInstructionOutputVariableName(shaderContext, aluInstruction);
			src->add(" = ");
			_emitOperandInputCode(shaderContext, aluInstruction, 0, outputType);
src->add(";" _CRLF); } } else if( aluInstruction->opcode == ALU_OP2_INST_MOVA_FLOOR ) { cemu_assert_debug(aluInstruction->writeMask == 0); cemu_assert_debug(aluInstruction->omod == 0); src->add("tempResultf = "); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(";" _CRLF); src->add("tempResultf = floor(tempResultf);" _CRLF); src->add("tempResultf = clamp(tempResultf, -256.0, 255.0);" _CRLF); // set AR if( aluInstruction->destElem == 0 ) src->add("ARi.x = int(tempResultf);" _CRLF); else if( aluInstruction->destElem == 1 ) src->add("ARi.y = int(tempResultf);" _CRLF); else if( aluInstruction->destElem == 2 ) src->add("ARi.z = int(tempResultf);" _CRLF); else src->add("ARi.w = int(tempResultf);" _CRLF); // set output _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); if( outputType != LATTE_DECOMPILER_DTYPE_SIGNED_INT ) debugBreakpoint(); // todo src->add("floatBitsToInt(tempResultf)"); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_MOVA_INT ) { cemu_assert_debug(aluInstruction->writeMask == 0); cemu_assert_debug(aluInstruction->omod == 0); src->add("tempResulti = "); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(";" _CRLF); src->add("tempResulti = clamp(tempResulti, -256, 255);" _CRLF); // set AR if( aluInstruction->destElem == 0 ) src->add("ARi.x = tempResulti;" _CRLF); else if( aluInstruction->destElem == 1 ) src->add("ARi.y = tempResulti;" _CRLF); else if( aluInstruction->destElem == 2 ) src->add("ARi.z = tempResulti;" _CRLF); else src->add("ARi.w = tempResulti;" _CRLF); // set output _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); if( outputType != LATTE_DECOMPILER_DTYPE_SIGNED_INT ) debugBreakpoint(); // todo src->add("tempResulti"); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_ADD ) { _emitALUOperationBinary(shaderContext, aluInstruction, " + "); 
} else if( aluInstruction->opcode == ALU_OP2_INST_MUL ) { // 0*anything is always 0 _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); // if any operand is a non-zero literal or constant we can use standard multiplication bool useDefaultMul = false; if (GPU7_ALU_SRC_IS_CONST_0F(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_CONST_0F(aluInstruction->sourceOperand[1].sel)) { // result is always zero src->add("0.0"); } else { // multiply if (GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[1].sel) || GPU7_ALU_SRC_IS_ANY_CONST(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_ANY_CONST(aluInstruction->sourceOperand[1].sel)) { useDefaultMul = true; } if (shaderContext->options->strictMul && useDefaultMul == false) { src->add("mul_nonIEEE("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(", "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); } else { _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(" * "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); } } _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_MUL_IEEE ) { // 0*anything according to IEEE rules _emitALUOperationBinary(shaderContext, aluInstruction, " * "); } else if (aluInstruction->opcode == ALU_OP2_INST_RECIP_IEEE) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("1.0"); src->add(" / "); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); 
_emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if (aluInstruction->opcode == ALU_OP2_INST_RECIP_FF) { // untested (BotW bombs) src->add("tempResultf = 1.0 / ("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(");" _CRLF); // INF becomes 0.0 src->add("if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF); // -INF becomes -0.0 src->add("else if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF); // assign result to output _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("tempResultf"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_IEEE || aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_CLAMPED || aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_FF ) { // todo: This should be correct but testing is needed src->add("tempResultf = 1.0 / sqrt("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(");" _CRLF); if (aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_CLAMPED) { // note: if( -INF < 0.0 ) does not resolve to true src->add("if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) != 0 ) tempResultf = -3.40282347E+38F;" _CRLF); src->add("else if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) == 0 ) tempResultf = 3.40282347E+38F;" _CRLF); } else if (aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_FF) { // untested (BotW bombs) src->add("if( isinf(tempResultf) == true && (floatBitsToInt(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF); src->add("else if( isinf(tempResultf) == true && 
(floatBitsToInt(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF); } // assign result to output _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("tempResultf"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_MAX || aluInstruction->opcode == ALU_OP2_INST_MIN || aluInstruction->opcode == ALU_OP2_INST_MAX_DX10 || aluInstruction->opcode == ALU_OP2_INST_MIN_DX10 ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); if( aluInstruction->opcode == ALU_OP2_INST_MAX ) src->add("max"); else if( aluInstruction->opcode == ALU_OP2_INST_MIN ) src->add("min"); else if (aluInstruction->opcode == ALU_OP2_INST_MAX_DX10) src->add("max"); else if (aluInstruction->opcode == ALU_OP2_INST_MIN_DX10) src->add("min"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(", "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_FLOOR || aluInstruction->opcode == ALU_OP2_INST_FRACT || aluInstruction->opcode == ALU_OP2_INST_TRUNC ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); if( aluInstruction->opcode == ALU_OP2_INST_FLOOR ) src->add("floor"); else if( aluInstruction->opcode == ALU_OP2_INST_FRACT ) src->add("fract"); else src->add("trunc"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); 
_emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_LOG_CLAMPED || aluInstruction->opcode == ALU_OP2_INST_LOG_IEEE ) { src->add("tempResultf = max(0.0, "); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(");" _CRLF); src->add("tempResultf = log2(tempResultf);" _CRLF); if( aluInstruction->opcode == ALU_OP2_INST_LOG_CLAMPED ) { src->add("if( isinf(tempResultf) == true ) tempResultf = -3.40282347E+38F;" _CRLF); } // assign result to output _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("tempResultf"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_RNDNE ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("roundEven"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_EXP_IEEE ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("exp2"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SQRT_IEEE ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); 
_emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("sqrt"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SIN || aluInstruction->opcode == ALU_OP2_INST_COS ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); if( aluInstruction->opcode == ALU_OP2_INST_SIN ) src->add("sin"); else src->add("cos"); src->add("(("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")/0.1591549367)"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_FLT_TO_INT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("int"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_FLT_TO_UINT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT, outputType); src->add("uint"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_INT_TO_FLOAT ) { 
_emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("float("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_UINT_TO_FLOAT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("float("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT); src->add(")"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if (aluInstruction->opcode == ALU_OP2_INST_AND_INT) _emitALUOperationBinary(shaderContext, aluInstruction, " & "); else if (aluInstruction->opcode == ALU_OP2_INST_OR_INT) _emitALUOperationBinary(shaderContext, aluInstruction, " | "); else if (aluInstruction->opcode == ALU_OP2_INST_XOR_INT) _emitALUOperationBinary(shaderContext, aluInstruction, " ^ "); else if( aluInstruction->opcode == ALU_OP2_INST_NOT_INT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("~("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_ADD_INT ) _emitALUOperationBinary(shaderContext, aluInstruction, " + "); else if( aluInstruction->opcode == ALU_OP2_INST_MAX_INT || aluInstruction->opcode == ALU_OP2_INST_MIN_INT || aluInstruction->opcode == ALU_OP2_INST_MAX_UINT || 
aluInstruction->opcode == ALU_OP2_INST_MIN_UINT) { // not verified bool isUnsigned = aluInstruction->opcode == ALU_OP2_INST_MAX_UINT || aluInstruction->opcode == ALU_OP2_INST_MIN_UINT; auto opType = isUnsigned ? LATTE_DECOMPILER_DTYPE_UNSIGNED_INT : LATTE_DECOMPILER_DTYPE_SIGNED_INT; _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, opType, outputType); if( aluInstruction->opcode == ALU_OP2_INST_MAX_INT || aluInstruction->opcode == ALU_OP2_INST_MAX_UINT ) src->add("max("); else src->add("min("); _emitOperandInputCode(shaderContext, aluInstruction, 0, opType); src->add(", "); _emitOperandInputCode(shaderContext, aluInstruction, 1, opType); _emitTypeConversionSuffix(shaderContext, opType, outputType); src->add(");" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SUB_INT ) { // note: The AMD doc says src1 is on the left side but tests indicate otherwise. It's src0 - src1. _emitALUOperationBinary(shaderContext, aluInstruction, " - "); } else if (aluInstruction->opcode == ALU_OP2_INST_MULLO_INT) _emitALUOperationBinary(shaderContext, aluInstruction, " * "); else if (aluInstruction->opcode == ALU_OP2_INST_MULLO_UINT) _emitALUOperationBinary(shaderContext, aluInstruction, " * "); else if( aluInstruction->opcode == ALU_OP2_INST_LSHL_INT ) _emitALUOperationBinary(shaderContext, aluInstruction, " << "); else if( aluInstruction->opcode == ALU_OP2_INST_LSHR_INT ) _emitALUOperationBinary(shaderContext, aluInstruction, " >> "); else if( aluInstruction->opcode == ALU_OP2_INST_ASHR_INT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(" >> "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_SIGNED_INT); _emitTypeConversionSuffix(shaderContext, 
LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SETGT || aluInstruction->opcode == ALU_OP2_INST_SETGE || aluInstruction->opcode == ALU_OP2_INST_SETNE || aluInstruction->opcode == ALU_OP2_INST_SETE ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); if( aluInstruction->opcode == ALU_OP2_INST_SETGT ) src->add(" > "); else if( aluInstruction->opcode == ALU_OP2_INST_SETGE ) src->add(" >= "); else if (aluInstruction->opcode == ALU_OP2_INST_SETNE) src->add(" != "); else if (aluInstruction->opcode == ALU_OP2_INST_SETE) src->add(" == "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")?1.0:0.0"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SETGT_DX10 || aluInstruction->opcode == ALU_OP2_INST_SETE_DX10 || aluInstruction->opcode == ALU_OP2_INST_SETNE_DX10 || aluInstruction->opcode == ALU_OP2_INST_SETGE_DX10 ) { if( aluInstruction->omod != 0 ) debugBreakpoint(); _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("(("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); if( aluInstruction->opcode == ALU_OP2_INST_SETE_DX10 ) src->add(" == "); else if( aluInstruction->opcode == ALU_OP2_INST_SETNE_DX10 ) src->add(" != "); else if( aluInstruction->opcode == ALU_OP2_INST_SETGT_DX10 ) src->add(" > "); else if( aluInstruction->opcode == ALU_OP2_INST_SETGE_DX10 ) src->add(" >= "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); 
src->add(")?-1:0)"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";"); src->add(_CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SETE_INT || aluInstruction->opcode == ALU_OP2_INST_SETNE_INT || aluInstruction->opcode == ALU_OP2_INST_SETGT_INT || aluInstruction->opcode == ALU_OP2_INST_SETGE_INT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); if( aluInstruction->opcode == ALU_OP2_INST_SETE_INT ) src->add(" == "); else if( aluInstruction->opcode == ALU_OP2_INST_SETNE_INT ) src->add(" != "); else if( aluInstruction->opcode == ALU_OP2_INST_SETGT_INT ) src->add(" > "); else if( aluInstruction->opcode == ALU_OP2_INST_SETGE_INT ) src->add(" >= "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")?-1:0"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SETGE_UINT || aluInstruction->opcode == ALU_OP2_INST_SETGT_UINT ) { // todo: Unsure if the result is unsigned or signed _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT); if( aluInstruction->opcode == ALU_OP2_INST_SETGE_UINT ) src->add(" >= "); else if( aluInstruction->opcode == ALU_OP2_INST_SETGT_UINT ) src->add(" > "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT); src->add(")?int(0xFFFFFFFF):int(0x0)"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); 
src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT || aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE_INT || aluInstruction->opcode == ALU_OP2_INST_PRED_SETE_INT || aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE_INT || aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT_INT ) { cemu_assert_debug(aluInstruction->writeMask == 0); bool isIntPred = (aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE_INT) || (aluInstruction->opcode == ALU_OP2_INST_PRED_SETE_INT) || (aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE_INT) || (aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT_INT); src->add("predResult"); src->add(" = ("); _emitOperandInputCode(shaderContext, aluInstruction, 0, isIntPred?LATTE_DECOMPILER_DTYPE_SIGNED_INT:LATTE_DECOMPILER_DTYPE_FLOAT); if (aluInstruction->opcode == ALU_OP2_INST_PRED_SETE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETE_INT) src->add(" == "); else if (aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT || aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT_INT) src->add(" > "); else if (aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE_INT) src->add(" >= "); else if (aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE_INT) src->add(" != "); else cemu_assert_debug(false); _emitOperandInputCode(shaderContext, aluInstruction, 1, isIntPred?LATTE_DECOMPILER_DTYPE_SIGNED_INT:LATTE_DECOMPILER_DTYPE_FLOAT); src->add(");" _CRLF); // handle result of predicate instruction based on current ALU clause type if( cfInstruction->type == GPU7_CF_INST_ALU_PUSH_BEFORE ) { src->addFmt("{} = predResult;" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth)); src->addFmt("{} = predResult == true && {} == true;" _CRLF, 
_getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth)); } else if( cfInstruction->type == GPU7_CF_INST_ALU_BREAK ) { // leave current loop src->add("if( predResult == false ) break;" _CRLF); } else cemu_assert_debug(false); } else if( aluInstruction->opcode == ALU_OP2_INST_KILLE_INT || aluInstruction->opcode == ALU_OP2_INST_KILLNE_INT || aluInstruction->opcode == ALU_OP2_INST_KILLGT_INT) { src->add("if( "); src->add(" ("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); if( aluInstruction->opcode == ALU_OP2_INST_KILLE_INT ) src->add(" == "); else if (aluInstruction->opcode == ALU_OP2_INST_KILLNE_INT) src->add(" != "); else if (aluInstruction->opcode == ALU_OP2_INST_KILLGT_INT) src->add(" > "); else debugBreakpoint(); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")"); src->add(") discard;"); src->add(_CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_KILLGT || aluInstruction->opcode == ALU_OP2_INST_KILLGE || aluInstruction->opcode == ALU_OP2_INST_KILLE ) { src->add("if( "); src->add(" ("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); if( aluInstruction->opcode == ALU_OP2_INST_KILLGT ) src->add(" > "); else if( aluInstruction->opcode == ALU_OP2_INST_KILLGE ) src->add(" >= "); else if( aluInstruction->opcode == ALU_OP2_INST_KILLE ) src->add(" == "); else debugBreakpoint(); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); src->add(") discard;"); src->add(_CRLF); } else { src->add("Unsupported instruction;" _CRLF); debug_printf("Unsupported ALU op2 instruction 0x%x\n", aluInstruction->opcode); shaderContext->shader->hasError = true; } } void _emitALUOP3InstructionCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, 
LatteDecompilerALUInstruction* aluInstruction) { StringBuf* src = shaderContext->shaderSource; cemu_assert_debug(aluInstruction->destRel == 0); // todo sint32 outputType = _getALUInstructionOutputDataType(shaderContext, aluInstruction); /* check for common no-op or mov-like instructions */ if (aluInstruction->opcode == ALU_OP3_INST_CMOVGE || aluInstruction->opcode == ALU_OP3_INST_CMOVE || aluInstruction->opcode == ALU_OP3_INST_CMOVGT || aluInstruction->opcode == ALU_OP3_INST_CNDE_INT || aluInstruction->opcode == ALU_OP3_INST_CNDGT_INT || aluInstruction->opcode == ALU_OP3_INST_CMOVGE_INT) { if (_isSameGPROperand(aluInstruction, 1, 2) && !_operandHasModifiers(aluInstruction, 1)) { // the condition is irrelevant as both operands are the same _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitOperandInputCode(shaderContext, aluInstruction, 1, outputType); src->add(";" _CRLF); return; } } /* generic handlers */ if( aluInstruction->opcode == ALU_OP3_INST_MULADD || aluInstruction->opcode == ALU_OP3_INST_MULADD_D2 || aluInstruction->opcode == ALU_OP3_INST_MULADD_M2 || aluInstruction->opcode == ALU_OP3_INST_MULADD_M4 || aluInstruction->opcode == ALU_OP3_INST_MULADD_IEEE ) { // todo: The difference between MULADD and MULADD IEEE is that the former has 0*anything=0 rule similar to MUL/MUL_IEEE? 
_emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); if (aluInstruction->opcode != ALU_OP3_INST_MULADD) // avoid unnecessary parenthesis to improve code readability slightly src->add("("); bool useDefaultMul = false; if (GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[1].sel) || GPU7_ALU_SRC_IS_ANY_CONST(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_ANY_CONST(aluInstruction->sourceOperand[1].sel)) { useDefaultMul = true; } if (aluInstruction->opcode == ALU_OP3_INST_MULADD_IEEE) useDefaultMul = true; if (shaderContext->options->strictMul && useDefaultMul == false) { src->add("mul_nonIEEE("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); } else { _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(" * "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); } src->add(" + "); _emitOperandInputCode(shaderContext, aluInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); if(aluInstruction->opcode != ALU_OP3_INST_MULADD) src->add(")"); if( aluInstruction->opcode == ALU_OP3_INST_MULADD_D2 ) src->add("/2.0"); else if( aluInstruction->opcode == ALU_OP3_INST_MULADD_M2 ) src->add("*2.0"); else if( aluInstruction->opcode == ALU_OP3_INST_MULADD_M4 ) src->add("*4.0"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if(aluInstruction->opcode == ALU_OP3_INST_CNDE_INT || aluInstruction->opcode == ALU_OP3_INST_CNDGT_INT || aluInstruction->opcode == ALU_OP3_INST_CMOVGE_INT) { bool requiresFloatResult = (aluInstruction->sourceOperand[1].neg != 0) || (aluInstruction->sourceOperand[2].neg != 0); 
_emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, requiresFloatResult?LATTE_DECOMPILER_DTYPE_FLOAT:LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("(("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); if (aluInstruction->opcode == ALU_OP3_INST_CNDE_INT) src->add(" == "); else if (aluInstruction->opcode == ALU_OP3_INST_CNDGT_INT) src->add(" > "); else if (aluInstruction->opcode == ALU_OP3_INST_CMOVGE_INT) src->add(" >= "); src->add("0)?("); _emitOperandInputCode(shaderContext, aluInstruction, 1, requiresFloatResult?LATTE_DECOMPILER_DTYPE_FLOAT:LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add("):("); _emitOperandInputCode(shaderContext, aluInstruction, 2, requiresFloatResult?LATTE_DECOMPILER_DTYPE_FLOAT:LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add("))"); _emitTypeConversionSuffix(shaderContext, requiresFloatResult?LATTE_DECOMPILER_DTYPE_FLOAT:LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP3_INST_CMOVGE || aluInstruction->opcode == ALU_OP3_INST_CMOVE || aluInstruction->opcode == ALU_OP3_INST_CMOVGT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("(("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); if (aluInstruction->opcode == ALU_OP3_INST_CMOVE) src->add(" == "); else if (aluInstruction->opcode == ALU_OP3_INST_CMOVGE) src->add(" >= "); else if (aluInstruction->opcode == ALU_OP3_INST_CMOVGT) src->add(" > "); src->add("0.0)?("); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add("):("); _emitOperandInputCode(shaderContext, aluInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add("))"); _emitTypeConversionSuffix(shaderContext, 
LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else { src->add("Unsupported instruction;" _CRLF); debug_printf("Unsupported ALU op3 instruction 0x%x\n", aluInstruction->opcode); shaderContext->shader->hasError = true; } } void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluRedcInstruction[4]) { StringBuf* src = shaderContext->shaderSource; if( aluRedcInstruction[0]->isOP3 == false && (aluRedcInstruction[0]->opcode == ALU_OP2_INST_DOT4 || aluRedcInstruction[0]->opcode == ALU_OP2_INST_DOT4_IEEE) ) { // todo: Figure out and implement the difference between normal DOT4 and DOT4_IEEE sint32 outputType = _getALUInstructionOutputDataType(shaderContext, aluRedcInstruction[0]); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[0]); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); // dot(vec4(op0),vec4(op1)) src->add("dot(vec4("); _emitOperandInputCode(shaderContext, aluRedcInstruction[0], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[1], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[2], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[3], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("),vec4("); _emitOperandInputCode(shaderContext, aluRedcInstruction[0], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[1], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[2], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[3], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("))"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( 
aluRedcInstruction[0]->isOP3 == false && (aluRedcInstruction[0]->opcode == ALU_OP2_INST_CUBE) ) { /* * How the CUBE instruction works (guessed mostly, based on DirectX/OpenGL spec): Input: vec4, 3d direction vector (can be unnormalized) + w component (which can be ignored, since it only scales the vector but does not affect the direction) First we figure out the major axis (closest axis-aligned vector). There are six possible vectors: +rx 0 -rx 1 +ry 2 -ry 3 +rz 4 -rz 5 The major axis vector is calculated by looking at the largest (absolute) 3d vector component and then setting the other components to 0.0 The value that remains in the axis vector is referred to as 'MajorAxis' by the AMD documentation. The S,T coordinates are taken from the other two components. Example: -0.5,0.2,0.4 -> -rx -> -0.5,0.0,0.0 MajorAxis: -0.5, S: 0.2 T: 0.4 The CUBE reduction instruction requires a specific mapping for the input vector: src0 = Rn.zzxy src1 = Rn.yxzz It's probably related to the way the instruction works internally? 
If we look at the individual components per ALU unit: z y -> Compare y/z z x -> Compare x/z x z -> Compare x/z y z -> Compare y/z */ sint32 outputType; src->add("redcCUBE("); src->add("vec4("); _emitOperandInputCode(shaderContext, aluRedcInstruction[0], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[1], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[2], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[3], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("),"); src->add("vec4("); _emitOperandInputCode(shaderContext, aluRedcInstruction[0], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[1], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[2], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[3], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("),"); src->add("cubeMapSTM,cubeMapFaceId);" _CRLF); // dst.X (S) outputType = _getALUInstructionOutputDataType(shaderContext, aluRedcInstruction[0]); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[0]); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("cubeMapSTM.x"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); // dst.Y (T) outputType = _getALUInstructionOutputDataType(shaderContext, aluRedcInstruction[1]); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[1]); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("cubeMapSTM.y"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); // dst.Z (MajorAxis) outputType = 
_getALUInstructionOutputDataType(shaderContext, aluRedcInstruction[2]); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[2]); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("cubeMapSTM.z"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); // dst.W (FaceId) outputType = _getALUInstructionOutputDataType(shaderContext, aluRedcInstruction[3]); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[3]); src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("cubeMapFaceId"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else cemu_assert_unimplemented(); } void _emitALUClauseRegisterBackupCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, sint32 startIndex) { sint32 instructionGroupIndex = cfInstruction->instructionsALU[startIndex].instructionGroupIndex; size_t groupSize = 1; while ((startIndex + groupSize) < cfInstruction->instructionsALU.size()) { if (instructionGroupIndex != cfInstruction->instructionsALU[startIndex + groupSize].instructionGroupIndex) break; groupSize++; } shaderContext->aluPVPSState->CreateGPRTemporaries(shaderContext, { cfInstruction->instructionsALU.data() + startIndex, groupSize }); } /* bool _isPVUsedInNextGroup(LatteDecompilerCFInstruction* cfInstruction, sint32 startIndex, sint32 pvUnit) { sint32 currentGroupIndex = cfInstruction->instructionsALU[startIndex].instructionGroupIndex; for (sint32 i = startIndex + 1; i < (sint32)cfInstruction->instructionsALU.size(); i++) { LatteDecompilerALUInstruction& aluInstructionItr = cfInstruction->instructionsALU[i]; if(aluInstructionItr.instructionGroupIndex == currentGroupIndex ) continue; if ((sint32)aluInstructionItr.instructionGroupIndex > currentGroupIndex + 1) return false; // check 
OP code type if (aluInstructionItr.isOP3) { // op0 if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[0].sel)) { uint32 chan = aluInstructionItr.sourceOperand[0].chan; if (pvUnit == chan) return true; } // op1 if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[1].sel)) { uint32 chan = aluInstructionItr.sourceOperand[1].chan; if (pvUnit == chan) return true; } // op2 if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[2].sel)) { uint32 chan = aluInstructionItr.sourceOperand[2].chan; if (pvUnit == chan) return true; } } else { // op0 if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[0].sel)) { uint32 chan = aluInstructionItr.sourceOperand[0].chan; if (pvUnit == chan) return true; } // op1 if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[1].sel)) { uint32 chan = aluInstructionItr.sourceOperand[1].chan; if (pvUnit == chan) return true; } // todo: Not all operations use both operands } } return false; } */ void _emitVec3(LatteDecompilerShaderContext* shaderContext, uint32 dataType, LatteDecompilerALUInstruction* aluInst0, sint32 opIdx0, LatteDecompilerALUInstruction* aluInst1, sint32 opIdx1, LatteDecompilerALUInstruction* aluInst2, sint32 opIdx2) { StringBuf* src = shaderContext->shaderSource; if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT) { src->add("vec3("); _emitOperandInputCode(shaderContext, aluInst0, opIdx0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluInst1, opIdx1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluInst2, opIdx2, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); } else if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) { src->add("ivec3("); _emitOperandInputCode(shaderContext, aluInst0, opIdx0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(","); _emitOperandInputCode(shaderContext, aluInst1, opIdx1, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(","); _emitOperandInputCode(shaderContext, aluInst2, opIdx2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")"); } 
else cemu_assert_unimplemented(); } void _emitGPRVectorAssignment(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction** aluInstructions, sint32 count) { StringBuf* src = shaderContext->shaderSource; // output var name (GPR) src->add(_getRegisterVarName(shaderContext, aluInstructions[0]->destGpr, -1)); src->add("."); for (sint32 f = 0; f < count; f++) { src->add(_getElementStrByIndex(aluInstructions[f]->destElem)); } src->add(" = "); } void _emitALUClauseCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { ALUClauseTemporariesState pvpsState; shaderContext->aluPVPSState = &pvpsState; StringBuf* src = shaderContext->shaderSource; LatteDecompilerALUInstruction* aluRedcInstruction[4]; size_t groupStartIndex = 0; for(size_t i=0; iinstructionsALU.size(); i++) { LatteDecompilerALUInstruction& aluInstruction = cfInstruction->instructionsALU[i]; if( aluInstruction.indexInGroup == 0 ) { src->addFmt("// {}" _CRLF, aluInstruction.instructionGroupIndex); // apply PV/PS updates for previous group if (i > 0) { pvpsState.TrackGroupOutputPVPS(shaderContext, cfInstruction->instructionsALU.data() + groupStartIndex, i - groupStartIndex); } groupStartIndex = i; // backup registers which are read after being written _emitALUClauseRegisterBackupCode(shaderContext, cfInstruction, i); } // detect reduction instructions and use a special handler bool isReductionOperation = _isReductionInstruction(&aluInstruction); if( isReductionOperation ) { cemu_assert_debug((i + 4) <= cfInstruction->instructionsALU.size()); aluRedcInstruction[0] = &aluInstruction; aluRedcInstruction[1] = &cfInstruction->instructionsALU[i + 1]; aluRedcInstruction[2] = &cfInstruction->instructionsALU[i + 2]; aluRedcInstruction[3] = &cfInstruction->instructionsALU[i + 3]; if( aluRedcInstruction[0]->isOP3 != aluRedcInstruction[1]->isOP3 || aluRedcInstruction[1]->isOP3 != aluRedcInstruction[2]->isOP3 || aluRedcInstruction[2]->isOP3 != 
aluRedcInstruction[3]->isOP3 ) debugBreakpoint(); if( aluRedcInstruction[0]->opcode != aluRedcInstruction[1]->opcode || aluRedcInstruction[1]->opcode != aluRedcInstruction[2]->opcode || aluRedcInstruction[2]->opcode != aluRedcInstruction[3]->opcode ) debugBreakpoint(); if( aluRedcInstruction[0]->omod != aluRedcInstruction[1]->omod || aluRedcInstruction[1]->omod != aluRedcInstruction[2]->omod || aluRedcInstruction[2]->omod != aluRedcInstruction[3]->omod ) debugBreakpoint(); if( aluRedcInstruction[0]->destClamp != aluRedcInstruction[1]->destClamp || aluRedcInstruction[1]->destClamp != aluRedcInstruction[2]->destClamp || aluRedcInstruction[2]->destClamp != aluRedcInstruction[3]->destClamp ) debugBreakpoint(); _emitALUReductionInstructionCode(shaderContext, aluRedcInstruction); i += 3; // skip the instructions that are part of the reduction operation } else /* not a reduction operation */ { if( aluInstruction.isOP3 ) { // op3 _emitALUOP3InstructionCode(shaderContext, cfInstruction, &aluInstruction); } else { // op2 if( aluInstruction.opcode == ALU_OP2_INST_NOP ) continue; // skip NOP instruction _emitALUOP2InstructionCode(shaderContext, cfInstruction, &aluInstruction); } } // handle omod sint32 outputDataType = _getALUInstructionOutputDataType(shaderContext, &aluInstruction); if( aluInstruction.omod != ALU_OMOD_NONE ) { if( outputDataType == LATTE_DECOMPILER_DTYPE_FLOAT ) { _emitInstructionOutputVariableName(shaderContext, &aluInstruction); if( aluInstruction.omod == ALU_OMOD_MUL2 ) src->add(" *= 2.0;" _CRLF); else if( aluInstruction.omod == ALU_OMOD_MUL4 ) src->add(" *= 4.0;" _CRLF); else if( aluInstruction.omod == ALU_OMOD_DIV2 ) src->add(" /= 2.0;" _CRLF); } else if( outputDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT ) { _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(" = "); src->add("floatBitsToInt(intBitsToFloat("); _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(")"); if( aluInstruction.omod == 1 ) 
src->add(" * 2.0"); else if( aluInstruction.omod == 2 ) src->add(" * 4.0"); else if( aluInstruction.omod == 3 ) src->add(" / 2.0"); src->add(");" _CRLF); } else { cemu_assert_unimplemented(); } } // handle clamp if( aluInstruction.destClamp != 0 ) { if( outputDataType == LATTE_DECOMPILER_DTYPE_FLOAT ) { _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(" = clamp("); _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(", 0.0, 1.0);" _CRLF); } else if( outputDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT ) { _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(" = clampFI32("); _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(");" _CRLF); } else { cemu_assert_unimplemented(); } } // handle result broadcasting for reduction instructions if( isReductionOperation ) { // reduction operations set all four PV components (todo: Needs further research. According to AMD docs, dot4 only sets PV.x? update: Unlike DOT4, CUBE sets all PV elements accordingly to their GPR output?) if( aluRedcInstruction[0]->opcode == ALU_OP2_INST_CUBE ) { // CUBE for (sint32 f = 0; f < 4; f++) { if (aluRedcInstruction[f]->writeMask != 0) continue; _emitInstructionPVPSOutputVariableName(shaderContext, aluRedcInstruction[f]); src->add(" = "); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[0]); src->add(";" _CRLF); } } else { // DOT4, DOT4_IEEE, etc. 
// reduction operation result is only set for output in redc[0], we also need to update redc[1] to redc[3] for(sint32 f=0; f<4; f++) { if( aluRedcInstruction[f]->writeMask == 0 ) _emitInstructionPVPSOutputVariableName(shaderContext, aluRedcInstruction[f]); else { if (f == 0) continue; _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[f]); } src->add(" = "); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[0]); src->add(";" _CRLF); } } } } shaderContext->aluPVPSState = nullptr; } /* * Emits code to access one component (xyzw) of the texture coordinate input vector */ void _emitTEXSampleCoordInputComponent(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction, sint32 componentIndex, sint32 interpretSrcAsType) { cemu_assert(componentIndex >= 0 && componentIndex < 4); cemu_assert_debug(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_SIGNED_INT || interpretSrcAsType == LATTE_DECOMPILER_DTYPE_FLOAT); StringBuf* src = shaderContext->shaderSource; sint32 elementSel = texInstruction->textureFetch.srcSel[componentIndex]; if (elementSel < 4) { _emitRegisterChannelAccessCode(shaderContext, texInstruction->srcGpr, elementSel, interpretSrcAsType); return; } const char* resultElemTable[4] = {"x","y","z","w"}; if(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_SIGNED_INT ) { if( elementSel == 4 ) src->add("floatBitsToInt(0.0)"); else if( elementSel == 5 ) src->add("floatBitsToInt(1.0)"); } else if(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_FLOAT ) { if( elementSel == 4 ) src->add("0.0"); else if( elementSel == 5 ) src->add("1.0"); } } const char* _texGprAccessElemTable[8] = {"x","y","z","w","_","_","_","_"}; char* _getTexGPRAccess(LatteDecompilerShaderContext* shaderContext, sint32 gprIndex, uint32 dataType, sint8 selX, sint8 selY, sint8 selZ, sint8 selW, char* tempBuffer) { // intBitsToFloat(R{}i.w) *tempBuffer = '\0'; uint8 elemCount = (selX > 0 ? 1 : 0) + (selY > 0 ? 1 : 0) + (selZ > 0 ? 
1 : 0) + (selW > 0 ? 1 : 0); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) { if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) ; // no conversion else if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT) strcat(tempBuffer, "intBitsToFloat("); else cemu_assert_unimplemented(); strcat(tempBuffer, _getRegisterVarName(shaderContext, gprIndex)); // _texGprAccessElemTable strcat(tempBuffer, "."); if (selX >= 0) strcat(tempBuffer, _texGprAccessElemTable[selX]); if (selY >= 0) strcat(tempBuffer, _texGprAccessElemTable[selY]); if (selZ >= 0) strcat(tempBuffer, _texGprAccessElemTable[selZ]); if (selW >= 0) strcat(tempBuffer, _texGprAccessElemTable[selW]); if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) ; // no conversion else if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT) strcat(tempBuffer, ")"); else cemu_assert_unimplemented(); } else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) { if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) cemu_assert_unimplemented(); else if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT) ; // no conversion else cemu_assert_unimplemented(); strcat(tempBuffer, _getRegisterVarName(shaderContext, gprIndex)); // _texGprAccessElemTable strcat(tempBuffer, "."); if (selX >= 0) strcat(tempBuffer, _texGprAccessElemTable[selX]); if (selY >= 0) strcat(tempBuffer, _texGprAccessElemTable[selY]); if (selZ >= 0) strcat(tempBuffer, _texGprAccessElemTable[selZ]); if (selW >= 0) strcat(tempBuffer, _texGprAccessElemTable[selW]); if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) cemu_assert_unimplemented(); else if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT) ; // no conversion else cemu_assert_unimplemented(); } else cemu_assert_unimplemented(); return tempBuffer; } void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; if (texInstruction->textureFetch.textureIndex < 0 || 
texInstruction->textureFetch.textureIndex >= LATTE_NUM_MAX_TEX_UNITS) { // skip out of bounds texture unit access return; } auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; char tempBuffer0[32]; char tempBuffer1[32]; src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr)); src->add("."); const char* resultElemTable[4] = {"x","y","z","w"}; sint32 numWrittenElements = 0; for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[f]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { debugBreakpoint(); } } // texture sampler opcode uint32 texOpcode = texInstruction->opcode; if (shaderContext->shaderType == LatteConst::ShaderType::Vertex) { // vertex shader forces LOD to zero, but certain sampler types don't support textureLod(...) API if (texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) texOpcode = GPU7_TEX_INST_SAMPLE_C; } // check if offset is used bool hasOffset = false; if( texInstruction->textureFetch.offsetX != 0 || texInstruction->textureFetch.offsetY != 0 || texInstruction->textureFetch.offsetZ != 0 ) hasOffset = true; // emit sample code if (shaderContext->shader->textureIsIntegerFormat[texInstruction->textureFetch.textureIndex]) { // integer samplers if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) // uint to int { if(numWrittenElements == 1) src->add(" = int("); else shaderContext->shaderSource->addFmt(" = ivec{}(", numWrittenElements); } else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->add(" = uintBitsToFloat("); } else { // float samplers if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->add(" = floatBitsToInt("); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->add(" = ("); } bool unnormalizationHandled = false; bool useTexelCoordinates = false; // handle illegal 
combinations if (texOpcode == GPU7_TEX_INST_FETCH4 && (texDim == Latte::E_DIM::DIM_1D || texDim == Latte::E_DIM::DIM_1D_ARRAY)) { // fetch4 is not allowed on 1D textures // seen in YWW during boss fight of Level 1-4 // todo - investigate what this returns on actual HW if (numWrittenElements == 1) shaderContext->shaderSource->add("0.0"); else shaderContext->shaderSource->addFmt("vec{}(0.0)", numWrittenElements); shaderContext->shaderSource->add(");" _CRLF); return; } if (texOpcode == GPU7_TEX_INST_SAMPLE && (texInstruction->textureFetch.unnormalized[0] && texInstruction->textureFetch.unnormalized[1] && texInstruction->textureFetch.unnormalized[2] && texInstruction->textureFetch.unnormalized[3]) ) { // texture is likely a RECT if (hasOffset) cemu_assert_unimplemented(); src->add("texelFetch("); unnormalizationHandled = true; useTexelCoordinates = true; } else if( texOpcode == GPU7_TEX_INST_FETCH4 ) { if( hasOffset ) cemu_assert_unimplemented(); src->add("textureGather("); } else if( texOpcode == GPU7_TEX_INST_LD ) { if( hasOffset ) cemu_assert_unimplemented(); src->add("texelFetch("); unnormalizationHandled = true; useTexelCoordinates = true; } else if( texOpcode == GPU7_TEX_INST_SAMPLE_L ) { // sample with LOD value set in gpr.w (replaces computed LOD value) if( hasOffset ) src->add("textureLodOffset("); else src->add("textureLod("); } else if (texOpcode == GPU7_TEX_INST_SAMPLE_LZ) { // sample with LOD set to 0.0 (replaces computed LOD value) if (hasOffset) src->add("textureLodOffset("); else src->add("textureLod("); } else if (texOpcode == GPU7_TEX_INST_SAMPLE_LB) { // sample with LOD biased // note: AMD doc says LOD bias is calculated from instruction LOD_BIAS field. But it appears that LOD bias is taken from input register. Might actually be both? 
if (hasOffset) src->add("textureOffset("); else src->add("texture("); } else if (texOpcode == GPU7_TEX_INST_SAMPLE) { if (hasOffset) src->add("textureOffset("); else src->add("texture("); } else if (texOpcode == GPU7_TEX_INST_SAMPLE_C_L) { // sample with LOD value set in gpr.w (replaces computed LOD value) if (hasOffset) src->add("textureLodOffset("); else src->add("textureLod("); } else if (texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) { // sample with LOD set to 0.0 (replaces computed LOD value) if (hasOffset) src->add("textureLodOffset("); else src->add("textureLod("); } else if (texOpcode == GPU7_TEX_INST_SAMPLE_C) { if (hasOffset) src->add("textureOffset("); else src->add("texture("); } else if (texOpcode == GPU7_TEX_INST_SAMPLE_G) { if (hasOffset) cemu_assert_unimplemented(); src->add("textureGrad("); } else { if( hasOffset ) cemu_assert_unimplemented(); cemu_assert_unimplemented(); src->add("texture("); } src->addFmt("{}{}, ", _getTextureUnitVariablePrefixName(shaderContext->shader->shaderType), texInstruction->textureFetch.textureIndex); // for textureGather() add shift (todo: depends on rounding mode set in sampler registers?) 
if (texOpcode == GPU7_TEX_INST_FETCH4) { if (texDim == Latte::E_DIM::DIM_2D) { //src->addFmt2("(vec2(-0.1) / vec2(textureSize({}{},0).xy)) + ", gpu7Decompiler_getTextureUnitVariablePrefixName(shaderContext->shader->shaderType), texInstruction->textureIndex); // vec2(-0.00001) is minimum to break Nvidia // vec2(0.0001) is minimum to fix shadows on Intel, also fixes it on AMD (Windows and Linux) // todo - emulating coordinate rounding mode correctly is tricky // GX2 supports two modes: Truncate or rounding according to DX9 rules // Vulkan uses truncate mode when point sampling (min and mag is both nearest) otherwise it uses rounding // adding a small fixed bias is enough to avoid vendor-specific cases where small inaccuracies cause the number to get rounded down due to truncation src->addFmt("vec2(0.0001) + "); } } const sint32 texCoordDataType = (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT; if(useTexelCoordinates) { // handle integer coordinates for texelFetch if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) { src->add("ivec2("); src->add("vec2("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, texCoordDataType); src->addFmt(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, texCoordDataType); src->addFmt(")*uf_tex{}Scale", texInstruction->textureFetch.textureIndex); // close vec2 and scale src->add("), 0"); // close ivec2 and lod param // todo - lod } else if (texDim == Latte::E_DIM::DIM_1D) { // VC DS games forget to initialize textures and use texel fetch on an uninitialized texture (a dim of 0 maps to 1D) src->add("int("); src->add("float("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, (texOpcode == GPU7_TEX_INST_LD) ? 
LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT); src->addFmt(")*uf_tex{}Scale.x", texInstruction->textureFetch.textureIndex); src->add("), 0"); // todo - lod } else cemu_assert_debug(false); } else /* useTexelCoordinates == false */ { // float coordinates if ( (texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_L || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) ) { // shadow sampler if (texDim == Latte::E_DIM::DIM_2D_ARRAY) { // 3 coords + compare value (as vec4) src->add("vec4("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); src->addFmt(",{})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0)); } else if (texDim == Latte::E_DIM::DIM_CUBEMAP) { // 2 coords + faceId if (texInstruction->textureFetch.srcSel[0] >= 4 || texInstruction->textureFetch.srcSel[1] >= 4) { debugBreakpoint(); } src->add("vec4("); src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0)); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->addFmt(")"); src->addFmt(",cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index } else if (texDim == Latte::E_DIM::DIM_1D) { // 1 coord + 1 unused coord (per GLSL spec) + compare value if (texInstruction->textureFetch.srcSel[0] >= 4) { debugBreakpoint(); } src->addFmt("vec3({},0.0,{})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, 
texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); } else { // 2 coords + compare value (as vec3) if (texInstruction->textureFetch.srcSel[0] >= 4 && texInstruction->textureFetch.srcSel[1] >= 4) { debugBreakpoint(); } src->addFmt("vec3({}, {})", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); } } else if( texDim == Latte::E_DIM::DIM_3D || texDim == Latte::E_DIM::DIM_2D_ARRAY ) { // 3 coords src->add("vec3("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); } else if( texDim == Latte::E_DIM::DIM_CUBEMAP ) { // 2 coords + faceId cemu_assert_debug(texInstruction->textureFetch.srcSel[0] < 4); cemu_assert_debug(texInstruction->textureFetch.srcSel[1] < 4); src->add("vec4("); src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0)); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")"); src->addFmt(",cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index } else if( texDim == Latte::E_DIM::DIM_1D ) { // 1 coord src->add(_getTexGPRAccess(shaderContext, texInstruction->srcGpr, 
LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0)); } else { // 2 coords src->add("vec2("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); // avoid truncate to effectively round downwards on texel edges if (ActiveSettings::ForceSamplerRoundToPrecision()) src->addFmt("+ vec2(1.0)/vec2(textureSize({}{}, 0))/512.0", _getTextureUnitVariablePrefixName(shaderContext->shader->shaderType), texInstruction->textureFetch.textureIndex); } // lod or lod bias parameter if( texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L) { src->add(","); if(texOpcode == GPU7_TEX_INST_SAMPLE_LB) src->add(_FormatFloatAsGLSLConstant((float)texInstruction->textureFetch.lodBias / 16.0f)); else _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 3, LATTE_DECOMPILER_DTYPE_FLOAT); } else if( texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ ) { src->add(",0.0"); } } // gradient parameters if (texOpcode == GPU7_TEX_INST_SAMPLE_G) { if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_1D ) { src->add(",gradH.xy,gradV.xy"); } else { cemu_assert_unimplemented(); } } // offset if( texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ || texOpcode == GPU7_TEX_INST_SAMPLE || texOpcode == GPU7_TEX_INST_SAMPLE_C ) { if( hasOffset ) { uint8 offsetComponentCount = 0; if( texDim == Latte::E_DIM::DIM_1D ) offsetComponentCount = 1; else if( texDim == Latte::E_DIM::DIM_2D ) offsetComponentCount = 2; else if( texDim == Latte::E_DIM::DIM_3D ) offsetComponentCount = 3; else if( texDim == Latte::E_DIM::DIM_2D_ARRAY ) offsetComponentCount = 2; else cemu_assert_unimplemented(); if( 
(texInstruction->textureFetch.offsetX&1) ) cemu_assert_unimplemented(); if( (texInstruction->textureFetch.offsetY&1) ) cemu_assert_unimplemented(); if ((texInstruction->textureFetch.offsetZ & 1)) cemu_assert_unimplemented(); if( offsetComponentCount == 1 ) src->addFmt(",{}", texInstruction->textureFetch.offsetX/2); else if( offsetComponentCount == 2 ) src->addFmt(",ivec2({},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); else if( offsetComponentCount == 3 ) src->addFmt(",ivec3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); } } // lod bias if( texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ ) { src->add(")"); if (numWrittenElements > 1) { // result is copied into multiple channels src->add("."); for (sint32 f = 0; f < numWrittenElements; f++) { cemu_assert_debug(texInstruction->dstSel[f] == 0); // only x component is defined src->add("x"); } } } else { src->add(")."); for (sint32 f = 0; f < 4; f++) { if( texInstruction->dstSel[f] < 4 ) { uint8 elemIndex = texInstruction->dstSel[f]; if (texOpcode == GPU7_TEX_INST_FETCH4) { // GLSL's textureGather() and GPU7's FETCH4 instruction have a different order of elements // xyzw: top-left, top-right, bottom-right, bottom-left // textureGather xyzw // fetch4 yzxw // translate index from fetch4 to textureGather order static uint8 fetchToGather[4] = { 2, // x -> z 0, // y -> x 1, // z -> y 3, // w -> w }; elemIndex = fetchToGather[elemIndex]; } src->add(resultElemTable[elemIndex]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { cemu_assert_unimplemented(); } } } src->add(");"); // debug #ifdef CEMU_DEBUG_ASSERT if(texInstruction->opcode == GPU7_TEX_INST_LD ) src->add(" // TEX_INST_LD"); else if(texInstruction->opcode == GPU7_TEX_INST_SAMPLE ) src->add(" // TEX_INST_SAMPLE"); else 
if(texInstruction->opcode == GPU7_TEX_INST_SAMPLE_L ) src->add(" // TEX_INST_SAMPLE_L"); else if(texInstruction->opcode == GPU7_TEX_INST_SAMPLE_LZ ) src->add(" // TEX_INST_SAMPLE_LZ"); else if(texInstruction->opcode == GPU7_TEX_INST_SAMPLE_C ) src->add(" // TEX_INST_SAMPLE_C"); else if(texInstruction->opcode == GPU7_TEX_INST_SAMPLE_G ) src->add(" // TEX_INST_SAMPLE_G"); else src->addFmt(" // 0x{:02x}", texInstruction->opcode); if (texInstruction->opcode != texOpcode) src->addFmt(" (applied as 0x{:02x})", texOpcode); src->addFmt(" OffsetXYZ {:02x} {:02x} {:02x}", (uint8)texInstruction->textureFetch.offsetX&0xFF, (uint8)texInstruction->textureFetch.offsetY&0xFF, (uint8)texInstruction->textureFetch.offsetZ&0xFF); #endif src->add("" _CRLF); } void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; src->addFmt("R{}", texInstruction->dstGpr); src->add("i"); src->add("."); const char* resultElemTable[4] = {"x","y","z","w"}; sint32 numWrittenElements = 0; for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[f]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { cemu_assert_unimplemented(); } } // todo - mip index parameter? 
auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; if (texDim == Latte::E_DIM::DIM_1D) src->addFmt(" = ivec4(textureSize({}{}, 0),1,1,1).", _getTextureUnitVariablePrefixName(shaderContext->shader->shaderType), texInstruction->textureFetch.textureIndex); else if (texDim == Latte::E_DIM::DIM_1D_ARRAY) src->addFmt(" = ivec4(textureSize({}{}, 0),1,1).", _getTextureUnitVariablePrefixName(shaderContext->shader->shaderType), texInstruction->textureFetch.textureIndex); else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) src->addFmt(" = ivec4(textureSize({}{}, 0),1,1).", _getTextureUnitVariablePrefixName(shaderContext->shader->shaderType), texInstruction->textureFetch.textureIndex); else if (texDim == Latte::E_DIM::DIM_2D_ARRAY) src->addFmt(" = ivec4(textureSize({}{}, 0),1).", _getTextureUnitVariablePrefixName(shaderContext->shader->shaderType), texInstruction->textureFetch.textureIndex); else { cemu_assert_debug(false); src->addFmt(" = ivec4(textureSize({}{}, 0),1,1).", _getTextureUnitVariablePrefixName(shaderContext->shader->shaderType), texInstruction->textureFetch.textureIndex); } for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[texInstruction->dstSel[f]]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { debugBreakpoint(); } } src->add(";" _CRLF); } void _emitTEXGetCompTexLodCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr)); src->add("."); const char* resultElemTable[4] = {"x","y","z","w"}; sint32 numWrittenElements = 0; for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[f]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { debugBreakpoint(); } } 
src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType); if( shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex] == Latte::E_DIM::DIM_CUBEMAP ) { // 3 coordinates if(shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("vec4(textureQueryLod({}{}, {}.{}{}{}),0.0,0.0)", _getTextureUnitVariablePrefixName(shaderContext->shader->shaderType), texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]); else src->addFmt("vec4(textureQueryLod({}{}, intBitsToFloat({}.{}{}{})),0.0,0.0)", _getTextureUnitVariablePrefixName(shaderContext->shader->shaderType), texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]); } else { if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("vec4(textureQueryLod({}{}, {}.{}{}),0.0,0.0)", _getTextureUnitVariablePrefixName(shaderContext->shader->shaderType), texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]); else src->addFmt("vec4(textureQueryLod({}{}, intBitsToFloat({}.{}{})),0.0,0.0)", _getTextureUnitVariablePrefixName(shaderContext->shader->shaderType), texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]); 
debugBreakpoint(); } _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType); src->add("."); for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[texInstruction->dstSel[f]]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { debugBreakpoint(); } } src->add(";" _CRLF); } void _emitTEXSetCubemapIndexCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; src->addFmt("cubeMapArrayIndex{}", texInstruction->textureFetch.textureIndex); const char* resultElemTable[4] = {"x","y","z","w"}; if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt(" = intBitsToFloat(R{}i.{});" _CRLF, texInstruction->srcGpr, resultElemTable[texInstruction->textureFetch.srcSel[0]]); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt(" = R{}f.{};" _CRLF, texInstruction->srcGpr, resultElemTable[texInstruction->textureFetch.srcSel[0]]); else cemu_assert_unimplemented(); } void _emitTEXGetGradientsHV(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; sint32 componentCount = 0; for (sint32 i = 0; i < 4; i++) { if(texInstruction->dstSel[i] == 7) continue; componentCount++; } src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr)); src->add("."); const char* resultElemTable[4] = { "x","y","z","w" }; sint32 numWrittenElements = 0; for (sint32 f = 0; f < 4; f++) { if (texInstruction->dstSel[f] < 4) { src->add(resultElemTable[f]); numWrittenElements++; } else if (texInstruction->dstSel[f] == 7) { // masked and not written } else { debugBreakpoint(); } } const char* funcName; if (texInstruction->opcode == GPU7_TEX_INST_GET_GRADIENTS_H) funcName = "dFdx"; else funcName = "dFdy"; 
src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType); src->addFmt("{}(", funcName); _emitRegisterAccessCode(shaderContext, texInstruction->srcGpr, (componentCount >= 1) ? texInstruction->textureFetch.srcSel[0] : -1, (componentCount >= 2) ? texInstruction->textureFetch.srcSel[1] : -1, (componentCount >= 3) ? texInstruction->textureFetch.srcSel[2] : -1, (componentCount >= 4)?texInstruction->textureFetch.srcSel[3]:-1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType); src->add(";" _CRLF); } void _emitTEXSetGradientsHV(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; if (texInstruction->opcode == GPU7_TEX_INST_SET_GRADIENTS_H) src->add("gradH = "); else src->add("gradV = "); _emitRegisterAccessCode(shaderContext, texInstruction->srcGpr, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], texInstruction->textureFetch.srcSel[2], texInstruction->textureFetch.srcSel[3], LATTE_DECOMPILER_DTYPE_FLOAT); src->add(";" _CRLF); } void _emitGSReadInputVFetchCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr)); src->add("."); const char* resultElemTable[4] = {"x","y","z","w"}; sint32 numWrittenElements = 0; for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[f]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { cemu_assert_unimplemented(); } } src->add(" = "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType); src->add("(v2g["); if 
(texInstruction->textureFetch.srcSel[0] >= 4) cemu_assert_unimplemented(); if (texInstruction->textureFetch.srcSel[1] >= 4) cemu_assert_unimplemented(); // todo: Index type src->add("0"); src->addFmt("].passV2GParameter{}.", texInstruction->textureFetch.offset/16); for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[texInstruction->dstSel[f]]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { cemu_assert_unimplemented(); } } src->add(")"); _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType); src->add(";" _CRLF); } sint32 _writeDestMaskXYZW(LatteDecompilerShaderContext* shaderContext, sint8* dstSel) { StringBuf* src = shaderContext->shaderSource; const char* resultElemTable[4] = { "x","y","z","w" }; sint32 numWrittenElements = 0; for (sint32 f = 0; f < 4; f++) { if (dstSel[f] < 4) { src->add(resultElemTable[f]); numWrittenElements++; } else if (dstSel[f] == 7) { // masked and not written } else { cemu_assert_unimplemented(); } } return numWrittenElements; } void _emitTEXVFetchCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { // handle special case where geometry shader reads input attributes from vertex shader via ringbuffer StringBuf* src = shaderContext->shaderSource; if( texInstruction->textureFetch.textureIndex == 0x9F && shaderContext->shaderType == LatteConst::ShaderType::Geometry ) { _emitGSReadInputVFetchCode(shaderContext, texInstruction); return; } src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr)); src->add("."); _writeDestMaskXYZW(shaderContext, texInstruction->dstSel); const char* resultElemTable[4] = {"x","y","z","w"}; src->add(" = "); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->add("floatBitsToInt("); else src->add("("); src->addFmt("{}{}[", 
_getShaderUniformBlockVariableName(shaderContext->shader->shaderType), texInstruction->textureFetch.textureIndex - 0x80); if( shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT ) src->addFmt("{}.{}", _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]]); else src->addFmt("floatBitsToInt({}.{})", _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]]); src->add("]."); for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[texInstruction->dstSel[f]]); } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { debugBreakpoint(); } } src->add(");" _CRLF); } void _emitTEXReadMemCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr)); src->add("."); sint32 count = _writeDestMaskXYZW(shaderContext, texInstruction->dstSel); src->add(" = "); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->add("floatBitsToInt("); else src->add("("); sint32 readCount; if (texInstruction->memRead.format == FMT_32_FLOAT) { readCount = 1; // todo src->add("0.0"); } else if (texInstruction->memRead.format == FMT_32_32_FLOAT) { readCount = 2; // todo src->add("vec2(0.0,0.0)"); } else if (texInstruction->memRead.format == FMT_32_32_32_FLOAT) { readCount = 3; // todo src->add("vec3(0.0,0.0,0.0)"); } else { cemu_assert_unimplemented(); } if (count < readCount) { if (count == 1) src->add(".x"); else if (count == 2) src->add(".xy"); else if (count == 3) src->add(".xyz"); } src->add(");" _CRLF); } void _emitTEXClauseCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { cemu_assert_debug(cfInstruction->instructionsALU.empty()); for(auto& texInstruction : 
cfInstruction->instructionsTEX) { if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_L || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LB || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LZ || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_L || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_LZ || texInstruction.opcode == GPU7_TEX_INST_FETCH4 || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_G || texInstruction.opcode == GPU7_TEX_INST_LD ) _emitTEXSampleTextureCode(shaderContext, &texInstruction); else if( texInstruction.opcode == GPU7_TEX_INST_GET_TEXTURE_RESINFO ) _emitTEXGetTextureResInfoCode(shaderContext, &texInstruction); else if( texInstruction.opcode == GPU7_TEX_INST_GET_COMP_TEX_LOD ) _emitTEXGetCompTexLodCode(shaderContext, &texInstruction); else if( texInstruction.opcode == GPU7_TEX_INST_SET_CUBEMAP_INDEX ) _emitTEXSetCubemapIndexCode(shaderContext, &texInstruction); else if (texInstruction.opcode == GPU7_TEX_INST_GET_GRADIENTS_H || texInstruction.opcode == GPU7_TEX_INST_GET_GRADIENTS_V) _emitTEXGetGradientsHV(shaderContext, &texInstruction); else if (texInstruction.opcode == GPU7_TEX_INST_SET_GRADIENTS_H || texInstruction.opcode == GPU7_TEX_INST_SET_GRADIENTS_V) _emitTEXSetGradientsHV(shaderContext, &texInstruction); else if (texInstruction.opcode == GPU7_TEX_INST_VFETCH) _emitTEXVFetchCode(shaderContext, &texInstruction); else if (texInstruction.opcode == GPU7_TEX_INST_MEM) _emitTEXReadMemCode(shaderContext, &texInstruction); else cemu_assert_unimplemented(); } } // generate the code for reading the source input GPR (or constants) for exports void _emitExportGPRReadCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, sint32 requiredType, uint32 burstIndex) { StringBuf* src = shaderContext->shaderSource; uint32 numOutputs = 4; if( cfInstruction->type == GPU7_CF_INST_MEM_RING_WRITE ) { numOutputs = 
(cfInstruction->memWriteCompMask&1)?1:0; numOutputs += (cfInstruction->memWriteCompMask&2)?1:0; numOutputs += (cfInstruction->memWriteCompMask&4)?1:0; numOutputs += (cfInstruction->memWriteCompMask&8)?1:0; } if (requiredType == LATTE_DECOMPILER_DTYPE_FLOAT) { if(numOutputs == 1) src->add("float("); else src->addFmt("vec{}(", numOutputs); } else if (requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) { if (numOutputs == 1) src->add("int("); else src->addFmt("ivec{}(", numOutputs); } else cemu_assert_unimplemented(); sint32 actualOutputs = 0; for(sint32 i=0; i<4; i++) { // todo: Use type of register element based on information from type tracker (currently we assume it's always a signed integer) uint32 exportSel = 0; if( cfInstruction->type == GPU7_CF_INST_MEM_RING_WRITE ) { exportSel = i; if( (cfInstruction->memWriteCompMask&(1<exportComponentSel[i]; } if( actualOutputs > 0 ) src->add(", "); actualOutputs++; if( exportSel < 4 ) { _emitRegisterAccessCode(shaderContext, cfInstruction->exportSourceGPR+burstIndex, exportSel, -1, -1, -1, requiredType); } else if (exportSel == 4) { // constant zero src->add("0"); } else if (exportSel == 5) { // constant one src->add("1.0"); } else if( exportSel == 7 ) { // element masked (which means 0 is exported?) 
src->add("0"); } else { cemu_assert_debug(false); src->add("0"); } } if( requiredType == LATTE_DECOMPILER_DTYPE_FLOAT ) src->add(")"); else if( requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT ) src->add(")"); else cemu_assert_unimplemented(); } void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { StringBuf* src = shaderContext->shaderSource; src->add("// export" _CRLF); if(shaderContext->shaderType == LatteConst::ShaderType::Vertex ) { if( cfInstruction->exportBurstCount != 0 ) debugBreakpoint(); if (cfInstruction->exportType == 1 && cfInstruction->exportArrayBase == GPU7_DECOMPILER_CF_EXPORT_BASE_POSITION) { // export position // GX2 special state 0 disables rasterizer viewport offset and scaling (probably, exact mechanism is not known). Handle this here bool hasAnyViewportScaleDisabled = !shaderContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_SCALE_ENA() || !shaderContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Y_SCALE_ENA() || !shaderContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Z_SCALE_ENA(); if (hasAnyViewportScaleDisabled) { src->add("vec4 finalPos = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); src->add(";" _CRLF); src->add("finalPos.xy = finalPos.xy * uf_windowSpaceToClipSpaceTransform - vec2(1.0,1.0);"); src->add("SET_POSITION(finalPos);"); } else { src->add("SET_POSITION("); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); src->add(");" _CRLF); } } else if (cfInstruction->exportType == 1 && cfInstruction->exportArrayBase == GPU7_DECOMPILER_CF_EXPORT_POINT_SIZE ) { // export gl_PointSize if (shaderContext->analyzer.outputPointSize) { cemu_assert_debug(shaderContext->analyzer.writesPointSize); src->add("gl_PointSize = ("); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); src->add(").x"); src->add(";" _CRLF); } } else if( cfInstruction->exportType == 2 && 
cfInstruction->exportArrayBase < 32 )
		{
			// export parameter
			// the export index is mapped to a semantic id; id 0xFF means the output is skipped
			sint32 paramIndex = cfInstruction->exportArrayBase;
			uint32 vsSemanticId = _getVertexShaderOutParamSemanticId(shaderContext->contextRegisters, paramIndex);
			if (vsSemanticId != 0xFF)
			{
				src->addFmt("passParameterSem{} = ", vsSemanticId);
				_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0);
				src->add(";" _CRLF);
			}
			else
			{
				src->add("// skipped export to semanticId 255" _CRLF);
			}
		}
		else
			cemu_assert_unimplemented();
	}
	else if(shaderContext->shaderType == LatteConst::ShaderType::Pixel )
	{
		if( cfInstruction->exportType == 0 && cfInstruction->exportArrayBase < 8 )
		{
			// color exports: one output per burst element
			for(uint32 i=0; i<(cfInstruction->exportBurstCount+1); i++)
			{
				sint32 pixelColorOutputIndex = LatteDecompiler_getColorOutputIndexFromExportIndex(shaderContext, cfInstruction->exportArrayBase+i);
				// if color output is for target 0, then also handle alpha test
				bool alphaTestEnable = shaderContext->contextRegistersNew->SX_ALPHA_TEST_CONTROL.get_ALPHA_TEST_ENABLE();
				auto alphaTestFunc = shaderContext->contextRegistersNew->SX_ALPHA_TEST_CONTROL.get_ALPHA_FUNC();
				if( pixelColorOutputIndex == 0 && alphaTestEnable && alphaTestFunc == Latte::E_COMPAREFUNC::NEVER )
				{
					// never pass alpha test
					src->add("discard;" _CRLF);
				}
				else if( pixelColorOutputIndex == 0 && alphaTestEnable && alphaTestFunc != Latte::E_COMPAREFUNC::ALWAYS)
				{
					// emits: if( ((<export value>).a <op> uf_alphaTestRef) == false) discard;
					src->add("if( ((");
					_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i);
					src->add(").a ");
					// comparison operator for the alpha test; NEVER and ALWAYS were handled by the conditions above
					switch( alphaTestFunc )
					{
					case Latte::E_COMPAREFUNC::LESS:
						src->add("<");
						break;
					case Latte::E_COMPAREFUNC::EQUAL:
						src->add("==");
						break;
					case Latte::E_COMPAREFUNC::LEQUAL:
						src->add("<=");
						break;
					case Latte::E_COMPAREFUNC::GREATER:
						src->add(">");
						break;
					case Latte::E_COMPAREFUNC::NOTEQUAL:
						src->add("!=");
						break;
					case Latte::E_COMPAREFUNC::GEQUAL:
						src->add(">=");
						break;
					}
					src->add(" uf_alphaTestRef");
					src->add(") == false) discard;" _CRLF);
				}
				// pixel color output
src->addFmt("passPixelColor{} = ", pixelColorOutputIndex); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i); src->add(";" _CRLF); if( cfInstruction->exportArrayBase+i >= 8 ) cemu_assert_unimplemented(); } } else if( cfInstruction->exportType == 0 && cfInstruction->exportArrayBase == 61 ) { // pixel depth or gl_FragStencilRefARB if( cfInstruction->exportBurstCount > 0 ) cemu_assert_unimplemented(); if (cfInstruction->exportComponentSel[0] == 7) { cemu_assert_unimplemented(); // gl_FragDepth ? } if (cfInstruction->exportComponentSel[1] != 7) { cemu_assert_unimplemented(); // exporting to gl_FragStencilRefARB } if (cfInstruction->exportComponentSel[2] != 7) { cemu_assert_unimplemented(); // ukn } if (cfInstruction->exportComponentSel[3] != 7) { cemu_assert_unimplemented(); // ukn } src->add("gl_FragDepth = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); src->add(".x"); src->add(";" _CRLF); } else cemu_assert_unimplemented(); } } void _emitXYZWByMask(StringBuf* src, uint32 mask) { if( (mask&(1<<0)) != 0 ) src->add("x"); if( (mask&(1<<1)) != 0 ) src->add("y"); if( (mask&(1<<2)) != 0 ) src->add("z"); if( (mask&(1<<3)) != 0 ) src->add("w"); } void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { StringBuf* src = shaderContext->shaderSource; // calculate parameter output (based on ring buffer output offset relative to GS unit) uint32 bytesPerVertex = shaderContext->contextRegisters[mmSQ_GS_VERT_ITEMSIZE] * 4; bytesPerVertex = std::max(bytesPerVertex, (uint32)1); // avoid division by zero uint32 parameterOffset = ((cfInstruction->exportArrayBase * 4) % bytesPerVertex); // for geometry shaders with streamout, MEM_RING_WRITE is used to pass the data to the copy shader, which then uses STREAM*_WRITE if (shaderContext->shaderType == LatteConst::ShaderType::Geometry && shaderContext->analyzer.hasStreamoutEnable) { // if streamout is 
enabled, we generate transform feedback output code instead of the normal gs output for (uint32 burstIndex = 0; burstIndex < (cfInstruction->exportBurstCount + 1); burstIndex++) { parameterOffset = ((cfInstruction->exportArrayBase * 4 + burstIndex*0x10) % bytesPerVertex); // find matching stream write in copy shader LatteGSCopyShaderStreamWrite_t* streamWrite = nullptr; for (auto& it : shaderContext->parsedGSCopyShader->list_streamWrites) { if (it.offset == parameterOffset) { streamWrite = ⁢ break; } } if (streamWrite == nullptr) { cemu_assert_suspicious(); return; } for (sint32 i = 0; i < 4; i++) { if ((cfInstruction->memWriteCompMask&(1 << i)) == 0) continue; if (shaderContext->options->useTFViaSSBO) { uint32 u32Offset = streamWrite->exportArrayBase + i; src->addFmt("sb_buffer[sbBase{} + {}]", streamWrite->bufferIndex, u32Offset); } else { src->addFmt("sb{}[{}]", streamWrite->bufferIndex, streamWrite->exportArrayBase + i); } src->add(" = "); _emitTypeConversionPrefix(shaderContext, shaderContext->typeTracker.defaultDataType, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->addFmt("{}.", _getRegisterVarName(shaderContext, cfInstruction->exportSourceGPR+burstIndex)); if (i == 0) src->add("x"); else if (i == 1) src->add("y"); else if (i == 2) src->add("z"); else if (i == 3) src->add("w"); _emitTypeConversionSuffix(shaderContext, shaderContext->typeTracker.defaultDataType, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(";" _CRLF); } } return; } if (shaderContext->shaderType == LatteConst::ShaderType::Vertex) { if (cfInstruction->memWriteElemSize != 3) cemu_assert_unimplemented(); if ((cfInstruction->exportArrayBase & 3) != 0) cemu_assert_unimplemented(); for (sint32 burstIndex = 0; burstIndex < (sint32)(cfInstruction->exportBurstCount + 1); burstIndex++) { src->addFmt("v2g.passV2GParameter{}.", (cfInstruction->exportArrayBase) / 4 + burstIndex); _emitXYZWByMask(src, cfInstruction->memWriteCompMask); src->addFmt(" = "); _emitExportGPRReadCode(shaderContext, cfInstruction, 
LATTE_DECOMPILER_DTYPE_SIGNED_INT, burstIndex); src->add(";" _CRLF); } } else if (shaderContext->shaderType == LatteConst::ShaderType::Geometry) { cemu_assert_debug(cfInstruction->memWriteElemSize == 3); //if (cfInstruction->memWriteElemSize != 3) // debugBreakpoint(); cemu_assert_debug((cfInstruction->exportArrayBase & 3) == 0); for (uint32 burstIndex = 0; burstIndex < (cfInstruction->exportBurstCount + 1); burstIndex++) { uint32 parameterExportType = 0; uint32 parameterExportBase = 0; if (LatteGSCopyShaderParser_getExportTypeByOffset(shaderContext->parsedGSCopyShader, parameterOffset + burstIndex * (cfInstruction->memWriteElemSize+1)*4, ¶meterExportType, ¶meterExportBase) == false) { cemu_assert_debug(false); shaderContext->hasError = true; return; } if (parameterExportType == 1 && parameterExportBase == GPU7_DECOMPILER_CF_EXPORT_BASE_POSITION) { src->add("{" _CRLF); src->addFmt("vec4 pos = vec4(0.0,0.0,0.0,1.0);" _CRLF); src->addFmt("pos."); _emitXYZWByMask(src, cfInstruction->memWriteCompMask); src->addFmt(" = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex); src->add(";" _CRLF); src->add("SET_POSITION(pos);" _CRLF); src->add("}" _CRLF); } else if (parameterExportType == 2 && parameterExportBase < 16) { src->addFmt("passG2PParameter{}.", parameterExportBase); _emitXYZWByMask(src, cfInstruction->memWriteCompMask); src->addFmt(" = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex); src->add(";" _CRLF); } else cemu_assert_debug(false); } } else debugBreakpoint(); // todo } void _emitStreamWriteCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { StringBuf* src = shaderContext->shaderSource; if (shaderContext->analyzer.hasStreamoutEnable == false) { #ifdef CEMU_DEBUG_ASSERT src->add("// omitted streamout write" _CRLF); #endif return; } uint32 streamoutBufferIndex; if (cfInstruction->type == GPU7_CF_INST_MEM_STREAM0_WRITE) 
streamoutBufferIndex = 0; else if (cfInstruction->type == GPU7_CF_INST_MEM_STREAM1_WRITE) streamoutBufferIndex = 1; else cemu_assert_unimplemented(); if (shaderContext->shaderType == LatteConst::ShaderType::Vertex) { uint32 arraySize = cfInstruction->memWriteArraySize + 1; for (sint32 i = 0; i < (sint32)arraySize; i++) { if ((cfInstruction->memWriteCompMask&(1 << i)) == 0) continue; if (shaderContext->options->useTFViaSSBO) { uint32 u32Offset = cfInstruction->exportArrayBase + i; src->addFmt("sb_buffer[sbBase{} + {}]", streamoutBufferIndex, u32Offset); } else { src->addFmt("sb{}[{}]", streamoutBufferIndex, cfInstruction->exportArrayBase + i); } src->add(" = "); _emitTypeConversionPrefix(shaderContext, shaderContext->typeTracker.defaultDataType, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(_getRegisterVarName(shaderContext, cfInstruction->exportSourceGPR)); _appendChannelAccess(src, i); _emitTypeConversionSuffix(shaderContext, shaderContext->typeTracker.defaultDataType, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(";" _CRLF); } } else cemu_assert_debug(false); } void _emitCFCall(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { StringBuf* src = shaderContext->shaderSource; uint32 subroutineAddr = cfInstruction->addr; LatteDecompilerSubroutineInfo* subroutineInfo = nullptr; // find subroutine for (auto& subroutineItr : shaderContext->list_subroutines) { if (subroutineItr.cfAddr == subroutineAddr) { subroutineInfo = &subroutineItr; break; } } if (subroutineInfo == nullptr) { cemu_assert_debug(false); return; } // inline function if (shaderContext->isSubroutine) { cemu_assert_debug(false); // inlining with cascaded function calls not supported return; } // init CF stack variables src->addFmt("activeMaskStackSub{:04x}[0] = true;" _CRLF, subroutineInfo->cfAddr); src->addFmt("activeMaskStackCSub{:04x}[0] = true;" _CRLF, subroutineInfo->cfAddr); src->addFmt("activeMaskStackCSub{:04x}[1] = true;" _CRLF, subroutineInfo->cfAddr); 
shaderContext->isSubroutine = true;
	shaderContext->subroutineInfo = subroutineInfo;
	// emit the subroutine's clauses in place; the context flags select the subroutine-specific mask variable names
	for(auto& cfInstruction : subroutineInfo->instructions)
		LatteDecompiler_emitClauseCode(shaderContext, &cfInstruction, true);
	shaderContext->isSubroutine = false;
	shaderContext->subroutineInfo = nullptr;
}

// Emits the GLSL for a single control-flow instruction, dispatching on cfInstruction->type.
// Per-pixel execution state is tracked in generated boolean arrays (names come from _getActiveMaskVarName /
// _getActiveMaskCVarName) indexed by the instruction's activeStackDepth; when the analyzer reports
// modifiesPixelActiveState, clause bodies are wrapped in an "if( <mask> == true )" guard.
// The isSubroutine parameter is not read in this function.
void LatteDecompiler_emitClauseCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, bool isSubroutine)
{
	StringBuf* src = shaderContext->shaderSource;

	if( cfInstruction->type == GPU7_CF_INST_ALU || cfInstruction->type == GPU7_CF_INST_ALU_PUSH_BEFORE || cfInstruction->type == GPU7_CF_INST_ALU_POP_AFTER || cfInstruction->type == GPU7_CF_INST_ALU_POP2_AFTER || cfInstruction->type == GPU7_CF_INST_ALU_BREAK || cfInstruction->type == GPU7_CF_INST_ALU_ELSE_AFTER )
	{
		// emit ALU code
		if (shaderContext->analyzer.modifiesPixelActiveState)
		{
			// PUSH_BEFORE tests the combined mask one level below the one used by the other ALU clause types
			if(cfInstruction->type == GPU7_CF_INST_ALU_PUSH_BEFORE)
				src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - 1));
			else
				src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
		}
		if (cfInstruction->type == GPU7_CF_INST_ALU_PUSH_BEFORE)
		{
			// push: copy the previous level of both mask stacks into the new level
			src->addFmt("{} = {};" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth-1));
			src->addFmt("{} = {};" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth));
		}
		_emitALUClauseCode(shaderContext, cfInstruction);
		if( shaderContext->analyzer.modifiesPixelActiveState )
			src->add("}" _CRLF);
		// any ALU variant other than plain ALU is only expected when the shader modifies the active state
		cemu_assert_debug(!(shaderContext->analyzer.modifiesPixelActiveState == false && cfInstruction->type != GPU7_CF_INST_ALU));
		// handle ELSE case of PUSH_BEFORE
		if( cfInstruction->type == GPU7_CF_INST_ALU_PUSH_BEFORE )
		{
			src->add("else {" _CRLF);
			src->addFmt("{} = false;" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth));
			src->addFmt("{} = false;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
			src->add("}" _CRLF);
		}
		// post clause handler
		if( cfInstruction->type == GPU7_CF_INST_ALU_POP_AFTER )
		{
			src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - 1), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth - 1), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth - 1));
		}
		else if( cfInstruction->type == GPU7_CF_INST_ALU_POP2_AFTER )
		{
			src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - 2), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth - 2), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth - 2));
		}
		else if( cfInstruction->type == GPU7_CF_INST_ALU_ELSE_AFTER )
		{
			// no condition test
			// pop stack
			if( cfInstruction->popCount != 0 )
				debugBreakpoint();
			// else operation
			src->addFmt("{} = {} == false;" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth));
			src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth));
		}
	}
	else if( cfInstruction->type == GPU7_CF_INST_TEX )
	{
		// emit TEX code
		if (shaderContext->analyzer.modifiesPixelActiveState)
		{
			src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth+1));
		}
		_emitTEXClauseCode(shaderContext, cfInstruction);
		if (shaderContext->analyzer.modifiesPixelActiveState)
		{
			src->add("}" _CRLF);
		}
	}
	else if( cfInstruction->type == GPU7_CF_INST_EXPORT || cfInstruction->type == GPU7_CF_INST_EXPORT_DONE )
	{
		// emit export code
		_emitExportCode(shaderContext, cfInstruction);
	}
	else if( cfInstruction->type == GPU7_CF_INST_ELSE )
	{
		// todo: Condition test, popCount?
		// invert the mask of the current level, then recompute the combined mask of the next level
		src->addFmt("{} = {} == false;" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth));
		src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth));
	}
	else if( cfInstruction->type == GPU7_CF_INST_POP )
	{
		// recompute the combined mask for the level reached after popping popCount entries
		src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - cfInstruction->popCount), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth - cfInstruction->popCount), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth - cfInstruction->popCount));
	}
	else if( cfInstruction->type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction->type == GPU7_CF_INST_LOOP_START_NO_AL)
	{
		// start of loop
		// if pixel is disabled, then skip loop
		if (ActiveSettings::ShaderPreventInfiniteLoopsEnabled())
		{
			// with iteration limit to prevent infinite loops
			// the counter variable name is made unique by appending the instruction's cfAddr
			src->addFmt("int loopCounter{} = 0;" _CRLF, (sint32)cfInstruction->cfAddr);
			src->addFmt("while( {} == true && loopCounter{} < 500 )" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), (sint32)cfInstruction->cfAddr);
			src->add("{" _CRLF);
			src->addFmt("loopCounter{}++;" _CRLF, (sint32)cfInstruction->cfAddr);
		}
		else
		{
			src->addFmt("while( {} == true )" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
			src->add("{" _CRLF);
		}
	}
	else if( cfInstruction->type == GPU7_CF_INST_LOOP_END )
	{
		// this might not always work
		if( cfInstruction->popCount != 0 )
			debugBreakpoint();
		// closes the "while" block opened by the loop start
		src->add("}" _CRLF);
	}
	else if( cfInstruction->type == GPU7_CF_INST_LOOP_BREAK )
	{
		if( cfInstruction->popCount != 0 )
			debugBreakpoint();
		if (shaderContext->analyzer.modifiesPixelActiveState)
		{
			src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
		}
		// note: active stack level is set to the same level as the loop begin. popCount is ignored
		src->add("break;" _CRLF);
		if (shaderContext->analyzer.modifiesPixelActiveState)
			src->add("}" _CRLF);
	}
	else if( cfInstruction->type == GPU7_CF_INST_MEM_STREAM0_WRITE || cfInstruction->type == GPU7_CF_INST_MEM_STREAM1_WRITE )
	{
		_emitStreamWriteCode(shaderContext, cfInstruction);
	}
	else if( cfInstruction->type == GPU7_CF_INST_MEM_RING_WRITE )
	{
		_emitCFRingWriteCode(shaderContext, cfInstruction);
	}
	else if( cfInstruction->type == GPU7_CF_INST_EMIT_VERTEX )
	{
		if( shaderContext->analyzer.modifiesPixelActiveState )
			src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
		// write point size
		if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false)
			src->add("gl_PointSize = uf_pointSize;" _CRLF);
		// emit vertex
		src->add("EmitVertex();" _CRLF);
		// increment transform feedback pointer
		if (shaderContext->analyzer.useSSBOForStreamout)
		{
			for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
			{
				if (!shaderContext->output->streamoutBufferWriteMask[i])
					continue;
				cemu_assert_debug((shaderContext->output->streamoutBufferStride[i] & 3) == 0);
				// sbBase is advanced by stride / 4
				src->addFmt("sbBase{} += {};" _CRLF, i, shaderContext->output->streamoutBufferStride[i] / 4);
			}
		}
		if( shaderContext->analyzer.modifiesPixelActiveState )
			src->add("}" _CRLF);
	}
	else if (cfInstruction->type == GPU7_CF_INST_CALL)
	{
		_emitCFCall(shaderContext, cfInstruction);
	}
	else if (cfInstruction->type == GPU7_CF_INST_RETURN)
	{
		// todo (handle properly)
	}
	else
	{
		cemu_assert_debug(false);
	}
}

void LatteDecompiler_emitGLSLHelperFunctions(LatteDecompilerShaderContext*
shaderContext, StringBuf* fCStr_shaderSource) { if( shaderContext->analyzer.hasRedcCUBE ) { fCStr_shaderSource->add("void redcCUBE(vec4 src0, vec4 src1, out vec3 stm, out int faceId)\r\n" "{\r\n" "// stm -> x .. s, y .. t, z .. MajorAxis*2.0\r\n" "vec3 inputCoord = normalize(vec3(src1.y, src1.x, src0.x));\r\n" "float rx = inputCoord.x;\r\n" "float ry = inputCoord.y;\r\n" "float rz = inputCoord.z;\r\n" "if( abs(rx) > abs(ry) && abs(rx) > abs(rz) )\r\n" "{\r\n" "stm.z = rx*2.0;\r\n" "stm.xy = vec2(ry,rz); \r\n" "if( rx >= 0.0 )\r\n" "{\r\n" "faceId = 0;\r\n" "}\r\n" "else\r\n" "{\r\n" "faceId = 1;\r\n" "}\r\n" "}\r\n" "else if( abs(ry) > abs(rx) && abs(ry) > abs(rz) )\r\n" "{\r\n" "stm.z = ry*2.0;\r\n" "stm.xy = vec2(rx,rz); \r\n" "if( ry >= 0.0 )\r\n" "{\r\n" "faceId = 2;\r\n" "}\r\n" "else\r\n" "{\r\n" "faceId = 3;\r\n" "}\r\n" "}\r\n" "else //if( abs(rz) > abs(ry) && abs(rz) > abs(rx) )\r\n" "{\r\n" "stm.z = rz*2.0;\r\n" "stm.xy = vec2(rx,ry); \r\n" "if( rz >= 0.0 )\r\n" "{\r\n" "faceId = 4;\r\n" "}\r\n" "else\r\n" "{\r\n" "faceId = 5;\r\n" "}\r\n" "}\r\n" "}\r\n"); } if( shaderContext->analyzer.hasCubeMapTexture ) { fCStr_shaderSource->add("vec3 redcCUBEReverse(vec2 st, int faceId)\r\n" "{\r\n" "st.yx = st.xy;\r\n" "vec3 v;\r\n" "float majorAxis = 1.0;\r\n" "if( faceId == 0 )\r\n" "{\r\n" "v.yz = (st-vec2(1.5))*(majorAxis*2.0);\r\n" "v.x = 1.0;\r\n" "}\r\n" "else if( faceId == 1 )\r\n" "{\r\n" "v.yz = (st-vec2(1.5))*(majorAxis*2.0);\r\n" "v.x = -1.0;\r\n" "}\r\n" "else if( faceId == 2 )\r\n" "{\r\n" "v.xz = (st-vec2(1.5))*(majorAxis*2.0);\r\n" "v.y = 1.0;\r\n" "}\r\n" "else if( faceId == 3 )\r\n" "{\r\n" "v.xz = (st-vec2(1.5))*(majorAxis*2.0);\r\n" "v.y = -1.0;\r\n" "}\r\n" "else if( faceId == 4 )\r\n" "{\r\n" "v.xy = (st-vec2(1.5))*(majorAxis*2.0);\r\n" "v.z = 1.0;\r\n" "}\r\n" "else\r\n" "{\r\n" "v.xy = (st-vec2(1.5))*(majorAxis*2.0);\r\n" "v.z = -1.0;\r\n" "}\r\n" "return v;\r\n" "}\r\n"); } // clamp fCStr_shaderSource->add("" "int clampFI32(int v)\r\n" 
"{\r\n" "if( v == 0x7FFFFFFF )\r\n" " return floatBitsToInt(1.0);\r\n" "else if( v == 0xFFFFFFFF )\r\n" " return floatBitsToInt(0.0);\r\n" "return floatBitsToInt(clamp(intBitsToFloat(v), 0.0, 1.0));\r\n" "}\r\n"); // mul non-ieee way (0*NaN/INF => 0.0) if (shaderContext->options->strictMul) { // things we tried: //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return mix(a*b,0.0,a==0.0||b==0.0); }" STR_LINEBREAK); //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return mix(vec2(a*b,0.0),vec2(0.0,0.0),(equal(vec2(a),vec2(0.0,0.0))||equal(vec2(b),vec2(0.0,0.0)))).x; }" STR_LINEBREAK); //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" STR_LINEBREAK); //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){float r = a*b;r = intBitsToFloat(floatBitsToInt(r)&(((floatBitsToInt(a) != 0) && (floatBitsToInt(b) != 0))?0xFFFFFFFF:0));return r;}" STR_LINEBREAK); works // for "min" it used to be: float mul_nonIEEE(float a, float b){ return min(a*b,min(abs(a)*3.40282347E+38F,abs(b)*3.40282347E+38F)); } if( LatteGPUState.glVendor == GLVENDOR_NVIDIA && !ActiveSettings::DumpShadersEnabled()) fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){return mix(0.0, a*b, (a != 0.0) && (b != 0.0));}" _CRLF); // compiles faster on Nvidia and also results in lower RAM usage (OpenGL) else fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" _CRLF); // DXKV-like: fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return (b==0.0 ? 0.0 : a) * (a==0.0 ? 
0.0 : b); }" _CRLF); } } #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitGLSLHeader.hpp" void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* shaderContext, LatteParsedFetchShaderAttribute_t& attrib) { auto src = shaderContext->shaderSource; static const char* dsMappingTableFloat[6] = { "int(attrDecoder.x)", "int(attrDecoder.y)", "int(attrDecoder.z)", "int(attrDecoder.w)", /*"floatBitsToInt(0.0)"*/ "0", /*"floatBitsToInt(1.0)"*/ "0x3f800000" }; static const char* dsMappingTableInt[6] = { "int(attrDecoder.x)", "int(attrDecoder.y)", "int(attrDecoder.z)", "int(attrDecoder.w)", "0", "1" }; // get register index based on vtx semantic table uint32 attributeShaderLoc = 0xFFFFFFFF; for (sint32 f = 0; f < 32; f++) { if (shaderContext->contextRegisters[mmSQ_VTX_SEMANTIC_0 + f] == attrib.semanticId) { attributeShaderLoc = f; break; } } if (attributeShaderLoc == 0xFFFFFFFF) return; // attribute is not mapped to VS input uint32 registerIndex = attributeShaderLoc + 1; // R0 is skipped // is register used? 
if ((shaderContext->analyzer.gprUseMask[registerIndex / 8] & (1 << (registerIndex % 8))) == 0) { src->addFmt("// skipped unused attribute for r{}" _CRLF, registerIndex); return; } LatteDecompiler_emitAttributeDecodeGLSL(shaderContext->shader, src, &attrib); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt("{} = ivec4(", _getRegisterVarName(shaderContext, registerIndex)); else src->addFmt("{} = vec4(", _getRegisterVarName(shaderContext, registerIndex)); for (sint32 f = 0; f < 4; f++) { uint8 ds = attrib.ds[f]; if (f > 0) src->add(", "); _emitTypeConversionPrefix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType); if (ds >= 6) { cemu_assert_unimplemented(); ds = 4; // read as 0.0 } if (attrib.nfa != 1) { src->add(dsMappingTableFloat[ds]); } else { src->add(dsMappingTableInt[ds]); } _emitTypeConversionSuffix(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType); } src->add(");" _CRLF); } void LatteDecompiler_emitGLSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader) { StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end) shaderContext->shaderSource = src; // GLSL shader header src->add("#version 430" _CRLF); // 430 is required for shader storage (Vulkan alternative TF path) src->add("#extension GL_ARB_texture_gather : enable" _CRLF); src->add("#extension GL_ARB_separate_shader_objects : enable" _CRLF); if (shaderContext->analyzer.hasStreamoutWrite || shaderContext->options->usesGeometryShader ) src->add("#extension GL_ARB_enhanced_layouts : enable" _CRLF); // debug info src->addFmt("// shader {:016x}" _CRLF, shaderContext->shaderBaseHash); #ifdef CEMU_DEBUG_ASSERT src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues?"true":"false"); src->addFmt(_CRLF); #endif // header part (definitions for inputs and outputs) 
LatteDecompiler::emitHeader(shaderContext); // helper functions LatteDecompiler_emitGLSLHelperFunctions(shaderContext, src); // start of main src->add("void main()" _CRLF); src->add("{" _CRLF); // variable definition if (shaderContext->typeTracker.useArrayGPRs == false) { // each register is a separate variable for (sint32 i = 0; i < 128; i++) { if (shaderContext->analyzer.usesRelativeGPRRead || (shaderContext->analyzer.gprUseMask[i / 8] & (1 << (i & 7))) != 0) { if (shaderContext->typeTracker.genIntReg) src->addFmt("ivec4 R{}i = ivec4(0);" _CRLF, i); else if (shaderContext->typeTracker.genFloatReg) src->addFmt("vec4 R{}f = vec4(0.0);" _CRLF, i); } } } else { // registers are represented using a single large array if (shaderContext->typeTracker.genIntReg) src->addFmt("ivec4 Ri[128];" _CRLF); else if (shaderContext->typeTracker.genFloatReg) src->addFmt("vec4 Rf[128];" _CRLF); for (sint32 i = 0; i < 128; i++) { if (shaderContext->typeTracker.genIntReg) src->addFmt("Ri[{}] = ivec4(0);" _CRLF, i); else if (shaderContext->typeTracker.genFloatReg) src->addFmt("Rf[{}] = vec4(0.0);" _CRLF, i); } } if( shader->shaderType == LatteConst::ShaderType::Vertex ) src->addFmt("uvec4 attrDecoder;" _CRLF); if (shaderContext->typeTracker.genIntReg) src->addFmt("int backupReg0i, backupReg1i, backupReg2i, backupReg3i, backupReg4i;" _CRLF); if (shaderContext->typeTracker.genFloatReg) src->addFmt("float backupReg0f, backupReg1f, backupReg2f, backupReg3f, backupReg4f;" _CRLF); if (shaderContext->typeTracker.genIntReg) { src->addFmt("int PV0ix = 0, PV0iy = 0, PV0iz = 0, PV0iw = 0, PV1ix = 0, PV1iy = 0, PV1iz = 0, PV1iw = 0;" _CRLF); src->addFmt("int PS0i = 0, PS1i = 0;" _CRLF); src->addFmt("ivec4 tempi = ivec4(0);" _CRLF); } if (shaderContext->typeTracker.genFloatReg) { src->addFmt("float PV0fx = 0.0, PV0fy = 0.0, PV0fz = 0.0, PV0fw = 0.0, PV1fx = 0.0, PV1fy = 0.0, PV1fz = 0.0, PV1fw = 0.0;" _CRLF); src->addFmt("float PS0f = 0.0, PS1f = 0.0;" _CRLF); src->addFmt("vec4 tempf = vec4(0.0);" 
_CRLF); } if (shaderContext->analyzer.hasGradientLookup) { src->add("vec4 gradH;" _CRLF); src->add("vec4 gradV;" _CRLF); } src->add("float tempResultf;" _CRLF); src->add("int tempResulti;" _CRLF); src->add("ivec4 ARi = ivec4(0);" _CRLF); src->add("bool predResult = true;" _CRLF); if(shaderContext->analyzer.modifiesPixelActiveState ) { src->addFmt("bool activeMaskStack[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+1); src->addFmt("bool activeMaskStackC[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+2); for (sint32 i = 0; i < shaderContext->analyzer.activeStackMaxDepth; i++) { src->addFmt("activeMaskStack[{}] = false;" _CRLF, i); } for (sint32 i = 0; i < shaderContext->analyzer.activeStackMaxDepth+1; i++) { src->addFmt("activeMaskStackC[{}] = false;" _CRLF, i); } src->addFmt("activeMaskStack[0] = true;" _CRLF); src->addFmt("activeMaskStackC[0] = true;" _CRLF); src->addFmt("activeMaskStackC[1] = true;" _CRLF); // generate vars for each subroutine for (auto& subroutineInfo : shaderContext->list_subroutines) { sint32 subroutineMaxStackDepth = 0; src->addFmt("bool activeMaskStackSub{:04x}[{}];" _CRLF, subroutineInfo.cfAddr, subroutineMaxStackDepth + 1); src->addFmt("bool activeMaskStackCSub{:04x}[{}];" _CRLF, subroutineInfo.cfAddr, subroutineMaxStackDepth + 2); } } // helper variables for cube maps (todo: Only emit when used) if (shaderContext->analyzer.hasRedcCUBE) { src->add("vec3 cubeMapSTM;" _CRLF); src->add("int cubeMapFaceId;" _CRLF); } for(sint32 i=0; ioutput->textureUnitMask[i]) continue; if( shader->textureUnitDim[i] != Latte::E_DIM::DIM_CUBEMAP ) continue; src->addFmt("float cubeMapArrayIndex{} = 0.0;" _CRLF, i); } // init base offset for streamout buffer writes if (shaderContext->analyzer.useSSBOForStreamout && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) { for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) { if(!shaderContext->output->streamoutBufferWriteMask[i]) 
continue; cemu_assert_debug((shaderContext->output->streamoutBufferStride[i]&3) == 0); if (shader->shaderType == LatteConst::ShaderType::Vertex) // vertex shader src->addFmt("int sbBase{} = uf_streamoutBufferBase{}/4 + (gl_VertexID + uf_verticesPerInstance * gl_InstanceID)*{};" _CRLF, i, i, shaderContext->output->streamoutBufferStride[i] / 4); else // geometry shader { uint32 gsOutPrimType = shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE]; uint32 bytesPerVertex = shaderContext->contextRegisters[mmSQ_GS_VERT_ITEMSIZE] * 4; uint32 maxVerticesInGS = ((shaderContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF) * 4) / bytesPerVertex; cemu_assert_debug(gsOutPrimType == 0); // currently we only properly handle GS output primitive points src->addFmt("int sbBase{} = uf_streamoutBufferBase{}/4 + (gl_PrimitiveIDIn * {})*{};" _CRLF, i, i, maxVerticesInGS, shaderContext->output->streamoutBufferStride[i] / 4); } } } // code to load inputs from previous stage if( shader->shaderType == LatteConst::ShaderType::Vertex ) { if( (shaderContext->analyzer.gprUseMask[0/8]&(1<<(0%8))) != 0 ) { if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt("{} = ivec4(gl_VertexID, 0, 0, gl_InstanceID);" _CRLF, _getRegisterVarName(shaderContext, 0)); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{} = floatBitsToInt(ivec4(gl_VertexID, 0, 0, gl_InstanceID));" _CRLF, _getRegisterVarName(shaderContext, 0)); else cemu_assert_unimplemented(); } LatteFetchShader* parsedFetchShader = shaderContext->fetchShader; for(auto& bufferGroup : parsedFetchShader->bufferGroups) { for(sint32 i=0; ibufferGroupsInvalid) { // these attributes point to non-existent buffers // todo - figure out how the hardware actually handles this, currently we assume the input values are zero for (sint32 i = 0; i < bufferGroup.attribCount; i++) LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]); } } else if 
(shader->shaderType == LatteConst::ShaderType::Pixel) { LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); uint32 psControl0 = shaderContext->contextRegisters[mmSPI_PS_IN_CONTROL_0]; uint32 psControl1 = shaderContext->contextRegisters[mmSPI_PS_IN_CONTROL_1]; uint32 spiInterpControl = shaderContext->contextRegisters[mmSPI_INTERP_CONTROL_0]; uint8 spriteEnable = (spiInterpControl >> 1) & 1; cemu_assert_debug(spriteEnable == 0); uint8 frontFace_enabled = (psControl1 >> 8) & 1; uint8 frontFace_chan = (psControl1 >> 9) & 3; uint8 frontFace_allBits = (psControl1 >> 11) & 1; uint8 frontFace_regIndex = (psControl1 >> 12) & 0x1F; // handle param_gen if (psInputTable->paramGen != 0) { cemu_assert_debug((psInputTable->paramGen) == 1); // handle the other bits (the same set of coordinates with different perspective/projection settings?) uint32 paramGenGPRIndex = psInputTable->paramGenGPR; if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{} = gl_PointCoord.xyxy;" _CRLF, _getRegisterVarName(shaderContext, paramGenGPRIndex)); else src->addFmt("{} = floatBitsToInt(gl_PointCoord.xyxy);" _CRLF, _getRegisterVarName(shaderContext, paramGenGPRIndex)); } for (sint32 i = 0; i < psInputTable->count; i++) { uint32 psControl0 = shaderContext->contextRegisters[mmSPI_PS_IN_CONTROL_0]; uint32 spi0_paramGen = (psControl0 >> 15) & 0xF; sint32 gprIndex = i;// +spi0_paramGen + paramRegOffset; if ((shaderContext->analyzer.gprUseMask[gprIndex / 8] & (1 << (gprIndex % 8))) == 0 && shaderContext->analyzer.usesRelativeGPRRead == false) continue; uint32 psInputSemanticId = psInputTable->import[i].semanticId; if (psInputSemanticId == LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION) { if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{} = GET_FRAGCOORD();" _CRLF, _getRegisterVarName(shaderContext, gprIndex)); else src->addFmt("{} = floatBitsToInt(GET_FRAGCOORD());" _CRLF, _getRegisterVarName(shaderContext, 
gprIndex)); continue; } if (shaderContext->options->usesGeometryShader) { // import from geometry shader if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt("{} = floatBitsToInt(passG2PParameter{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{} = passG2PParameter{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId & 0x7F); else cemu_assert_unimplemented(); } else { // import from vertex shader if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt("{} = floatBitsToInt(passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{} = passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId); else cemu_assert_unimplemented(); } } // front facing attribute if (frontFace_enabled) { if ((shaderContext->analyzer.gprUseMask[0 / 8] & (1 << (0 % 8))) != 0) { if (frontFace_allBits) cemu_assert_debug(false); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt("{}.{} = floatBitsToInt(gl_FrontFacing?1.0:0.0);" _CRLF, _getRegisterVarName(shaderContext, frontFace_regIndex), _getElementStrByIndex(frontFace_chan)); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{}.{} = gl_FrontFacing?1.0:0.0;" _CRLF, _getRegisterVarName(shaderContext, frontFace_regIndex), _getElementStrByIndex(frontFace_chan)); else cemu_assert_debug(false); } } } for(auto& cfInstruction : shaderContext->cfInstructions) LatteDecompiler_emitClauseCode(shaderContext, &cfInstruction, false); if( shader->shaderType == LatteConst::ShaderType::Geometry ) src->add("EndPrimitive();" _CRLF); // vertex shader should write 
renderstate point size at the end if required but not modified by shader if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false) { if (shader->shaderType == LatteConst::ShaderType::Vertex && shaderContext->options->usesGeometryShader == false) src->add("gl_PointSize = uf_pointSize;" _CRLF); } // end of shader main src->add("}" _CRLF); src->shrink_to_fit(); shader->strBuf_shaderSource = src; }