#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/Core/LatteShaderAssembly.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
#include "Cafe/HW/Latte/Core/Latte.h"
#include "Cafe/HW/Latte/Core/LatteDraw.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "config/ActiveSettings.h"
#include "util/helpers/StringBuf.h"

// NOTE(review): the two includes that followed StringBuf.h had lost their header names
// (bare "#include #include"). The two below are chosen from what this file visibly uses
// (boost::container::small_vector, std::span) - confirm against version control.
#include <boost/container/small_vector.hpp>
#include <span>

#define _CRLF "\r\n"

void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext, StringBuf* src, LatteParsedFetchShaderAttribute_t* attrib);

/*
 * Variable names:
 * R0-R127 temp
 * Most variables are multi-typed and the respective type is appended to the name
 * Type suffixes are: f (float), i (32bit int), ui (unsigned 32bit int)
 * Examples: R13ui.x, tempf.z
 */

// local prototypes
void _emitTypeConversionPrefixMSL(LatteDecompilerShaderContext* shaderContext, sint32 sourceType, sint32 destinationType, sint32 componentCount = 1);
void _emitTypeConversionSuffixMSL(LatteDecompilerShaderContext* shaderContext, sint32 sourceType, sint32 destinationType);
void LatteDecompiler_emitClauseCodeMSL(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, bool isSubroutine);

// returns the swizzle letter for a channel index (0..3), or "UNDEFINED" for any other value
static const char* _getElementStrByIndex(uint32 channel)
{
	switch (channel)
	{
	case 0:
		return "x";
	case 1:
		return "y";
	case 2:
		return "z";
	case 3:
		return "w";
	}
	return "UNDEFINED";
}

// ring of scratch strings handed out by _getTempString()
static constexpr size_t _tempGenStringCount = 64;
static constexpr size_t _tempGenStringSize = 256;
static char _tempGenString[_tempGenStringCount][_tempGenStringSize];
static uint32 _tempGenStringIndex = 0;

// returns the next scratch buffer (_tempGenStringSize bytes) from the static ring
// the storage is shared and unsynchronized; a returned buffer is handed out again after _tempGenStringCount further calls
static char* _getTempString()
{
	char* str = _tempGenString[_tempGenStringIndex];
	_tempGenStringIndex = (_tempGenStringIndex + 1) % _tempGenStringCount;
	return str;
}

// returns the name of the active mask stack entry at the given index
// subroutines use their own stack variable which has the subroutine's cfAddr in its name
static char* _getActiveMaskVarName(LatteDecompilerShaderContext* shaderContext, sint32 index)
{
	char* varName = _getTempString();
	if (shaderContext->isSubroutine)
		snprintf(varName, _tempGenStringSize, "activeMaskStackSub%04x[%d]", shaderContext->subroutineInfo->cfAddr, index);
	else
		snprintf(varName, _tempGenStringSize, "activeMaskStack[%d]", index);
	return varName;
}

// same as _getActiveMaskVarName but for the activeMaskStackC variable
static char* _getActiveMaskCVarName(LatteDecompilerShaderContext* shaderContext, sint32 index)
{
	char* varName = _getTempString();
	if (shaderContext->isSubroutine)
		snprintf(varName, _tempGenStringSize, "activeMaskStackCSub%04x[%d]", shaderContext->subroutineInfo->cfAddr, index);
	else
		snprintf(varName, _tempGenStringSize, "activeMaskStackC[%d]", index);
	return varName;
}

/*
 * Returns the variable name of a GPR (without channel) using the default data type of the shader
 * Non-array mode: R<index>i / R<index>f
 * Array mode: Ri[<index>] / Rf[<index>], or Ri[<index>+ARi.<c>] if destRelIndexMode selects an AR channel (>= 0)
 * The returned string lives in the _getTempString() ring
 * If the default data type is neither signed int nor float the result is an empty string
 */
static char* _getRegisterVarName(LatteDecompilerShaderContext* shaderContext, uint32 index, sint32 destRelIndexMode=-1)
{
	auto type = shaderContext->typeTracker.defaultDataType;
	char* tempStr = _getTempString();
	tempStr[0] = '\0'; // never hand back stale ring contents when no branch below matches
	if (shaderContext->typeTracker.useArrayGPRs == false)
	{
		if (type == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
			snprintf(tempStr, _tempGenStringSize, "R%di", index);
		else if (type == LATTE_DECOMPILER_DTYPE_FLOAT)
			snprintf(tempStr, _tempGenStringSize, "R%df", index);
	}
	else
	{
		if (destRelIndexMode >= 0)
		{
			const char* destRelOffset = ""; // stays empty for unknown index modes (after debugBreakpoint)
			if (destRelIndexMode == GPU7_INDEX_AR_X)
				destRelOffset = "ARi.x";
			else if (destRelIndexMode == GPU7_INDEX_AR_Y)
				destRelOffset = "ARi.y";
			else if (destRelIndexMode == GPU7_INDEX_AR_Z)
				destRelOffset = "ARi.z";
			else if (destRelIndexMode == GPU7_INDEX_AR_W)
				destRelOffset = "ARi.w";
			else
				debugBreakpoint();
			if (type == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
				snprintf(tempStr, _tempGenStringSize, "Ri[%d+%s]", index, destRelOffset);
			else if (type == LATTE_DECOMPILER_DTYPE_FLOAT)
				snprintf(tempStr, _tempGenStringSize, "Rf[%d+%s]", index, destRelOffset);
		}
		else
		{
			if (type == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
				snprintf(tempStr, _tempGenStringSize, "Ri[%d]", index);
			else if (type == LATTE_DECOMPILER_DTYPE_FLOAT)
				snprintf(tempStr, _tempGenStringSize, "Rf[%d]", index);
		}
	}
	return tempStr;
}

// appends the type suffix (i, ui or f) for the given data type
static void _appendRegisterTypeSuffix(StringBuf* src, sint32 dataType)
{
	if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
		src->add("i");
	else if (dataType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT)
		src->add("ui");
	else if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT)
		src->add("f");
	else
		cemu_assert_unimplemented();
}

// appends x/y/z/w
static void _appendChannel(StringBuf* src, sint32 channelIndex)
{
	cemu_assert_debug(channelIndex >= 0 && channelIndex <= 3);
	switch (channelIndex)
	{
	case 0:
		src->add("x");
		return;
	case 1:
		src->add("y");
		return;
	case 2:
		src->add("z");
		return;
	case 3:
		src->add("w");
		return;
	}
}

// appends .x/.y/.z/.w
static void _appendChannelAccess(StringBuf* src, sint32 channelIndex)
{
	cemu_assert_debug(channelIndex >= 0 && channelIndex <= 3);
	switch (channelIndex)
	{
	case 0:
		src->add(".x");
		return;
	case 1:
		src->add(".y");
		return;
	case 2:
		src->add(".z");
		return;
	case 3:
		src->add(".w");
		return;
	}
}

// appends the name of a PV/PS temporary: PS<n><type> for aluUnit 4, otherwise PV<n><type><channel>
// n is the lowest bit of groupIndex, type is the suffix of the default data type
static void _appendPVPS(LatteDecompilerShaderContext* shaderContext, StringBuf* src, uint32 groupIndex, uint8 aluUnit)
{
	cemu_assert_debug(aluUnit < 5);
	if (aluUnit == 4)
	{
		src->addFmt("PS{}", (groupIndex & 1));
		_appendRegisterTypeSuffix(src, shaderContext->typeTracker.defaultDataType);
		return;
	}
	src->addFmt("PV{}", (groupIndex & 1));
	_appendRegisterTypeSuffix(src, shaderContext->typeTracker.defaultDataType);
	_appendChannel(src, aluUnit);
}

// formats a float using fmt's "{:#}" and appends a '0' if the result ends with '.'
std::string _FormatFloatAsConstant(float f)
{
	char floatAsStr[64];
	// format_to_n reports the untruncated length, which can exceed the buffer size
	size_t floatAsStrLen = fmt::format_to_n(floatAsStr, sizeof(floatAsStr), "{:#}", f).size;
	if (floatAsStrLen > sizeof(floatAsStr) - 2)
		floatAsStrLen = sizeof(floatAsStr) - 2; // keep room for the optional '0' and the terminator
	if (floatAsStrLen > 0 && floatAsStr[floatAsStrLen - 1] == '.')
	{
		floatAsStr[floatAsStrLen] = '0';
		floatAsStrLen++;
	}
	cemu_assert(floatAsStrLen < 50); // constant suspiciously long?
	floatAsStr[floatAsStrLen] = '\0';
	cemu_assert_debug(floatAsStrLen >= 3); // shortest possible form is "0.0"
	return floatAsStr;
}

// tracks PV/PS and register backups
struct ALUClauseTemporariesState
{
	struct PVPSAlias
	{
		enum class LOCATION_TYPE : uint8
		{
			LOCATION_NONE,
			LOCATION_GPR,
			LOCATION_PVPS,
		};

		LOCATION_TYPE location{ LOCATION_TYPE::LOCATION_NONE };
		uint8 index; // GPR index or temporary index
		uint8 aluUnit; // LOCATION_GPR: channel x,y,z,w (0-3). LOCATION_PVPS: ALU unit 0-3 (or 4 for PS)

		void SetLocationGPR(uint8 gprIndex, uint8 channel)
		{
			cemu_assert_debug(channel < 4);
			this->location = LOCATION_TYPE::LOCATION_GPR;
			this->index = gprIndex;
			this->aluUnit = channel;
		}

		void SetLocationPSPVTemporary(uint8 aluUnit, uint32 groupIndex)
		{
			cemu_assert_debug(aluUnit < 5);
			this->location = LOCATION_TYPE::LOCATION_PVPS;
			this->index = groupIndex & 1;
			this->aluUnit = aluUnit;
		}
	};

	struct GPRTemporary
	{
		GPRTemporary(uint8 gprIndex, uint8 channel, uint8 backupVarIndex) : gprIndex(gprIndex), channel(channel), backupVarIndex(backupVarIndex) {}

		uint8 gprIndex;
		uint8 channel;
		uint8 backupVarIndex;
	};

	// records for every ALU unit where the output of the given instruction group ends up (GPR channel or PV/PS temporary)
	// all previous aliases are dropped first
	void TrackGroupOutputPVPS(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstr, size_t numInstr)
	{
		// unset current
		for (auto& it : m_pvps)
			it.location = PVPSAlias::LOCATION_TYPE::LOCATION_NONE;
		for (size_t i = 0; i < numInstr; i++)
		{
			LatteDecompilerALUInstruction& inst = aluInstr[i];
			if (!inst.isOP3 && inst.opcode == ALU_OP2_INST_NOP)
				continue; // skip NOP instruction
			if (inst.writeMask == 0)
			{
				// map to temporary
				m_pvps[inst.aluUnit].SetLocationPSPVTemporary(inst.aluUnit, aluInstr->instructionGroupIndex);
			}
			else
			{
				// map to GPR
				if(inst.destRel == 0) // is PV/PS set for indexed writes?
					m_pvps[inst.aluUnit].SetLocationGPR(inst.destGpr, inst.destElem);
			}
		}
	}

	// returns true if an alias is tracked for the given ALU unit (0-3 = PV.xyzw, 4 = PS)
	bool HasPVPS(uint8 aluUnitIndex) const
	{
		cemu_assert_debug(aluUnitIndex < 5);
		return m_pvps[aluUnitIndex].location != PVPSAlias::LOCATION_TYPE::LOCATION_NONE;
	}

	// emits the expression which reads the PV/PS value of the given ALU unit
	// depending on the tracked alias this is a GPR channel, a backupReg variable or a PV/PS temporary
	void EmitPVPSAccess(LatteDecompilerShaderContext* shaderContext, uint8 aluUnitIndex, uint32 currentGroupIndex) const
	{
		cemu_assert_debug(aluUnitIndex < 5);
		switch (m_pvps[aluUnitIndex].location)
		{
		case PVPSAlias::LOCATION_TYPE::LOCATION_GPR:
		{
			sint32 temporaryIndex = GetTemporaryForGPR(m_pvps[aluUnitIndex].index, m_pvps[aluUnitIndex].aluUnit);
			if (temporaryIndex < 0)
			{
				shaderContext->shaderSource->add(_getRegisterVarName(shaderContext, m_pvps[aluUnitIndex].index, -1));
				_appendChannelAccess(shaderContext->shaderSource, m_pvps[aluUnitIndex].aluUnit);
			}
			else
			{
				// use temporary instead of GPR
				shaderContext->shaderSource->addFmt("backupReg{}", temporaryIndex);
				_appendRegisterTypeSuffix(shaderContext->shaderSource, shaderContext->typeTracker.defaultDataType);
			}
			break;
		}
		case PVPSAlias::LOCATION_TYPE::LOCATION_PVPS:
			_appendPVPS(shaderContext, shaderContext->shaderSource, currentGroupIndex-1, m_pvps[aluUnitIndex].aluUnit);
			break;
		default:
			cemuLog_log(LogType::Force, "Shader {:016x} accesses PV/PS without writing to it", shaderContext->shaderBaseHash);
			cemu_assert_suspicious();
			break;
		}
	}

	/*
	 * Check for GPR channels which are modified before they are read within the same group
	 * These registers need to be copied to a temporary
	 */
	void CreateGPRTemporaries(LatteDecompilerShaderContext* shaderContext, std::span<LatteDecompilerALUInstruction> aluInstructions)
	{
		uint8 registerChannelWriteMask[(LATTE_NUM_GPR * 4 + 7) / 8] = { 0 };

		m_gprTemporaries.clear();
		for (auto& aluInstruction : aluInstructions)
		{
			// ignore NOP instructions
			if (aluInstruction.isOP3 == false && aluInstruction.opcode == ALU_OP2_INST_NOP)
				continue;
			cemu_assert_debug(aluInstruction.destElem <= 3);
			// check if any previously written register is read
			for (sint32 f = 0; f < 3; f++)
			{
				uint32 readGPRIndex;
				uint32 readGPRChannel;
				if (GPU7_ALU_SRC_IS_GPR(aluInstruction.sourceOperand[f].sel))
				{
					readGPRIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction.sourceOperand[f].sel);
					cemu_assert_debug(aluInstruction.sourceOperand[f].chan <= 3);
					readGPRChannel = aluInstruction.sourceOperand[f].chan;
				}
				else if (GPU7_ALU_SRC_IS_PV(aluInstruction.sourceOperand[f].sel) || GPU7_ALU_SRC_IS_PS(aluInstruction.sourceOperand[f].sel))
				{
					uint8 aluUnitIndex = 0;
					if (GPU7_ALU_SRC_IS_PV(aluInstruction.sourceOperand[f].sel))
						aluUnitIndex = aluInstruction.sourceOperand[f].chan;
					else
						aluUnitIndex = 4;
					// if aliased to a GPR, then consider it a GPR read
					if(m_pvps[aluUnitIndex].location != PVPSAlias::LOCATION_TYPE::LOCATION_GPR)
						continue;
					readGPRIndex = m_pvps[aluUnitIndex].index;
					readGPRChannel = m_pvps[aluUnitIndex].aluUnit;
				}
				else
					continue;
				// track GPR read
				// the mask is indexed by GPR channel, so use the resolved channel here
				// (for PV/PS aliases the operand's own chan is not the channel of the aliased GPR)
				const uint32 readMaskBit = readGPRIndex * 4 + readGPRChannel;
				if ((registerChannelWriteMask[readMaskBit / 8] & (1 << (readMaskBit % 8))) != 0)
				{
					// register is overwritten by previous instruction, a temporary variable is required
					if (GetTemporaryForGPR(readGPRIndex, readGPRChannel) < 0)
						m_gprTemporaries.emplace_back(readGPRIndex, readGPRChannel, m_gprTemporaries.size());
				}
			}
			// track write
			if (aluInstruction.writeMask != 0)
				registerChannelWriteMask[(aluInstruction.destGpr * 4 + aluInstruction.destElem) / 8] |= (1 << ((aluInstruction.destGpr * 4 + aluInstruction.destElem) % 8));
		}
		// output code to move GPRs into temporaries
		StringBuf* src = shaderContext->shaderSource;
		for (auto& it : m_gprTemporaries)
		{
			src->addFmt("backupReg{}", it.backupVarIndex);
			_appendRegisterTypeSuffix(src, shaderContext->typeTracker.defaultDataType);
			src->add(" = ");
			src->add(_getRegisterVarName(shaderContext, it.gprIndex));
			_appendChannelAccess(src, it.channel);
			src->add(";" _CRLF);
		}
	}

	// returns -1 if none present
	sint32 GetTemporaryForGPR(uint8 gprIndex, uint8 channel) const
	{
		for (auto& it : m_gprTemporaries)
		{
			if (it.gprIndex == gprIndex && it.channel == channel)
				return (sint32)it.backupVarIndex;
		}
		return -1;
	}

private:
	PVPSAlias m_pvps[5]{};
	// NOTE(review): the template arguments of this small_vector were lost; the element type follows
	// from its use above, the inline capacity of 8 is an assumption - confirm against version control
	boost::container::small_vector<GPRTemporary, 8> m_gprTemporaries;
};

sint32 _getVertexShaderOutParamSemanticId(uint32* contextRegisters, sint32 index);
sint32 _getInputRegisterDataType(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex);
sint32 _getALUInstructionOutputDataType(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction);
bool _isReductionInstruction(LatteDecompilerALUInstruction* aluInstruction);

/*
 * Writes the name of the output variable and channel
 * E.g. R5f.x or tempf.x if writeMask is 0
 */
static void _emitInstructionOutputVariableName(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction)
{
	auto src = shaderContext->shaderSource;
	sint32 outputDataType = _getALUInstructionOutputDataType(shaderContext, aluInstruction);
	if( aluInstruction->writeMask == 0 )
	{
		// does not output to GPR
		if( !_isReductionInstruction(aluInstruction) )
		{
			// output to PV/PS
			_appendPVPS(shaderContext, src, aluInstruction->instructionGroupIndex, aluInstruction->aluUnit);
			return;
		}
		else
		{
			// output to temp
			src->add("temp");
			_appendRegisterTypeSuffix(src, outputDataType);
		}
		_appendChannelAccess(src, aluInstruction->aluUnit);
	}
	else
	{
		// output to GPR. Aliasing to PV/PS happens at the end of the group
		src->add(_getRegisterVarName(shaderContext, aluInstruction->destGpr, aluInstruction->destRel==0?-1:aluInstruction->indexMode));
		_appendChannelAccess(src, aluInstruction->destElem);
	}
}

// writes the name of the PV/PS temporary which belongs to the instruction's group and ALU unit
static void _emitInstructionPVPSOutputVariableName(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction)
{
	_appendPVPS(shaderContext, shaderContext->shaderSource, aluInstruction->instructionGroupIndex, aluInstruction->aluUnit);
}

/*
 * Emits a swizzled read of a GPR, e.g. R5f.xyz
 * channel0..channel3 select the components (0-3), -1 marks an unused slot
 * If dataType is >= 0 the access is wrapped in a conversion from the default data type to dataType
 */
static void _emitRegisterAccessCode(LatteDecompilerShaderContext* shaderContext, sint32 gprIndex, sint32 channel0, sint32 channel1, sint32 channel2, sint32 channel3, sint32 dataType = -1)
{
	StringBuf* src = shaderContext->shaderSource;
	sint32 registerElementDataType = shaderContext->typeTracker.defaultDataType;
	cemu_assert_debug(gprIndex >= 0 && gprIndex <= 127);

	sint32 channelArray[4];
	channelArray[0] = channel0;
	channelArray[1] = channel1;
	channelArray[2] = channel2;
	channelArray[3] = channel3;

	sint32 numComponents = 0;
	for (sint32 i = 0; i < 4; i++)
	{
		if (channelArray[i] >= 0 && channelArray[i] <= 3)
			numComponents++;
	}

	if (dataType >= 0)
	{
		_emitTypeConversionPrefixMSL(shaderContext, registerElementDataType, dataType, numComponents);
	}
	if (shaderContext->typeTracker.useArrayGPRs)
		src->add("R");
	else
		src->addFmt("R{}", gprIndex);
	_appendRegisterTypeSuffix(src, registerElementDataType);
	if (shaderContext->typeTracker.useArrayGPRs)
		src->addFmt("[{}]", gprIndex);

	src->add(".");

	for (sint32 i = 0; i < 4; i++)
	{
		if (channelArray[i] >= 0 && channelArray[i] <= 3)
			src->add(_getElementStrByIndex(channelArray[i]));
		else if (channelArray[i] == -1)
		{
			// channel not used
		}
		else
		{
			cemu_assert_unimplemented();
		}
	}
	if (dataType >= 0)
		_emitTypeConversionSuffixMSL(shaderContext, registerElementDataType, dataType);
}

// optimized variant of _emitRegisterAccessCode for raw one channel reads
static void _emitRegisterChannelAccessCode(LatteDecompilerShaderContext* shaderContext, sint32 gprIndex, sint32 channel, sint32 dataType)
{
	cemu_assert_debug(gprIndex >= 0 && gprIndex <= 127);
	cemu_assert_debug(channel >= 0 && channel < 4);
	StringBuf* src = shaderContext->shaderSource;
	sint32 registerElementDataType = shaderContext->typeTracker.defaultDataType;
	_emitTypeConversionPrefixMSL(shaderContext, registerElementDataType, dataType);
	if (shaderContext->typeTracker.useArrayGPRs)
		src->add("R");
	else
		src->addFmt("R{}", gprIndex);
	_appendRegisterTypeSuffix(src, registerElementDataType);
	if (shaderContext->typeTracker.useArrayGPRs)
		src->addFmt("[{}]", gprIndex);
	src->add(".");
	src->add(_getElementStrByIndex(channel));
	_emitTypeConversionSuffixMSL(shaderContext, registerElementDataType, dataType);
}

// emits the read of a (non-relative) GPR source operand without any type conversion
// if a backup variable exists for the register channel it is read instead of the GPR
static void _emitALURegisterInputAccessCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex)
{
	StringBuf* src = shaderContext->shaderSource;
	sint32 currentRegisterElementType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
	cemu_assert_debug(GPU7_ALU_SRC_IS_GPR(aluInstruction->sourceOperand[operandIndex].sel));
	sint32 gprIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction->sourceOperand[operandIndex].sel);
	sint32 temporaryIndex = shaderContext->aluPVPSState->GetTemporaryForGPR(gprIndex, aluInstruction->sourceOperand[operandIndex].chan);
	if(temporaryIndex >= 0)
	{
		// access via backup variable
		src->addFmt("backupReg{}", temporaryIndex);
		_appendRegisterTypeSuffix(src, currentRegisterElementType);
	}
	else
	{
		// access via register variable
		_emitRegisterAccessCode(shaderContext, gprIndex, aluInstruction->sourceOperand[operandIndex].chan, -1, -1, -1);
	}
}

// emits the read of PV (aluUnitIndex 0-3) or PS (aluUnitIndex 4) as tracked by aluPVPSState
static void _emitPVPSAccessCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex, uint8 aluUnitIndex)
{
	cemu_assert_debug(aluInstruction->instructionGroupIndex > 0); // PV/PS is uninitialized for group 0
	// PV/PS vars are currently always using the default type (shaderContext->typeTracker.defaultDataType)
	shaderContext->aluPVPSState->EmitPVPSAccess(shaderContext, aluUnitIndex, aluInstruction->instructionGroupIndex);
}

/*
 * Emits the expression used for calculating the index for uniform access
 * For static access, this is a number
 * For dynamic access, this is AR.* + base
 */
static void _emitUniformAccessIndexCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex)
{
	StringBuf* src = shaderContext->shaderSource;
	bool isUniformRegister = GPU7_ALU_SRC_IS_CFILE(aluInstruction->sourceOperand[operandIndex].sel);
	sint32 uniformOffset = 0; // index into array, for relative accesses this is the base offset
	if( isUniformRegister )
	{
		uniformOffset = GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction->sourceOperand[operandIndex].sel);
	}
	else
	{
		if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction->sourceOperand[operandIndex].sel) )
		{
			uniformOffset = GPU7_ALU_SRC_GET_CBANK0_INDEX(aluInstruction->sourceOperand[operandIndex].sel) + aluInstruction->cfInstruction->cBank0AddrBase;
		}
		else
		{
			uniformOffset = GPU7_ALU_SRC_GET_CBANK1_INDEX(aluInstruction->sourceOperand[operandIndex].sel) + aluInstruction->cfInstruction->cBank1AddrBase;
		}
	}
	if( aluInstruction->sourceOperand[operandIndex].rel != 0 )
	{
		if (aluInstruction->indexMode == GPU7_INDEX_AR_X)
			src->addFmt("ARi.x+{}", uniformOffset);
		else if (aluInstruction->indexMode == GPU7_INDEX_AR_Y)
			src->addFmt("ARi.y+{}", uniformOffset);
		else if (aluInstruction->indexMode == GPU7_INDEX_AR_Z)
			src->addFmt("ARi.z+{}", uniformOffset);
		else if (aluInstruction->indexMode == GPU7_INDEX_AR_W)
			src->addFmt("ARi.w+{}", uniformOffset);
		else
			cemu_assert_unimplemented();
	}
	else
	{
		src->addFmt("{}", uniformOffset);
	}
}

/*
 * Emits the read of a uniform source operand (cfile register or cbank0/cbank1) converted to requiredType
 * The emitted expression depends on shaderContext->shader->uniformMode:
 *   REMAPPED    - supportBuffer.remapped[<mappedIndex>]
 *   FULL_CFILE  - supportBuffer.uniformRegister[<index>]
 *   FULL_CBANK  - ubuff<bufferIndex>.d[<index>]
 */
static void _emitUniformAccessCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex, sint32 requiredType)
{
	StringBuf* src = shaderContext->shaderSource;
	if(shaderContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED )
	{
		// uniform registers or buffers are accessed statically with predictable offsets
		// find entry in remapped uniform
		if( aluInstruction->sourceOperand[operandIndex].rel != 0 )
			debugBreakpoint();
		bool isUniformRegister = GPU7_ALU_SRC_IS_CFILE(aluInstruction->sourceOperand[operandIndex].sel);
		sint32 uniformOffset = 0; // index into array
		sint32 uniformBufferIndex = 0;
		if( isUniformRegister )
		{
			uniformOffset = GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction->sourceOperand[operandIndex].sel);
			uniformBufferIndex = 0;
		}
		else
		{
			if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction->sourceOperand[operandIndex].sel) )
			{
				uniformOffset = GPU7_ALU_SRC_GET_CBANK0_INDEX(aluInstruction->sourceOperand[operandIndex].sel) + aluInstruction->cfInstruction->cBank0AddrBase;
				uniformBufferIndex = aluInstruction->cfInstruction->cBank0Index;
			}
			else
			{
				uniformOffset = GPU7_ALU_SRC_GET_CBANK1_INDEX(aluInstruction->sourceOperand[operandIndex].sel) + aluInstruction->cfInstruction->cBank1AddrBase;
				uniformBufferIndex = aluInstruction->cfInstruction->cBank1Index;
			}
		}
		// NOTE(review): an entry whose isRegister flag does not match the kind of this access still
		// falls through to the kcacheBankId/index comparison below - confirm this is intended
		LatteDecompilerRemappedUniformEntry_t* remappedUniformEntry = nullptr;
		for(size_t i=0; i< shaderContext->shader->list_remappedUniformEntries.size(); i++)
		{
			LatteDecompilerRemappedUniformEntry_t* remappedUniformEntryItr = shaderContext->shader->list_remappedUniformEntries.data() + i;
			if( remappedUniformEntryItr->isRegister && isUniformRegister )
			{
				if( remappedUniformEntryItr->index == uniformOffset )
				{
					remappedUniformEntry = remappedUniformEntryItr;
					break;
				}
			}
			else
			{
				if( remappedUniformEntryItr->kcacheBankId == uniformBufferIndex && remappedUniformEntryItr->index == uniformOffset )
				{
					remappedUniformEntry = remappedUniformEntryItr;
					break;
				}
			}
		}
		cemu_assert_debug(remappedUniformEntry);
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
		src->addFmt("supportBuffer.remapped[{}]", remappedUniformEntry->mappedIndex);
		_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
	}
	else if( shaderContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE )
	{
		// uniform registers are accessed with unpredictable (dynamic) offset
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
		src->add("supportBuffer.uniformRegister[");
		_emitUniformAccessIndexCode(shaderContext, aluInstruction, operandIndex);
		src->add("]");
		_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
	}
	else if( shaderContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK )
	{
		// uniform buffers are available as a whole
		bool isUniformRegister = GPU7_ALU_SRC_IS_CFILE(aluInstruction->sourceOperand[operandIndex].sel);
		if( isUniformRegister )
			debugBreakpoint();
		sint32 uniformBufferIndex = 0;
		if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction->sourceOperand[operandIndex].sel) )
		{
			uniformBufferIndex = aluInstruction->cfInstruction->cBank0Index;
		}
		else
		{
			uniformBufferIndex = aluInstruction->cfInstruction->cBank1Index;
		}
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
		src->addFmt("ubuff{}.d[", uniformBufferIndex);
		_emitUniformAccessIndexCode(shaderContext, aluInstruction, operandIndex);
		src->add("]");
		_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
	}
	else
		debugBreakpoint();
}

// Generates (slow) code to read an indexed GPR
static void _emitCodeToReadRelativeGPR(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex, sint32 requiredType)
{
	StringBuf* src = shaderContext->shaderSource;
	uint32 gprBaseIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction->sourceOperand[operandIndex].sel);
	cemu_assert_debug(aluInstruction->sourceOperand[operandIndex].rel != 0);

	if( shaderContext->typeTracker.useArrayGPRs )
	{
		// with array GPRs the register file can be indexed directly
		_emitTypeConversionPrefixMSL(shaderContext, shaderContext->typeTracker.defaultDataType, requiredType);
		src->add(_getRegisterVarName(shaderContext, gprBaseIndex, aluInstruction->indexMode));
		_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
		_emitTypeConversionSuffixMSL(shaderContext, shaderContext->typeTracker.defaultDataType, requiredType);
		return;
	}

	char indexAccessCode[64] = ""; // stays empty if the index mode is not handled below
	if (aluInstruction->indexMode == GPU7_INDEX_AR_X)
		snprintf(indexAccessCode, sizeof(indexAccessCode), "ARi.x");
	else if (aluInstruction->indexMode == GPU7_INDEX_AR_Y)
		snprintf(indexAccessCode, sizeof(indexAccessCode), "ARi.y");
	else if (aluInstruction->indexMode == GPU7_INDEX_AR_Z)
		snprintf(indexAccessCode, sizeof(indexAccessCode), "ARi.z");
	else if (aluInstruction->indexMode == GPU7_INDEX_AR_W)
		snprintf(indexAccessCode, sizeof(indexAccessCode), "ARi.w");
	else
		cemu_assert_unimplemented();

	if( LATTE_DECOMPILER_DTYPE_SIGNED_INT != requiredType )
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);

	// generated code looks like this:
	// result = ((lookupIndex==0)?GPR5:(lookupIndex==1)?GPR6:(lookupIndex==2)?GPR7:...:(lookupIndex==122)?GPR127:0)
	src->add("(");
	// NOTE(review): the header of this loop and the start of the condition below were partially lost
	// ("i<...shaderContext->" was missing). The LATTE_NUM_GPR bound is reconstructed from the comment
	// above (lookup up to GPR127) - confirm against version control
	for(sint32 i=gprBaseIndex; i<LATTE_NUM_GPR; i++)
	{
		// registers whose bit is not set in gprUseMask are skipped
		if( (shaderContext->analyzer.gprUseMask[i / 8] & (1 << (i % 8))) == 0 )
			continue;
		src->addFmt("({}=={})?", indexAccessCode, i-gprBaseIndex);
		// code to access gpr
		src->add(_getRegisterVarName(shaderContext, i));
		_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
		src->add(":");
	}
	src->add("0)");
	if( LATTE_DECOMPILER_DTYPE_SIGNED_INT != requiredType )
		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
}

/*
 * Emits the expression for a source operand of an ALU instruction, converted to requiredType
 * Handles GPRs (direct and relative), inline constants, literals, uniforms and PV/PS
 * abs/neg modifiers are applied as float operations; if requiredType is not float the operand is
 * read as float and the result is converted back to requiredType
 */
static void _emitOperandInputCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex, sint32 requiredType)
{
	StringBuf* src = shaderContext->shaderSource;
	if( operandIndex < 0 || operandIndex >= 3 )
		debugBreakpoint();
	sint32 requiredTypeOut = requiredType;
	if( requiredType != LATTE_DECOMPILER_DTYPE_FLOAT && (aluInstruction->sourceOperand[operandIndex].abs != 0 || aluInstruction->sourceOperand[operandIndex].neg != 0) )
	{
		// we need to apply float operations on the input but it's not read as a float
		// force internal required type to float and then cast it back to whatever type is actually required
		requiredType = LATTE_DECOMPILER_DTYPE_FLOAT;
	}

	if( requiredTypeOut != requiredType )
		_emitTypeConversionPrefixMSL(shaderContext, requiredType, requiredTypeOut);

	if( aluInstruction->sourceOperand[operandIndex].neg != 0 )
		src->add("-(");
	if( aluInstruction->sourceOperand[operandIndex].abs != 0 )
		src->add("abs(");

	if( GPU7_ALU_SRC_IS_GPR(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		if( aluInstruction->sourceOperand[operandIndex].rel != 0 )
		{
			_emitCodeToReadRelativeGPR(shaderContext, aluInstruction, operandIndex, requiredType);
		}
		else
		{
			if( requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
			{
				// signed int 32bit
				sint32 currentRegisterElementType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
				// write code for register input
				_emitTypeConversionPrefixMSL(shaderContext, currentRegisterElementType, requiredType);
				_emitALURegisterInputAccessCode(shaderContext, aluInstruction, operandIndex);
				_emitTypeConversionSuffixMSL(shaderContext, currentRegisterElementType, requiredType);
			}
			else if( requiredType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT )
			{
				// unsigned int 32bit
				sint32 currentRegisterElementType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
				if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
				{
					// need to convert from int to uint
					src->add("uint(");
				}
				else if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT )
				{
					// no extra work necessary
				}
				else
					debugBreakpoint();
				// write code for register input
				_emitALURegisterInputAccessCode(shaderContext, aluInstruction, operandIndex);
				if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
				{
					src->add(")");
				}
			}
			else if( requiredType == LATTE_DECOMPILER_DTYPE_FLOAT )
			{
				// float 32bit
				sint32 currentRegisterElementType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
				if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
				{
					// need to convert (not cast) from int bits to float
					src->add("as_type<float>("); // TODO: correct?
				}
				else if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_FLOAT )
				{
					// no extra work necessary
				}
				else
					debugBreakpoint();
				// write code for register input
				_emitALURegisterInputAccessCode(shaderContext, aluInstruction, operandIndex);
				if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
				{
					src->add(")");
				}
			}
			else
				debugBreakpoint();
		}
	}
	else if( GPU7_ALU_SRC_IS_CONST_0F(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		if(requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT || requiredType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT)
			src->add("0");
		else if( requiredType == LATTE_DECOMPILER_DTYPE_FLOAT )
			src->add("0.0");
	}
	else if( GPU7_ALU_SRC_IS_CONST_1F(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
		src->add("1.0");
		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
	}
	else if( GPU7_ALU_SRC_IS_CONST_0_5F(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
		src->add("0.5");
		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
	}
	else if( GPU7_ALU_SRC_IS_CONST_1I(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		if (requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
			src->add("int(1)");
		else if (requiredType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT)
			src->add("uint(1)");
		else
			cemu_assert_suspicious();
	}
	else if( GPU7_ALU_SRC_IS_CONST_M1I(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		if( requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
			src->add("int(-1)");
		else
			cemu_assert_suspicious();
	}
	else if( GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		if( requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
			src->addFmt("int(0x{:x})", aluInstruction->literalData.w[aluInstruction->sourceOperand[operandIndex].chan]);
		else if( requiredType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT )
			src->addFmt("uint(0x{:x})", aluInstruction->literalData.w[aluInstruction->sourceOperand[operandIndex].chan]);
		else if (requiredType == LATTE_DECOMPILER_DTYPE_FLOAT)
		{
			uint32 constVal = aluInstruction->literalData.w[aluInstruction->sourceOperand[operandIndex].chan];
			sint32 exponent = (constVal >> 23) & 0xFF;
			exponent -= 127;
			// only literals with a small exponent and a zero low byte are written as decimal constants
			// everything else is emitted as a reinterpreted hex constant
			if ((constVal & 0xFF) == 0 && exponent >= -10 && exponent <= 10)
			{
				float constValAsFloat;
				static_assert(sizeof(constValAsFloat) == sizeof(constVal));
				memcpy(&constValAsFloat, &constVal, sizeof(constValAsFloat)); // reinterpret the bits without violating aliasing rules
				src->add(_FormatFloatAsConstant(constValAsFloat));
			}
			else
				src->addFmt("as_type<float>(0x{:08x})", constVal);
		}
	}
	else if( GPU7_ALU_SRC_IS_CFILE(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		_emitUniformAccessCode(shaderContext, aluInstruction, operandIndex, requiredType);
	}
	else if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction->sourceOperand[operandIndex].sel) ||
		GPU7_ALU_SRC_IS_CBANK1(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		_emitUniformAccessCode(shaderContext, aluInstruction, operandIndex, requiredType);
	}
	else if( GPU7_ALU_SRC_IS_PV(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		sint32 currentPVDataType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
		_emitTypeConversionPrefixMSL(shaderContext, currentPVDataType, requiredType);
		_emitPVPSAccessCode(shaderContext, aluInstruction, operandIndex, aluInstruction->sourceOperand[operandIndex].chan);
		_emitTypeConversionSuffixMSL(shaderContext, currentPVDataType, requiredType);
	}
	else if( GPU7_ALU_SRC_IS_PS(aluInstruction->sourceOperand[operandIndex].sel) )
	{
		sint32 currentPSDataType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
		_emitTypeConversionPrefixMSL(shaderContext, currentPSDataType, requiredType);
		_emitPVPSAccessCode(shaderContext, aluInstruction, operandIndex, 4);
		_emitTypeConversionSuffixMSL(shaderContext, currentPSDataType, requiredType);
	}
	else
	{
		cemuLog_log(LogType::Force, "Unsupported shader ALU operand sel {:#x}\n", aluInstruction->sourceOperand[operandIndex].sel);
		debugBreakpoint();
	}

	if( aluInstruction->sourceOperand[operandIndex].abs != 0 )
		src->add(")");
	if( aluInstruction->sourceOperand[operandIndex].neg != 0 )
		src->add(")");

	if( requiredTypeOut != requiredType )
		_emitTypeConversionSuffixMSL(shaderContext, requiredType, requiredTypeOut);
}

/*
 * Opens a bit-reinterpreting conversion to destinationType: as_type<T>( for scalars, as_type<TN>( for N components
 * Emits nothing if source and destination type are identical
 * Must be paired with _emitTypeConversionSuffixMSL using the same types
 */
void _emitTypeConversionPrefixMSL(LatteDecompilerShaderContext* shaderContext, sint32 sourceType, sint32 destinationType, sint32 componentCount)
{
	if( sourceType == destinationType )
		return;
	StringBuf* src = shaderContext->shaderSource;
	if (destinationType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
	{
		if (componentCount == 1)
			src->add("as_type<int>(");
		else
			src->addFmt("as_type<int{}>(", componentCount);
	}
	else if (destinationType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT)
	{
		if (componentCount == 1)
			src->add("as_type<uint>(");
		else
			src->addFmt("as_type<uint{}>(", componentCount);
	}
	else if (destinationType == LATTE_DECOMPILER_DTYPE_FLOAT)
	{
		if (componentCount == 1)
			src->add("as_type<float>(");
		else
			src->addFmt("as_type<float{}>(", componentCount);
	}
	else
		cemu_assert_debug(false);
}

// closes the conversion opened by _emitTypeConversionPrefixMSL (no-op if the types are identical)
void _emitTypeConversionSuffixMSL(LatteDecompilerShaderContext* shaderContext, sint32 sourceType, sint32 destinationType)
{
	if( sourceType == destinationType )
		return;
	StringBuf* src = shaderContext->shaderSource;
	src->add(")");
}

// emits "<output> = <operand0><operandStr><operand1>;" where both operands are read as TDataType
// and the result is converted to the output type of the instruction
template<int TDataType>
static void _emitALUOperationBinary(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, const char* operandStr)
{
	StringBuf* src = shaderContext->shaderSource;
	sint32 outputType = _getALUInstructionOutputDataType(shaderContext, aluInstruction);
	_emitInstructionOutputVariableName(shaderContext, aluInstruction);
	src->add(" = ");
	_emitTypeConversionPrefixMSL(shaderContext, TDataType, outputType);
	_emitOperandInputCode(shaderContext, aluInstruction, 0, TDataType);
	src->add(operandStr);
	_emitOperandInputCode(shaderContext, aluInstruction, 1, TDataType);
	_emitTypeConversionSuffixMSL(shaderContext, TDataType, outputType);
	src->add(";" _CRLF);
}

// returns true if both operands read the same GPR with identical channel, abs, neg and rel settings
static bool _isSameGPROperand(LatteDecompilerALUInstruction* aluInstruction, sint32 opIndexA, sint32 opIndexB)
{
	if (aluInstruction->sourceOperand[opIndexA].sel != aluInstruction->sourceOperand[opIndexB].sel)
		return false;
	if (!GPU7_ALU_SRC_IS_GPR(aluInstruction->sourceOperand[opIndexA].sel))
		return false;
	if (aluInstruction->sourceOperand[opIndexA].chan != aluInstruction->sourceOperand[opIndexB].chan)
		return false;
	if (aluInstruction->sourceOperand[opIndexA].abs != aluInstruction->sourceOperand[opIndexB].abs)
		return false;
	if (aluInstruction->sourceOperand[opIndexA].neg != aluInstruction->sourceOperand[opIndexB].neg)
		return false;
	if (aluInstruction->sourceOperand[opIndexA].rel != aluInstruction->sourceOperand[opIndexB].rel)
		return false;
	return true;
}

// returns true if the operand has the abs or neg modifier set
static bool _operandHasModifiers(LatteDecompilerALUInstruction* aluInstruction, sint32 opIndex)
{
	return aluInstruction->sourceOperand[opIndex].abs != 0 || aluInstruction->sourceOperand[opIndex].neg != 0;
}

static void _emitALUOP2InstructionCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, LatteDecompilerALUInstruction* aluInstruction)
{
	StringBuf* src = shaderContext->shaderSource;
	sint32 outputType = _getALUInstructionOutputDataType(shaderContext, aluInstruction); // data type of output
	if( aluInstruction->opcode == ALU_OP2_INST_MOV )
	{
		bool requiresFloatMove = false;
		requiresFloatMove = aluInstruction->sourceOperand[0].abs != 0 || aluInstruction->sourceOperand[0].neg != 0;
		if( requiresFloatMove )
		{
			// abs/neg operations are applied to source operand, do float based move
			_emitInstructionOutputVariableName(shaderContext, aluInstruction);
			src->add(" = ");
			_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
			_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
			_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
			src->add(";" _CRLF);
		}
		else
		{
			_emitInstructionOutputVariableName(shaderContext, aluInstruction);
			src->add(" = ");
			_emitOperandInputCode(shaderContext, aluInstruction, 0, outputType);
			src->add(";" _CRLF);
		}
	}
	else if( aluInstruction->opcode == ALU_OP2_INST_MOVA_FLOOR )
	{
		cemu_assert_debug(aluInstruction->writeMask == 0);
		cemu_assert_debug(aluInstruction->omod == 0);
		src->add("tempResultf = ");
		_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
		src->add(";" _CRLF);
		src->add("tempResultf = floor(tempResultf);" _CRLF);
		src->add("tempResultf = clamp(tempResultf, -256.0, 255.0);" _CRLF);
		// set AR
		if( aluInstruction->destElem == 0 )
			src->add("ARi.x = int(tempResultf);" _CRLF);
		else if( aluInstruction->destElem == 1 )
			src->add("ARi.y = int(tempResultf);" _CRLF);
		else if( aluInstruction->destElem == 2 )
			src->add("ARi.z = int(tempResultf);" _CRLF);
		else
			src->add("ARi.w = int(tempResultf);" _CRLF);
		// set output
		_emitInstructionOutputVariableName(shaderContext, aluInstruction);
		src->add(" = ");
		if( outputType != LATTE_DECOMPILER_DTYPE_SIGNED_INT )
			debugBreakpoint(); // todo
		src->add("as_type<int>(tempResultf)");
		src->add(";" _CRLF);
	}
	else if( aluInstruction->opcode == ALU_OP2_INST_MOVA_INT )
	{
		cemu_assert_debug(aluInstruction->writeMask == 0);
		cemu_assert_debug(aluInstruction->omod == 0);
		src->add("tempResulti = ");
		_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
		src->add(";" _CRLF);
		src->add("tempResulti = clamp(tempResulti, -256, 255);" _CRLF);
		// set AR
		if( 
aluInstruction->destElem == 0 ) src->add("ARi.x = tempResulti;" _CRLF); else if( aluInstruction->destElem == 1 ) src->add("ARi.y = tempResulti;" _CRLF); else if( aluInstruction->destElem == 2 ) src->add("ARi.z = tempResulti;" _CRLF); else src->add("ARi.w = tempResulti;" _CRLF); // set output _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); if( outputType != LATTE_DECOMPILER_DTYPE_SIGNED_INT ) debugBreakpoint(); // todo src->add("tempResulti"); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_ADD ) { _emitALUOperationBinary(shaderContext, aluInstruction, " + "); } else if( aluInstruction->opcode == ALU_OP2_INST_MUL ) { // 0*anything is always 0 _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); // if any operand is a non-zero literal or constant we can use standard multiplication bool useDefaultMul = false; if (GPU7_ALU_SRC_IS_CONST_0F(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_CONST_0F(aluInstruction->sourceOperand[1].sel)) { // result is always zero src->add("0.0"); } else { // multiply if (GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[1].sel) || GPU7_ALU_SRC_IS_ANY_CONST(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_ANY_CONST(aluInstruction->sourceOperand[1].sel)) { useDefaultMul = true; } if (shaderContext->options->strictMul && useDefaultMul == false) { src->add("mul_nonIEEE("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(", "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); } else { _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(" * "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); } } 
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_MUL_IEEE ) { // 0*anything according to IEEE rules _emitALUOperationBinary(shaderContext, aluInstruction, " * "); } else if (aluInstruction->opcode == ALU_OP2_INST_RECIP_IEEE) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("1.0"); src->add(" / "); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if (aluInstruction->opcode == ALU_OP2_INST_RECIP_FF) { // untested (BotW bombs) src->add("tempResultf = 1.0 / ("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(");" _CRLF); // INF becomes 0.0 src->add("if( isinf(tempResultf) == true && (as_type(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF); // -INF becomes -0.0 src->add("else if( isinf(tempResultf) == true && (as_type(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF); // assign result to output _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("tempResultf"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_IEEE || aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_CLAMPED || aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_FF ) { // todo: This should be correct but testing is needed src->add("tempResultf = 1.0 / sqrt("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(");" _CRLF); if (aluInstruction->opcode == 
ALU_OP2_INST_RECIPSQRT_CLAMPED) { // note: if( -INF < 0.0 ) does not resolve to true src->add("if( isinf(tempResultf) == true && (as_type(tempResultf)&0x80000000) != 0 ) tempResultf = -3.40282347E+38F;" _CRLF); src->add("else if( isinf(tempResultf) == true && (as_type(tempResultf)&0x80000000) == 0 ) tempResultf = 3.40282347E+38F;" _CRLF); } else if (aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_FF) { // untested (BotW bombs) src->add("if( isinf(tempResultf) == true && (as_type(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF); src->add("else if( isinf(tempResultf) == true && (as_type(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF); } // assign result to output _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("tempResultf"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_MAX || aluInstruction->opcode == ALU_OP2_INST_MIN || aluInstruction->opcode == ALU_OP2_INST_MAX_DX10 || aluInstruction->opcode == ALU_OP2_INST_MIN_DX10 ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); if( aluInstruction->opcode == ALU_OP2_INST_MAX ) src->add("max"); else if( aluInstruction->opcode == ALU_OP2_INST_MIN ) src->add("min"); else if (aluInstruction->opcode == ALU_OP2_INST_MAX_DX10) src->add("max"); else if (aluInstruction->opcode == ALU_OP2_INST_MIN_DX10) src->add("min"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(", "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( 
aluInstruction->opcode == ALU_OP2_INST_FLOOR || aluInstruction->opcode == ALU_OP2_INST_FRACT || aluInstruction->opcode == ALU_OP2_INST_TRUNC ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); if( aluInstruction->opcode == ALU_OP2_INST_FLOOR ) src->add("floor"); else if( aluInstruction->opcode == ALU_OP2_INST_FRACT ) src->add("fract"); else src->add("trunc"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_LOG_CLAMPED || aluInstruction->opcode == ALU_OP2_INST_LOG_IEEE ) { src->add("tempResultf = max(0.0, "); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(");" _CRLF); src->add("tempResultf = log2(tempResultf);" _CRLF); if( aluInstruction->opcode == ALU_OP2_INST_LOG_CLAMPED ) { src->add("if( isinf(tempResultf) == true ) tempResultf = -3.40282347E+38F;" _CRLF); } // assign result to output _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("tempResultf"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_RNDNE ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("rint("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == 
ALU_OP2_INST_EXP_IEEE ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("exp2"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SQRT_IEEE ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("sqrt"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SIN || aluInstruction->opcode == ALU_OP2_INST_COS ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); if( aluInstruction->opcode == ALU_OP2_INST_SIN ) src->add("sin"); else src->add("cos"); src->add("(("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")/0.1591549367)"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_FLT_TO_INT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("int"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); 
src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_FLT_TO_UINT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT, outputType); src->add("uint"); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_INT_TO_FLOAT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("float("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_UINT_TO_FLOAT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("float("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT); src->add(")"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if (aluInstruction->opcode == ALU_OP2_INST_AND_INT) _emitALUOperationBinary(shaderContext, aluInstruction, " & "); else if (aluInstruction->opcode == ALU_OP2_INST_OR_INT) _emitALUOperationBinary(shaderContext, aluInstruction, " | "); else if (aluInstruction->opcode == ALU_OP2_INST_XOR_INT) _emitALUOperationBinary(shaderContext, aluInstruction, " ^ "); else if( aluInstruction->opcode == ALU_OP2_INST_NOT_INT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); 
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("~("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_ADD_INT ) _emitALUOperationBinary(shaderContext, aluInstruction, " + "); else if( aluInstruction->opcode == ALU_OP2_INST_MAX_INT || aluInstruction->opcode == ALU_OP2_INST_MIN_INT || aluInstruction->opcode == ALU_OP2_INST_MAX_UINT || aluInstruction->opcode == ALU_OP2_INST_MIN_UINT) { // not verified bool isUnsigned = aluInstruction->opcode == ALU_OP2_INST_MAX_UINT || aluInstruction->opcode == ALU_OP2_INST_MIN_UINT; auto opType = isUnsigned ? LATTE_DECOMPILER_DTYPE_UNSIGNED_INT : LATTE_DECOMPILER_DTYPE_SIGNED_INT; _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, opType, outputType); if( aluInstruction->opcode == ALU_OP2_INST_MAX_INT || aluInstruction->opcode == ALU_OP2_INST_MAX_UINT ) src->add("max("); else src->add("min("); _emitOperandInputCode(shaderContext, aluInstruction, 0, opType); src->add(", "); _emitOperandInputCode(shaderContext, aluInstruction, 1, opType); _emitTypeConversionSuffixMSL(shaderContext, opType, outputType); src->add(");" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SUB_INT ) { // note: The AMD doc says src1 is on the left side but tests indicate otherwise. It's src0 - src1. 
_emitALUOperationBinary(shaderContext, aluInstruction, " - "); } else if (aluInstruction->opcode == ALU_OP2_INST_MULLO_INT) _emitALUOperationBinary(shaderContext, aluInstruction, " * "); else if (aluInstruction->opcode == ALU_OP2_INST_MULLO_UINT) _emitALUOperationBinary(shaderContext, aluInstruction, " * "); else if( aluInstruction->opcode == ALU_OP2_INST_LSHL_INT ) _emitALUOperationBinary(shaderContext, aluInstruction, " << "); else if( aluInstruction->opcode == ALU_OP2_INST_LSHR_INT ) _emitALUOperationBinary(shaderContext, aluInstruction, " >> "); else if( aluInstruction->opcode == ALU_OP2_INST_ASHR_INT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(" >> "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_SIGNED_INT); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SETGT || aluInstruction->opcode == ALU_OP2_INST_SETGE || aluInstruction->opcode == ALU_OP2_INST_SETNE || aluInstruction->opcode == ALU_OP2_INST_SETE ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); if( aluInstruction->opcode == ALU_OP2_INST_SETGT ) src->add(" > "); else if( aluInstruction->opcode == ALU_OP2_INST_SETGE ) src->add(" >= "); else if (aluInstruction->opcode == ALU_OP2_INST_SETNE) src->add(" != "); else if (aluInstruction->opcode == ALU_OP2_INST_SETE) src->add(" == "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")?1.0:0.0"); 
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SETGT_DX10 || aluInstruction->opcode == ALU_OP2_INST_SETE_DX10 || aluInstruction->opcode == ALU_OP2_INST_SETNE_DX10 || aluInstruction->opcode == ALU_OP2_INST_SETGE_DX10 ) { if( aluInstruction->omod != 0 ) debugBreakpoint(); _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("(("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); if( aluInstruction->opcode == ALU_OP2_INST_SETE_DX10 ) src->add(" == "); else if( aluInstruction->opcode == ALU_OP2_INST_SETNE_DX10 ) src->add(" != "); else if( aluInstruction->opcode == ALU_OP2_INST_SETGT_DX10 ) src->add(" > "); else if( aluInstruction->opcode == ALU_OP2_INST_SETGE_DX10 ) src->add(" >= "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")?-1:0)"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";"); src->add(_CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SETE_INT || aluInstruction->opcode == ALU_OP2_INST_SETNE_INT || aluInstruction->opcode == ALU_OP2_INST_SETGT_INT || aluInstruction->opcode == ALU_OP2_INST_SETGE_INT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); if( aluInstruction->opcode == ALU_OP2_INST_SETE_INT ) src->add(" == "); else if( aluInstruction->opcode == ALU_OP2_INST_SETNE_INT ) src->add(" != "); else if( aluInstruction->opcode == ALU_OP2_INST_SETGT_INT ) src->add(" > "); else if( aluInstruction->opcode == ALU_OP2_INST_SETGE_INT ) 
src->add(" >= "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")?-1:0"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_SETGE_UINT || aluInstruction->opcode == ALU_OP2_INST_SETGT_UINT ) { // todo: Unsure if the result is unsigned or signed _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT); if( aluInstruction->opcode == ALU_OP2_INST_SETGE_UINT ) src->add(" >= "); else if( aluInstruction->opcode == ALU_OP2_INST_SETGT_UINT ) src->add(" > "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT); src->add(")?int(0xFFFFFFFF):int(0x0)"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT || aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE_INT || aluInstruction->opcode == ALU_OP2_INST_PRED_SETE_INT || aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE_INT || aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT_INT ) { cemu_assert_debug(aluInstruction->writeMask == 0); bool isIntPred = (aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE_INT) || (aluInstruction->opcode == ALU_OP2_INST_PRED_SETE_INT) || (aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE_INT) || (aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT_INT); src->add("predResult"); src->add(" = ("); _emitOperandInputCode(shaderContext, aluInstruction, 0, 
isIntPred?LATTE_DECOMPILER_DTYPE_SIGNED_INT:LATTE_DECOMPILER_DTYPE_FLOAT); if (aluInstruction->opcode == ALU_OP2_INST_PRED_SETE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETE_INT) src->add(" == "); else if (aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT || aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT_INT) src->add(" > "); else if (aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE_INT) src->add(" >= "); else if (aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE_INT) src->add(" != "); else cemu_assert_debug(false); _emitOperandInputCode(shaderContext, aluInstruction, 1, isIntPred?LATTE_DECOMPILER_DTYPE_SIGNED_INT:LATTE_DECOMPILER_DTYPE_FLOAT); src->add(");" _CRLF); // handle result of predicate instruction based on current ALU clause type if( cfInstruction->type == GPU7_CF_INST_ALU_PUSH_BEFORE ) { src->addFmt("{} = predResult;" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth)); src->addFmt("{} = predResult == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth)); } else if( cfInstruction->type == GPU7_CF_INST_ALU_BREAK ) { // leave current loop src->add("if( predResult == false ) break;" _CRLF); } else cemu_assert_debug(false); } else if( aluInstruction->opcode == ALU_OP2_INST_KILLE_INT || aluInstruction->opcode == ALU_OP2_INST_KILLNE_INT || aluInstruction->opcode == ALU_OP2_INST_KILLGT_INT) { src->add("if( "); src->add(" ("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); if( aluInstruction->opcode == ALU_OP2_INST_KILLE_INT ) src->add(" == "); else if (aluInstruction->opcode == ALU_OP2_INST_KILLNE_INT) src->add(" != "); else if (aluInstruction->opcode == ALU_OP2_INST_KILLGT_INT) src->add(" > "); else debugBreakpoint(); _emitOperandInputCode(shaderContext, 
aluInstruction, 1, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")"); src->add(") discard_fragment();"); src->add(_CRLF); } else if( aluInstruction->opcode == ALU_OP2_INST_KILLGT || aluInstruction->opcode == ALU_OP2_INST_KILLGE || aluInstruction->opcode == ALU_OP2_INST_KILLE ) { src->add("if( "); src->add(" ("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); if( aluInstruction->opcode == ALU_OP2_INST_KILLGT ) src->add(" > "); else if( aluInstruction->opcode == ALU_OP2_INST_KILLGE ) src->add(" >= "); else if( aluInstruction->opcode == ALU_OP2_INST_KILLE ) src->add(" == "); else debugBreakpoint(); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); src->add(") discard_fragment();"); src->add(_CRLF); } else { src->add("Unsupported instruction;" _CRLF); debug_printf("Unsupported ALU op2 instruction 0x%x\n", aluInstruction->opcode); shaderContext->shader->hasError = true; } } static void _emitALUOP3InstructionCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, LatteDecompilerALUInstruction* aluInstruction) { StringBuf* src = shaderContext->shaderSource; cemu_assert_debug(aluInstruction->destRel == 0); // todo sint32 outputType = _getALUInstructionOutputDataType(shaderContext, aluInstruction); /* check for common no-op or mov-like instructions */ if (aluInstruction->opcode == ALU_OP3_INST_CMOVGE || aluInstruction->opcode == ALU_OP3_INST_CMOVE || aluInstruction->opcode == ALU_OP3_INST_CMOVGT || aluInstruction->opcode == ALU_OP3_INST_CNDE_INT || aluInstruction->opcode == ALU_OP3_INST_CNDGT_INT || aluInstruction->opcode == ALU_OP3_INST_CMOVGE_INT) { if (_isSameGPROperand(aluInstruction, 1, 2) && !_operandHasModifiers(aluInstruction, 1)) { // the condition is irrelevant as both operands are the same _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitOperandInputCode(shaderContext, aluInstruction, 1, 
outputType); src->add(";" _CRLF); return; } } /* generic handlers */ if( aluInstruction->opcode == ALU_OP3_INST_MULADD || aluInstruction->opcode == ALU_OP3_INST_MULADD_D2 || aluInstruction->opcode == ALU_OP3_INST_MULADD_M2 || aluInstruction->opcode == ALU_OP3_INST_MULADD_M4 || aluInstruction->opcode == ALU_OP3_INST_MULADD_IEEE ) { // todo: The difference between MULADD and MULADD IEEE is that the former has 0*anything=0 rule similar to MUL/MUL_IEEE? _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); if (aluInstruction->opcode != ALU_OP3_INST_MULADD) // avoid unnecessary parenthesis to improve code readability slightly src->add("("); bool useDefaultMul = false; if (GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[1].sel) || GPU7_ALU_SRC_IS_ANY_CONST(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_ANY_CONST(aluInstruction->sourceOperand[1].sel)) { useDefaultMul = true; } if (aluInstruction->opcode == ALU_OP3_INST_MULADD_IEEE) useDefaultMul = true; if (shaderContext->options->strictMul && useDefaultMul == false) { src->add("mul_nonIEEE("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); } else { _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(" * "); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); } src->add(" + "); _emitOperandInputCode(shaderContext, aluInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); if(aluInstruction->opcode != ALU_OP3_INST_MULADD) src->add(")"); if( aluInstruction->opcode == ALU_OP3_INST_MULADD_D2 ) src->add("/2.0"); else if( aluInstruction->opcode == ALU_OP3_INST_MULADD_M2 ) src->add("*2.0"); else if( 
aluInstruction->opcode == ALU_OP3_INST_MULADD_M4 ) src->add("*4.0"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if(aluInstruction->opcode == ALU_OP3_INST_CNDE_INT || aluInstruction->opcode == ALU_OP3_INST_CNDGT_INT || aluInstruction->opcode == ALU_OP3_INST_CMOVGE_INT) { bool requiresFloatResult = (aluInstruction->sourceOperand[1].neg != 0) || (aluInstruction->sourceOperand[2].neg != 0); _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, requiresFloatResult?LATTE_DECOMPILER_DTYPE_FLOAT:LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("(("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); if (aluInstruction->opcode == ALU_OP3_INST_CNDE_INT) src->add(" == "); else if (aluInstruction->opcode == ALU_OP3_INST_CNDGT_INT) src->add(" > "); else if (aluInstruction->opcode == ALU_OP3_INST_CMOVGE_INT) src->add(" >= "); src->add("0)?("); _emitOperandInputCode(shaderContext, aluInstruction, 1, requiresFloatResult?LATTE_DECOMPILER_DTYPE_FLOAT:LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add("):("); _emitOperandInputCode(shaderContext, aluInstruction, 2, requiresFloatResult?LATTE_DECOMPILER_DTYPE_FLOAT:LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add("))"); _emitTypeConversionSuffixMSL(shaderContext, requiresFloatResult?LATTE_DECOMPILER_DTYPE_FLOAT:LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else if( aluInstruction->opcode == ALU_OP3_INST_CMOVGE || aluInstruction->opcode == ALU_OP3_INST_CMOVE || aluInstruction->opcode == ALU_OP3_INST_CMOVGT ) { _emitInstructionOutputVariableName(shaderContext, aluInstruction); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("(("); _emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); if (aluInstruction->opcode == 
ALU_OP3_INST_CMOVE) src->add(" == "); else if (aluInstruction->opcode == ALU_OP3_INST_CMOVGE) src->add(" >= "); else if (aluInstruction->opcode == ALU_OP3_INST_CMOVGT) src->add(" > "); src->add("0.0)?("); _emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add("):("); _emitOperandInputCode(shaderContext, aluInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add("))"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else { src->add("Unsupported instruction;" _CRLF); debug_printf("Unsupported ALU op3 instruction 0x%x\n", aluInstruction->opcode); shaderContext->shader->hasError = true; } } static void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluRedcInstruction[4]) { StringBuf* src = shaderContext->shaderSource; if( aluRedcInstruction[0]->isOP3 == false && (aluRedcInstruction[0]->opcode == ALU_OP2_INST_DOT4 || aluRedcInstruction[0]->opcode == ALU_OP2_INST_DOT4_IEEE) ) { // todo: Figure out and implement the difference between normal DOT4 and DOT4_IEEE sint32 outputType = _getALUInstructionOutputDataType(shaderContext, aluRedcInstruction[0]); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[0]); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); // dot(float4(op0),float4(op1)) src->add("dot(float4("); _emitOperandInputCode(shaderContext, aluRedcInstruction[0], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[1], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[2], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[3], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("),float4("); _emitOperandInputCode(shaderContext, aluRedcInstruction[0], 1, 
LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[1], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[2], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[3], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("))"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); } else if( aluRedcInstruction[0]->isOP3 == false && (aluRedcInstruction[0]->opcode == ALU_OP2_INST_CUBE) ) { /* * How the CUBE instruction works (guessed mostly, based on DirectX/OpenGL spec): Input: float4, 3d direction vector (can be unnormalized) + w component (which can be ignored, since it only scales the vector but does not affect the direction) First we figure out the major axis (closest axis-aligned vector). There are six possible vectors: +rx 0 -rx 1 +ry 2 -ry 3 +rz 4 -rz 5 The major axis vector is calculated by looking at the largest (absolute) 3d vector component and then setting the other components to 0.0 The value that remains in the axis vector is referred to as 'MajorAxis' by the AMD documentation. The S,T coordinates are taken from the other two components. Example: -0.5,0.2,0.4 -> -rx -> -0.5,0.0,0.0 MajorAxis: -0.5, S: 0.2 T: 0.4 The CUBE reduction instruction requires a specific mapping for the input vector: src0 = Rn.zzxy src1 = Rn.yxzz It's probably related to the way the instruction works internally? 
If we look at the individual components per ALU unit: z y -> Compare y/z z x -> Compare x/z x z -> Compare x/z y z -> Compare y/z */ sint32 outputType; src->add("redcCUBE("); src->add("float4("); _emitOperandInputCode(shaderContext, aluRedcInstruction[0], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[1], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[2], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[3], 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("),"); src->add("float4("); _emitOperandInputCode(shaderContext, aluRedcInstruction[0], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[1], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[2], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluRedcInstruction[3], 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("),"); src->add("cubeMapSTM,cubeMapFaceId);" _CRLF); // dst.X (S) outputType = _getALUInstructionOutputDataType(shaderContext, aluRedcInstruction[0]); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[0]); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("cubeMapSTM.x"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); // dst.Y (T) outputType = _getALUInstructionOutputDataType(shaderContext, aluRedcInstruction[1]); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[1]); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("cubeMapSTM.y"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); // dst.Z (MajorAxis) outputType = 
_getALUInstructionOutputDataType(shaderContext, aluRedcInstruction[2]); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[2]); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add("cubeMapSTM.z"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType); src->add(";" _CRLF); // dst.W (FaceId) outputType = _getALUInstructionOutputDataType(shaderContext, aluRedcInstruction[3]); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[3]); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add("cubeMapFaceId"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType); src->add(";" _CRLF); } else cemu_assert_unimplemented(); } static void _emitALUClauseRegisterBackupCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, sint32 startIndex) { sint32 instructionGroupIndex = cfInstruction->instructionsALU[startIndex].instructionGroupIndex; size_t groupSize = 1; while ((startIndex + groupSize) < cfInstruction->instructionsALU.size()) { if (instructionGroupIndex != cfInstruction->instructionsALU[startIndex + groupSize].instructionGroupIndex) break; groupSize++; } shaderContext->aluPVPSState->CreateGPRTemporaries(shaderContext, { cfInstruction->instructionsALU.data() + startIndex, groupSize }); } /* bool _isPVUsedInNextGroup(LatteDecompilerCFInstruction* cfInstruction, sint32 startIndex, sint32 pvUnit) { sint32 currentGroupIndex = cfInstruction->instructionsALU[startIndex].instructionGroupIndex; for (sint32 i = startIndex + 1; i < (sint32)cfInstruction->instructionsALU.size(); i++) { LatteDecompilerALUInstruction& aluInstructionItr = cfInstruction->instructionsALU[i]; if(aluInstructionItr.instructionGroupIndex == currentGroupIndex ) continue; if ((sint32)aluInstructionItr.instructionGroupIndex > currentGroupIndex + 1) 
return false; // check OP code type if (aluInstructionItr.isOP3) { // op0 if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[0].sel)) { uint32 chan = aluInstructionItr.sourceOperand[0].chan; if (pvUnit == chan) return true; } // op1 if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[1].sel)) { uint32 chan = aluInstructionItr.sourceOperand[1].chan; if (pvUnit == chan) return true; } // op2 if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[2].sel)) { uint32 chan = aluInstructionItr.sourceOperand[2].chan; if (pvUnit == chan) return true; } } else { // op0 if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[0].sel)) { uint32 chan = aluInstructionItr.sourceOperand[0].chan; if (pvUnit == chan) return true; } // op1 if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[1].sel)) { uint32 chan = aluInstructionItr.sourceOperand[1].chan; if (pvUnit == chan) return true; } // todo: Not all operations use both operands } } return false; } */ static void _emitFloat3(LatteDecompilerShaderContext* shaderContext, uint32 dataType, LatteDecompilerALUInstruction* aluInst0, sint32 opIdx0, LatteDecompilerALUInstruction* aluInst1, sint32 opIdx1, LatteDecompilerALUInstruction* aluInst2, sint32 opIdx2) { StringBuf* src = shaderContext->shaderSource; if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT) { src->add("float3("); _emitOperandInputCode(shaderContext, aluInst0, opIdx0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluInst1, opIdx1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitOperandInputCode(shaderContext, aluInst2, opIdx2, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); } else if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) { src->add("int3("); _emitOperandInputCode(shaderContext, aluInst0, opIdx0, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(","); _emitOperandInputCode(shaderContext, aluInst1, opIdx1, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(","); _emitOperandInputCode(shaderContext, aluInst2, opIdx2, 
LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")"); } else cemu_assert_unimplemented(); } static void _emitGPRVectorAssignment(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction** aluInstructions, sint32 count) { StringBuf* src = shaderContext->shaderSource; // output var name (GPR) src->add(_getRegisterVarName(shaderContext, aluInstructions[0]->destGpr, -1)); src->add("."); for (sint32 f = 0; f < count; f++) { src->add(_getElementStrByIndex(aluInstructions[f]->destElem)); } src->add(" = "); } static void _emitALUClauseCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { ALUClauseTemporariesState pvpsState; shaderContext->aluPVPSState = &pvpsState; StringBuf* src = shaderContext->shaderSource; LatteDecompilerALUInstruction* aluRedcInstruction[4]; size_t groupStartIndex = 0; for(size_t i=0; iinstructionsALU.size(); i++) { LatteDecompilerALUInstruction& aluInstruction = cfInstruction->instructionsALU[i]; if( aluInstruction.indexInGroup == 0 ) { src->addFmt("// {}" _CRLF, aluInstruction.instructionGroupIndex); // apply PV/PS updates for previous group if (i > 0) { pvpsState.TrackGroupOutputPVPS(shaderContext, cfInstruction->instructionsALU.data() + groupStartIndex, i - groupStartIndex); } groupStartIndex = i; // backup registers which are read after being written _emitALUClauseRegisterBackupCode(shaderContext, cfInstruction, i); } // detect reduction instructions and use a special handler bool isReductionOperation = _isReductionInstruction(&aluInstruction); if( isReductionOperation ) { cemu_assert_debug((i + 4) <= cfInstruction->instructionsALU.size()); aluRedcInstruction[0] = &aluInstruction; aluRedcInstruction[1] = &cfInstruction->instructionsALU[i + 1]; aluRedcInstruction[2] = &cfInstruction->instructionsALU[i + 2]; aluRedcInstruction[3] = &cfInstruction->instructionsALU[i + 3]; if( aluRedcInstruction[0]->isOP3 != aluRedcInstruction[1]->isOP3 || aluRedcInstruction[1]->isOP3 != 
aluRedcInstruction[2]->isOP3 || aluRedcInstruction[2]->isOP3 != aluRedcInstruction[3]->isOP3 ) debugBreakpoint(); if( aluRedcInstruction[0]->opcode != aluRedcInstruction[1]->opcode || aluRedcInstruction[1]->opcode != aluRedcInstruction[2]->opcode || aluRedcInstruction[2]->opcode != aluRedcInstruction[3]->opcode ) debugBreakpoint(); if( aluRedcInstruction[0]->omod != aluRedcInstruction[1]->omod || aluRedcInstruction[1]->omod != aluRedcInstruction[2]->omod || aluRedcInstruction[2]->omod != aluRedcInstruction[3]->omod ) debugBreakpoint(); if( aluRedcInstruction[0]->destClamp != aluRedcInstruction[1]->destClamp || aluRedcInstruction[1]->destClamp != aluRedcInstruction[2]->destClamp || aluRedcInstruction[2]->destClamp != aluRedcInstruction[3]->destClamp ) debugBreakpoint(); _emitALUReductionInstructionCode(shaderContext, aluRedcInstruction); i += 3; // skip the instructions that are part of the reduction operation } else /* not a reduction operation */ { if( aluInstruction.isOP3 ) { // op3 _emitALUOP3InstructionCode(shaderContext, cfInstruction, &aluInstruction); } else { // op2 if( aluInstruction.opcode == ALU_OP2_INST_NOP ) continue; // skip NOP instruction _emitALUOP2InstructionCode(shaderContext, cfInstruction, &aluInstruction); } } // handle omod sint32 outputDataType = _getALUInstructionOutputDataType(shaderContext, &aluInstruction); if( aluInstruction.omod != ALU_OMOD_NONE ) { if( outputDataType == LATTE_DECOMPILER_DTYPE_FLOAT ) { _emitInstructionOutputVariableName(shaderContext, &aluInstruction); if( aluInstruction.omod == ALU_OMOD_MUL2 ) src->add(" *= 2.0;" _CRLF); else if( aluInstruction.omod == ALU_OMOD_MUL4 ) src->add(" *= 4.0;" _CRLF); else if( aluInstruction.omod == ALU_OMOD_DIV2 ) src->add(" /= 2.0;" _CRLF); } else if( outputDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT ) { _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(" = "); src->add("as_type(as_type("); // TODO: correct? 
_emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(")"); if( aluInstruction.omod == 1 ) src->add(" * 2.0"); else if( aluInstruction.omod == 2 ) src->add(" * 4.0"); else if( aluInstruction.omod == 3 ) src->add(" / 2.0"); src->add(");" _CRLF); } else { cemu_assert_unimplemented(); } } // handle clamp if( aluInstruction.destClamp != 0 ) { if( outputDataType == LATTE_DECOMPILER_DTYPE_FLOAT ) { _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(" = clamp("); _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(", 0.0, 1.0);" _CRLF); } else if( outputDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT ) { _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(" = clampFI32("); _emitInstructionOutputVariableName(shaderContext, &aluInstruction); src->add(");" _CRLF); } else { cemu_assert_unimplemented(); } } // handle result broadcasting for reduction instructions if( isReductionOperation ) { // reduction operations set all four PV components (todo: Needs further research. According to AMD docs, dot4 only sets PV.x? update: Unlike DOT4, CUBE sets all PV elements accordingly to their GPR output?) if( aluRedcInstruction[0]->opcode == ALU_OP2_INST_CUBE ) { // CUBE for (sint32 f = 0; f < 4; f++) { if (aluRedcInstruction[f]->writeMask != 0) continue; _emitInstructionPVPSOutputVariableName(shaderContext, aluRedcInstruction[f]); src->add(" = "); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[0]); src->add(";" _CRLF); } } else { // DOT4, DOT4_IEEE, etc. 
// reduction operation result is only set for output in redc[0], we also need to update redc[1] to redc[3] for(sint32 f=0; f<4; f++) { if( aluRedcInstruction[f]->writeMask == 0 ) _emitInstructionPVPSOutputVariableName(shaderContext, aluRedcInstruction[f]); else { if (f == 0) continue; _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[f]); } src->add(" = "); _emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[0]); src->add(";" _CRLF); } } } } shaderContext->aluPVPSState = nullptr; } /* * Emits code to access one component (xyzw) of the texture coordinate input vector */ static void _emitTEXSampleCoordInputComponent(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction, sint32 componentIndex, sint32 interpretSrcAsType) { cemu_assert(componentIndex >= 0 && componentIndex < 4); cemu_assert_debug(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_SIGNED_INT || interpretSrcAsType == LATTE_DECOMPILER_DTYPE_FLOAT); StringBuf* src = shaderContext->shaderSource; sint32 elementSel = texInstruction->textureFetch.srcSel[componentIndex]; if (elementSel < 4) { _emitRegisterChannelAccessCode(shaderContext, texInstruction->srcGpr, elementSel, interpretSrcAsType); return; } const char* resultElemTable[4] = {"x","y","z","w"}; if(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_SIGNED_INT ) { if( elementSel == 4 ) src->add("as_type(0.0)"); else if( elementSel == 5 ) src->add("as_type(1.0)"); } else if(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_FLOAT ) { if( elementSel == 4 ) src->add("0.0"); else if( elementSel == 5 ) src->add("1.0"); } } static const char* _texGprAccessElemTable[8] = {"x","y","z","w","_","_","_","_"}; static char* _getTexGPRAccess(LatteDecompilerShaderContext* shaderContext, sint32 gprIndex, uint32 dataType, sint8 selX, sint8 selY, sint8 selZ, sint8 selW, char* tempBuffer) { // as_type(R{}i.w) *tempBuffer = '\0'; uint8 elemCount = (selX >= 0 ? 1 : 0) + (selY >= 0 ? 1 : 0) + (selZ >= 0 ? 
1 : 0) + (selW >= 0 ? 1 : 0); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) { if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) ; // no conversion else if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT) { if (elemCount == 1) strcat(tempBuffer, "as_type("); else strcat(tempBuffer, ("as_type(").c_str()); } else cemu_assert_unimplemented(); strcat(tempBuffer, _getRegisterVarName(shaderContext, gprIndex)); // _texGprAccessElemTable strcat(tempBuffer, "."); if (selX >= 0) strcat(tempBuffer, _texGprAccessElemTable[selX]); if (selY >= 0) strcat(tempBuffer, _texGprAccessElemTable[selY]); if (selZ >= 0) strcat(tempBuffer, _texGprAccessElemTable[selZ]); if (selW >= 0) strcat(tempBuffer, _texGprAccessElemTable[selW]); if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) ; // no conversion else if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT) strcat(tempBuffer, ")"); else cemu_assert_unimplemented(); } else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) { if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) cemu_assert_unimplemented(); else if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT) ; // no conversion else cemu_assert_unimplemented(); strcat(tempBuffer, _getRegisterVarName(shaderContext, gprIndex)); // _texGprAccessElemTable strcat(tempBuffer, "."); if (selX >= 0) strcat(tempBuffer, _texGprAccessElemTable[selX]); if (selY >= 0) strcat(tempBuffer, _texGprAccessElemTable[selY]); if (selZ >= 0) strcat(tempBuffer, _texGprAccessElemTable[selZ]); if (selW >= 0) strcat(tempBuffer, _texGprAccessElemTable[selW]); if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) cemu_assert_unimplemented(); else if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT) ; // no conversion else cemu_assert_unimplemented(); } else cemu_assert_unimplemented(); return tempBuffer; } static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = 
shaderContext->shaderSource; if (texInstruction->textureFetch.textureIndex < 0 || texInstruction->textureFetch.textureIndex >= LATTE_NUM_MAX_TEX_UNITS) { // skip out of bounds texture unit access return; } auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; char tempBuffer0[32]; char tempBuffer1[32]; src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr)); src->add("."); const char* resultElemTable[4] = {"x","y","z","w"}; sint32 numWrittenElements = 0; for(sint32 f = 0; f < 4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[f]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { debugBreakpoint(); } } // texture sampler opcode uint32 texOpcode = texInstruction->opcode; // TODO: is this needed? if (shaderContext->shaderType == LatteConst::ShaderType::Vertex) { // vertex shader forces LOD to zero, but certain sampler types don't support textureLod(...) API if (texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) texOpcode = GPU7_TEX_INST_SAMPLE_C; } // check if offset is used bool hasOffset = false; if( texInstruction->textureFetch.offsetX != 0 || texInstruction->textureFetch.offsetY != 0 || texInstruction->textureFetch.offsetZ != 0 ) hasOffset = true; // emit sample code if (shaderContext->shader->textureIsIntegerFormat[texInstruction->textureFetch.textureIndex]) { // integer samplers if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) // uint to int { if (numWrittenElements == 1) src->add(" = int("); else shaderContext->shaderSource->addFmt(" = int{}(", numWrittenElements); } else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) { if (numWrittenElements == 1) src->add(" = as_type("); else shaderContext->shaderSource->addFmt(" = as_type(", numWrittenElements); } } else { // float samplers if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) { if 
(numWrittenElements == 1) src->add(" = as_type("); else shaderContext->shaderSource->addFmt(" = as_type(", numWrittenElements); } else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->add(" = ("); } bool isCompare = shaderContext->shader->textureUsesDepthCompare[texInstruction->textureFetch.textureIndex]; bool emulateCompare = (isCompare && !IsValidDepthTextureType(texDim)); bool isGather = (texOpcode == GPU7_TEX_INST_FETCH4); bool unnormalizationHandled = false; bool useTexelCoordinates = false; bool isRead = ((texOpcode == GPU7_TEX_INST_SAMPLE && (texInstruction->textureFetch.unnormalized[0] && texInstruction->textureFetch.unnormalized[1] && texInstruction->textureFetch.unnormalized[2] && texInstruction->textureFetch.unnormalized[3])) || texOpcode == GPU7_TEX_INST_LD); // handle illegal combinations if (texOpcode == GPU7_TEX_INST_FETCH4 && (texDim == Latte::E_DIM::DIM_1D || texDim == Latte::E_DIM::DIM_1D_ARRAY)) { // fetch4 is not allowed on 1D textures // seen in YWW during boss fight of Level 1-4 // todo - investigate what this returns on actual HW if (numWrittenElements == 1) shaderContext->shaderSource->add("0.0"); else shaderContext->shaderSource->addFmt("float{}(0.0)", numWrittenElements); shaderContext->shaderSource->add(");" _CRLF); return; } // Do a framebuffer fetch if possible uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex]; if (static_cast(g_renderer.get())->SupportsFramebufferFetch() && renderTargetIndex != 255) { // TODO: support comparison samplers // TODO: support swizzling src->addFmt("col{}", renderTargetIndex); } else { // sample_compare returns a float, need to convert to float4 if (isCompare) src->addFmt("float4("); if (emulateCompare) { cemu_assert_debug(!isGather); src->add("sampleCompareEmulate("); } src->addFmt("tex{}", texInstruction->textureFetch.textureIndex); if (!emulateCompare) { src->add("."); if (isRead) { if (hasOffset) 
cemu_assert_unimplemented(); src->add("read("); unnormalizationHandled = true; useTexelCoordinates = true; } else { if (isGather) src->add("gather"); else src->add("sample"); if (isCompare) src->add("_compare"); src->addFmt("(samplr{}, ", texInstruction->textureFetch.textureIndex); } } else { src->addFmt(", samplr{}, ", texInstruction->textureFetch.textureIndex); } // for textureGather() add shift (todo: depends on rounding mode set in sampler registers?) if (texOpcode == GPU7_TEX_INST_FETCH4) { if (texDim == Latte::E_DIM::DIM_2D) { //src->addFmt2("(vec2(-0.1) / vec2(textureSize(tex{},0).xy)) + ", texInstruction->textureIndex); // vec2(-0.00001) is minimum to break Nvidia // vec2(0.0001) is minimum to fix shadows on Intel, also fixes it on AMD (Windows and Linux) // todo - emulating coordinate rounding mode correctly is tricky // GX2 supports two modes: Truncate or rounding according to DX9 rules // Vulkan uses truncate mode when point sampling (min and mag is both nearest) otherwise it uses rounding // adding a small fixed bias is enough to avoid vendor-specific cases where small inaccuracies cause the number to get rounded down due to truncation src->addFmt("float2(0.0001) + "); } } const sint32 texCoordDataType = (texOpcode == GPU7_TEX_INST_LD) ? 
LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT; if(useTexelCoordinates) { // handle integer coordinates for texelFetch if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) { src->add("uint2("); src->add("float2("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, texCoordDataType); src->addFmt(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, texCoordDataType); src->addFmt(")*supportBuffer.tex{}Scale", texInstruction->textureFetch.textureIndex); // close float2 and scale src->add("), 0"); // close int2 and lod param // todo - lod } else if (texDim == Latte::E_DIM::DIM_1D) { // VC DS games forget to initialize textures and use texel fetch on an uninitialized texture (a dim of 0 maps to 1D) src->add("uint("); src->add("float("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT); src->addFmt(")*supportBuffer.tex{}Scale.x", texInstruction->textureFetch.textureIndex); src->add("), 0"); // todo - lod } else cemu_assert_debug(false); } else /* useTexelCoordinates == false */ { // float coordinates if ( (texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_L || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) ) { // shadow sampler if (texDim == Latte::E_DIM::DIM_2D_ARRAY) { // 3 coords + compare value src->add("float2("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("), uint(rint("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("))"); src->addFmt(", {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0)); } else if (texDim == 
Latte::E_DIM::DIM_CUBEMAP) { // 2 coords + faceId if (texInstruction->textureFetch.srcSel[0] >= 4 || texInstruction->textureFetch.srcSel[1] >= 4) { debugBreakpoint(); } src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0)); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->addFmt(")"); src->addFmt(", uint(cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index } else if (texDim == Latte::E_DIM::DIM_1D) { // 1 coord + 1 unused coord (per spec) + compare value if (texInstruction->textureFetch.srcSel[0] >= 4) { debugBreakpoint(); } src->addFmt("{}, {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); } else { // 2 coords + compare value (as float3) if (texInstruction->textureFetch.srcSel[0] >= 4 && texInstruction->textureFetch.srcSel[1] >= 4) { debugBreakpoint(); } src->addFmt("float2({}), {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1)); } } else if(texDim == Latte::E_DIM::DIM_2D_ARRAY) { // 3 coords src->add("float2("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("), uint(rint("); 
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); src->add("))"); } else if(texDim == Latte::E_DIM::DIM_3D) { // 3 coords src->add("float3("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(", "); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); } else if( texDim == Latte::E_DIM::DIM_CUBEMAP ) { // 2 coords + faceId cemu_assert_debug(texInstruction->textureFetch.srcSel[0] < 4); cemu_assert_debug(texInstruction->textureFetch.srcSel[1] < 4); src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0)); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(")"); src->addFmt(", uint(cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index } else if( texDim == Latte::E_DIM::DIM_1D ) { // 1 coord src->add(_getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0)); } else { // 2 coords src->add("float2("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(","); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); // avoid truncate to effectively round downwards on texel edges if (ActiveSettings::ForceSamplerRoundToPrecision()) src->addFmt("+ float2(1.0)/float2(tex{}.get_width(), tex{}.get_height())/512.0", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); } // lod or lod bias parameter // 1D textures 
don't support lod if (texDim != Latte::E_DIM::DIM_1D && texDim != Latte::E_DIM::DIM_1D_ARRAY) { if (texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L) { src->add(", "); if (texOpcode == GPU7_TEX_INST_SAMPLE_LB) { src->addFmt("bias({})", _FormatFloatAsConstant((float)texInstruction->textureFetch.lodBias / 16.0f)); } else { // TODO: is this correct? src->add("level("); _emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 3, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); } } else if (texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) { src->add(", level(0.0)"); } } } // gradient parameters if (texOpcode == GPU7_TEX_INST_SAMPLE_G) { if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_1D) { src->add(", gradient2d(gradH.xy, gradV.xy)"); } else { cemu_assert_unimplemented(); } } // offset if( texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ || texOpcode == GPU7_TEX_INST_SAMPLE || texOpcode == GPU7_TEX_INST_SAMPLE_C ) { if( hasOffset ) { uint8 offsetComponentCount = 0; if( texDim == Latte::E_DIM::DIM_1D ) offsetComponentCount = 1; else if( texDim == Latte::E_DIM::DIM_2D ) offsetComponentCount = 2; else if( texDim == Latte::E_DIM::DIM_3D ) offsetComponentCount = 3; else if( texDim == Latte::E_DIM::DIM_2D_ARRAY ) offsetComponentCount = 2; else cemu_assert_unimplemented(); if( (texInstruction->textureFetch.offsetX&1) ) cemu_assert_unimplemented(); if( (texInstruction->textureFetch.offsetY&1) ) cemu_assert_unimplemented(); if ((texInstruction->textureFetch.offsetZ & 1)) cemu_assert_unimplemented(); if( offsetComponentCount == 1 ) src->addFmt(",{}", texInstruction->textureFetch.offsetX/2); else if( offsetComponentCount == 2 ) src->addFmt(",int2({},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); else if( 
offsetComponentCount == 3 ) src->addFmt(",int3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2); } } // lod bias (TODO: wht?) src->add(")"); } if (isCompare) src->add(")"); if (texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) { src->add("."); if (numWrittenElements > 1) { // result is copied into multiple channels for (sint32 f = 0; f < numWrittenElements; f++) { cemu_assert_debug(texInstruction->dstSel[f] == 0); // only x component is defined src->add("x"); } } else { src->add("x"); } } else { src->add("."); for (sint32 f = 0; f < 4; f++) { if (texInstruction->dstSel[f] < 4) { uint8 elemIndex = texInstruction->dstSel[f]; if (isGather) { // 's textureGather() and GPU7's FETCH4 instruction have a different order of elements // xyzw: top-left, top-right, bottom-right, bottom-left // textureGather xyzw // fetch4 yzxw // translate index from fetch4 to textureGather order static uint8 fetchToGather[4] = { 2, // x -> z 0, // y -> x 1, // z -> y 3, // w -> w }; elemIndex = fetchToGather[elemIndex]; } src->add(resultElemTable[elemIndex]); } else if (texInstruction->dstSel[f] == 7) { // masked and not written } else { cemu_assert_unimplemented(); } } } src->add(");"); // debug #ifdef CEMU_DEBUG_ASSERT if(texInstruction->opcode == GPU7_TEX_INST_LD ) src->add(" // TEX_INST_LD"); else if(texInstruction->opcode == GPU7_TEX_INST_SAMPLE ) src->add(" // TEX_INST_SAMPLE"); else if(texInstruction->opcode == GPU7_TEX_INST_SAMPLE_L ) src->add(" // TEX_INST_SAMPLE_L"); else if(texInstruction->opcode == GPU7_TEX_INST_SAMPLE_LZ ) src->add(" // TEX_INST_SAMPLE_LZ"); else if(texInstruction->opcode == GPU7_TEX_INST_SAMPLE_C ) src->add(" // TEX_INST_SAMPLE_C"); else if(texInstruction->opcode == GPU7_TEX_INST_SAMPLE_G ) src->add(" // TEX_INST_SAMPLE_G"); else src->addFmt(" // 0x{:02x}", texInstruction->opcode); if (texInstruction->opcode != texOpcode) src->addFmt(" (applied as 
0x{:02x})", texOpcode); src->addFmt(" OffsetXYZ {:02x} {:02x} {:02x}", (uint8)texInstruction->textureFetch.offsetX&0xFF, (uint8)texInstruction->textureFetch.offsetY&0xFF, (uint8)texInstruction->textureFetch.offsetZ&0xFF); #endif src->add("" _CRLF); } static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; src->addFmt("R{}", texInstruction->dstGpr); src->add("i"); src->add("."); const char* resultElemTable[4] = {"x","y","z","w"}; sint32 numWrittenElements = 0; for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[f]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { cemu_assert_unimplemented(); } } // todo - mip index parameter? if (static_cast(g_renderer.get())->SupportsFramebufferFetch() && shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255) { // TODO: use the render target size src->addFmt(" = int4(1920, 1080, 1, 1)."); } else { auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex]; if (texDim == Latte::E_DIM::DIM_1D) src->addFmt(" = int4(tex{}.get_width(), 1, 1, 1).", texInstruction->textureFetch.textureIndex); else if (texDim == Latte::E_DIM::DIM_1D_ARRAY) src->addFmt(" = int4(tex{}.get_width(), tex{}.get_array_size(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA) src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); else if (texDim == Latte::E_DIM::DIM_2D_ARRAY) src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), tex{}.get_array_size(), 1).", texInstruction->textureFetch.textureIndex, 
texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); else { cemu_assert_debug(false); src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex); } } for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[texInstruction->dstSel[f]]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { debugBreakpoint(); } } src->add(";" _CRLF); } static void _emitTEXGetCompTexLodCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr)); src->add("."); const char* resultElemTable[4] = {"x","y","z","w"}; sint32 numWrittenElements = 0; for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[f]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { debugBreakpoint(); } } src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType); if (static_cast(g_renderer.get())->SupportsFramebufferFetch() && shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255) { // We assume that textures accessed as framebuffer fetch are always sampled at pixel coordinates, therefore the lod would always be 0.0 src->add("float4(0.0, 0.0, 0.0, 0.0)"); } else { if (shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex] == Latte::E_DIM::DIM_CUBEMAP) { // 3 coordinates if(shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}{}), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, 
_getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]); else src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, as_type({}.{}{}{})), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]); } else { if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]); else src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, as_type({}.{}{})), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]); debugBreakpoint(); } } _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType); src->add("."); for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[texInstruction->dstSel[f]]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { debugBreakpoint(); } } src->add(";" _CRLF); } static void _emitTEXSetCubemapIndexCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* 
src = shaderContext->shaderSource; src->addFmt("cubeMapArrayIndex{}", texInstruction->textureFetch.textureIndex); const char* resultElemTable[4] = {"x","y","z","w"}; if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt(" = as_type(R{}i.{});" _CRLF, texInstruction->srcGpr, resultElemTable[texInstruction->textureFetch.srcSel[0]]); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt(" = R{}f.{};" _CRLF, texInstruction->srcGpr, resultElemTable[texInstruction->textureFetch.srcSel[0]]); else cemu_assert_unimplemented(); } static void _emitTEXGetGradientsHV(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; sint32 componentCount = 0; for (sint32 i = 0; i < 4; i++) { if (texInstruction->dstSel[i] == 7) continue; componentCount++; } src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr)); src->add("."); const char* resultElemTable[4] = { "x","y","z","w" }; sint32 numWrittenElements = 0; for (sint32 f = 0; f < 4; f++) { if (texInstruction->dstSel[f] < 4) { src->add(resultElemTable[f]); numWrittenElements++; } else if (texInstruction->dstSel[f] == 7) { // masked and not written } else { debugBreakpoint(); } } const char* funcName; if (texInstruction->opcode == GPU7_TEX_INST_GET_GRADIENTS_H) funcName = "dfdx"; else funcName = "dfdy"; src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType, componentCount); src->addFmt("{}(", funcName); _emitRegisterAccessCode(shaderContext, texInstruction->srcGpr, (componentCount >= 1) ? texInstruction->textureFetch.srcSel[0] : -1, (componentCount >= 2) ? texInstruction->textureFetch.srcSel[1] : -1, (componentCount >= 3) ? texInstruction->textureFetch.srcSel[2] : -1, (componentCount >= 4) ? 
texInstruction->textureFetch.srcSel[3] : -1, LATTE_DECOMPILER_DTYPE_FLOAT); src->add(")"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType); src->add(";" _CRLF); } static void _emitTEXSetGradientsHV(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; if (texInstruction->opcode == GPU7_TEX_INST_SET_GRADIENTS_H) src->add("gradH = "); else src->add("gradV = "); _emitRegisterAccessCode(shaderContext, texInstruction->srcGpr, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], texInstruction->textureFetch.srcSel[2], texInstruction->textureFetch.srcSel[3], LATTE_DECOMPILER_DTYPE_FLOAT); src->add(";" _CRLF); } static void _emitGSReadInputVFetchCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr)); src->add("."); const char* resultElemTable[4] = {"x","y","z","w"}; sint32 numWrittenElements = 0; for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[f]); numWrittenElements++; } else if( texInstruction->dstSel[f] == 7 ) { // masked and not written } else { cemu_assert_unimplemented(); } } src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType, numWrittenElements); src->add("(objectPayload.vertexOut["); if (texInstruction->textureFetch.srcSel[0] >= 4) cemu_assert_unimplemented(); if (texInstruction->textureFetch.srcSel[1] >= 4) cemu_assert_unimplemented(); src->add("vertexIndex"); src->addFmt("].passParameterSem{}.", texInstruction->textureFetch.offset/16); for(sint32 f=0; f<4; f++) { if( texInstruction->dstSel[f] < 4 ) { src->add(resultElemTable[texInstruction->dstSel[f]]); } else if( texInstruction->dstSel[f] == 7 ) { 
// masked and not written } else { cemu_assert_unimplemented(); } } src->add(")"); _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType); src->add(";" _CRLF); } static sint32 _writeDestMaskXYZW(LatteDecompilerShaderContext* shaderContext, sint8* dstSel) { StringBuf* src = shaderContext->shaderSource; const char* resultElemTable[4] = { "x","y","z","w" }; sint32 numWrittenElements = 0; for (sint32 f = 0; f < 4; f++) { if (dstSel[f] < 4) { src->add(resultElemTable[f]); numWrittenElements++; } else if (dstSel[f] == 7) { // masked and not written } else { cemu_assert_unimplemented(); } } return numWrittenElements; } static void _emitTEXVFetchCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { // handle special case where geometry shader reads input attributes from vertex shader via ringbuffer StringBuf* src = shaderContext->shaderSource; if( texInstruction->textureFetch.textureIndex == 0x9F && shaderContext->shaderType == LatteConst::ShaderType::Geometry ) { _emitGSReadInputVFetchCode(shaderContext, texInstruction); return; } src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr)); src->add("."); _writeDestMaskXYZW(shaderContext, texInstruction->dstSel); const char* resultElemTable[4] = {"x","y","z","w"}; uint32 numWrittenElements = 0; for (sint32 f=0; f<4; f++) { if (texInstruction->dstSel[f] < 4) numWrittenElements++; } src->add(" = "); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) { if (numWrittenElements == 1) src->add("as_type("); else src->addFmt("as_type(", numWrittenElements); } else src->add("("); src->addFmt("ubuff{}.d[", texInstruction->textureFetch.textureIndex - 0x80); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt("{}.{}", _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]]); 
else src->addFmt("as_type({}.{})", _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]]); src->add("]."); for (sint32 f=0; f<4; f++) { if (texInstruction->dstSel[f] < 4) { src->add(resultElemTable[texInstruction->dstSel[f]]); } else if (texInstruction->dstSel[f] == 7) { // masked and not written } else { debugBreakpoint(); } } src->add(");" _CRLF); } static void _emitTEXReadMemCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction) { StringBuf* src = shaderContext->shaderSource; src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr)); src->add("."); sint32 count = _writeDestMaskXYZW(shaderContext, texInstruction->dstSel); src->add(" = "); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) { if (count == 1) src->add("as_type("); else src->addFmt("as_type(", count); } else src->add("("); sint32 readCount; if (texInstruction->memRead.format == FMT_32_FLOAT) { readCount = 1; // todo src->add("0.0"); } else if (texInstruction->memRead.format == FMT_32_32_FLOAT) { readCount = 2; // todo src->add("float2(0.0,0.0)"); } else if (texInstruction->memRead.format == FMT_32_32_32_FLOAT) { readCount = 3; // todo src->add("float3(0.0,0.0,0.0)"); } else { cemu_assert_unimplemented(); } if (count < readCount) { if (count == 1) src->add(".x"); else if (count == 2) src->add(".xy"); else if (count == 3) src->add(".xyz"); } src->add(");" _CRLF); } static void _emitTEXClauseCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { cemu_assert_debug(cfInstruction->instructionsALU.empty()); for(auto& texInstruction : cfInstruction->instructionsTEX) { if( texInstruction.opcode == GPU7_TEX_INST_SAMPLE || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_L || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LB || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_LZ || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C 
|| texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_L || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_C_LZ || texInstruction.opcode == GPU7_TEX_INST_FETCH4 || texInstruction.opcode == GPU7_TEX_INST_SAMPLE_G || texInstruction.opcode == GPU7_TEX_INST_LD ) _emitTEXSampleTextureCode(shaderContext, &texInstruction); else if( texInstruction.opcode == GPU7_TEX_INST_GET_TEXTURE_RESINFO ) _emitTEXGetTextureResInfoCode(shaderContext, &texInstruction); else if( texInstruction.opcode == GPU7_TEX_INST_GET_COMP_TEX_LOD ) _emitTEXGetCompTexLodCode(shaderContext, &texInstruction); else if( texInstruction.opcode == GPU7_TEX_INST_SET_CUBEMAP_INDEX ) _emitTEXSetCubemapIndexCode(shaderContext, &texInstruction); else if (texInstruction.opcode == GPU7_TEX_INST_GET_GRADIENTS_H || texInstruction.opcode == GPU7_TEX_INST_GET_GRADIENTS_V) _emitTEXGetGradientsHV(shaderContext, &texInstruction); else if (texInstruction.opcode == GPU7_TEX_INST_SET_GRADIENTS_H || texInstruction.opcode == GPU7_TEX_INST_SET_GRADIENTS_V) _emitTEXSetGradientsHV(shaderContext, &texInstruction); else if (texInstruction.opcode == GPU7_TEX_INST_VFETCH) _emitTEXVFetchCode(shaderContext, &texInstruction); else if (texInstruction.opcode == GPU7_TEX_INST_MEM) _emitTEXReadMemCode(shaderContext, &texInstruction); else cemu_assert_unimplemented(); } } // generate the code for reading the source input GPR (or constants) for exports static void _emitExportGPRReadCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, sint32 requiredType, uint32 burstIndex) { StringBuf* src = shaderContext->shaderSource; uint32 numOutputs = 4; if( cfInstruction->type == GPU7_CF_INST_MEM_RING_WRITE ) { numOutputs = (cfInstruction->memWriteCompMask&1)?1:0; numOutputs += (cfInstruction->memWriteCompMask&2)?1:0; numOutputs += (cfInstruction->memWriteCompMask&4)?1:0; numOutputs += (cfInstruction->memWriteCompMask&8)?1:0; } if (requiredType == LATTE_DECOMPILER_DTYPE_FLOAT) { if(numOutputs == 1) src->add("float("); 
else src->addFmt("float{}(", numOutputs); } else if (requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) { if (numOutputs == 1) src->add("int("); else src->addFmt("int{}(", numOutputs); } else cemu_assert_unimplemented(); sint32 actualOutputs = 0; for(sint32 i=0; i<4; i++) { // todo: Use type of register element based on information from type tracker (currently we assume it's always a signed integer) uint32 exportSel = 0; if( cfInstruction->type == GPU7_CF_INST_MEM_RING_WRITE ) { exportSel = i; if( (cfInstruction->memWriteCompMask&(1<exportComponentSel[i]; } if( actualOutputs > 0 ) src->add(", "); actualOutputs++; if( exportSel < 4 ) { _emitRegisterAccessCode(shaderContext, cfInstruction->exportSourceGPR+burstIndex, exportSel, -1, -1, -1, requiredType); } else if (exportSel == 4) { // constant zero src->add("0"); } else if (exportSel == 5) { // constant one src->add("1.0"); } else if( exportSel == 7 ) { // element masked (which means 0 is exported?) src->add("0"); } else { cemu_assert_debug(false); src->add("0"); } } if( requiredType == LATTE_DECOMPILER_DTYPE_FLOAT ) src->add(")"); else if( requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT ) src->add(")"); else cemu_assert_unimplemented(); } static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { StringBuf* src = shaderContext->shaderSource; src->add("// export" _CRLF); if(shaderContext->shaderType == LatteConst::ShaderType::Vertex ) { if (!shaderContext->contextRegistersNew->IsRasterizationEnabled()) { src->add("// Rasterization disabled" _CRLF); return; } if( cfInstruction->exportBurstCount != 0 ) debugBreakpoint(); if (cfInstruction->exportType == 1 && cfInstruction->exportArrayBase == GPU7_DECOMPILER_CF_EXPORT_BASE_POSITION) { // export position // GX2 special state 0 disables rasterizer viewport offset and scaling (probably, exact mechanism is not known). 
Handle this here bool hasAnyViewportScaleDisabled = !shaderContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_X_SCALE_ENA() || !shaderContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Y_SCALE_ENA() || !shaderContext->contextRegistersNew->PA_CL_VTE_CNTL.get_VPORT_Z_SCALE_ENA(); if (hasAnyViewportScaleDisabled) { src->add("float4 finalPos = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); src->add(";" _CRLF); src->add("finalPos.xy = finalPos.xy * supportBuffer.windowSpaceToClipSpaceTransform - float2(1.0,1.0);" _CRLF); src->add("SET_POSITION(finalPos);"); } else { src->add("SET_POSITION("); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); src->add(");" _CRLF); } } else if (cfInstruction->exportType == 1 && cfInstruction->exportArrayBase == GPU7_DECOMPILER_CF_EXPORT_POINT_SIZE ) { // export gl_PointSize if (shaderContext->analyzer.outputPointSize) { cemu_assert_debug(shaderContext->analyzer.writesPointSize); src->add("out.pointSize = ("); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); src->add(").x"); src->add(";" _CRLF); } } else if( cfInstruction->exportType == 2 && cfInstruction->exportArrayBase < 32 ) { // export parameter sint32 paramIndex = cfInstruction->exportArrayBase; uint32 vsSemanticId = _getVertexShaderOutParamSemanticId(shaderContext->contextRegisters, paramIndex); if (vsSemanticId != 0xFF) { src->addFmt("out.passParameterSem{} = ", vsSemanticId); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); src->add(";" _CRLF); } else { src->add("// skipped export to semanticId 255" _CRLF); } } else cemu_assert_unimplemented(); } else if(shaderContext->shaderType == LatteConst::ShaderType::Pixel ) { if( cfInstruction->exportType == 0 && cfInstruction->exportArrayBase < 8 ) { for(uint32 i=0; i<(cfInstruction->exportBurstCount+1); i++) { sint32 pixelColorOutputIndex = 
LatteDecompiler_getColorOutputIndexFromExportIndex(shaderContext, cfInstruction->exportArrayBase+i); // if color output is for target 0, then also handle alpha test bool alphaTestEnable = shaderContext->contextRegistersNew->SX_ALPHA_TEST_CONTROL.get_ALPHA_TEST_ENABLE(); auto alphaTestFunc = shaderContext->contextRegistersNew->SX_ALPHA_TEST_CONTROL.get_ALPHA_FUNC(); if( pixelColorOutputIndex == 0 && alphaTestEnable && alphaTestFunc == Latte::E_COMPAREFUNC::NEVER ) { // never pass alpha test src->add("discard_fragment();" _CRLF); } else if( pixelColorOutputIndex == 0 && alphaTestEnable && alphaTestFunc != Latte::E_COMPAREFUNC::ALWAYS) { src->add("if( (("); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i); src->add(").a "); switch( alphaTestFunc ) { case Latte::E_COMPAREFUNC::LESS: src->add("<"); break; case Latte::E_COMPAREFUNC::EQUAL: src->add("=="); break; case Latte::E_COMPAREFUNC::LEQUAL: src->add("<="); break; case Latte::E_COMPAREFUNC::GREATER: src->add(">"); break; case Latte::E_COMPAREFUNC::NOTEQUAL: src->add("!="); break; case Latte::E_COMPAREFUNC::GEQUAL: src->add(">="); break; } src->add(" supportBuffer.alphaTestRef"); src->add(") == false) discard_fragment();" _CRLF); } // pixel color output auto dataType = GetColorBufferDataType(pixelColorOutputIndex, *shaderContext->contextRegistersNew); if (dataType != MetalDataType::NONE) { src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetDataTypeStr(dataType)); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i); src->add(");" _CRLF); } if( cfInstruction->exportArrayBase+i >= 8 ) cemu_assert_unimplemented(); } } else if( cfInstruction->exportType == 0 && cfInstruction->exportArrayBase == 61 ) { // pixel depth or gl_FragStencilRefARB if( cfInstruction->exportBurstCount > 0 ) cemu_assert_unimplemented(); if (cfInstruction->exportComponentSel[0] == 7) { cemu_assert_unimplemented(); // gl_FragDepth ? 
} if (cfInstruction->exportComponentSel[1] != 7) { cemu_assert_unimplemented(); // exporting to gl_FragStencilRefARB } if (cfInstruction->exportComponentSel[2] != 7) { cemu_assert_unimplemented(); // ukn } if (cfInstruction->exportComponentSel[3] != 7) { cemu_assert_unimplemented(); // ukn } if (!shaderContext->shader->depthMask) return; src->add("out.passDepth = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0); src->add(".x"); src->add(";" _CRLF); } else cemu_assert_unimplemented(); } } static void _emitXYZWByMask(StringBuf* src, uint32 mask) { if( (mask&(1<<0)) != 0 ) src->add("x"); if( (mask&(1<<1)) != 0 ) src->add("y"); if( (mask&(1<<2)) != 0 ) src->add("z"); if( (mask&(1<<3)) != 0 ) src->add("w"); } static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { StringBuf* src = shaderContext->shaderSource; // calculate parameter output (based on ring buffer output offset relative to GS unit) uint32 bytesPerVertex = shaderContext->contextRegisters[mmSQ_GS_VERT_ITEMSIZE] * 4; bytesPerVertex = std::max(bytesPerVertex, (uint32)1); // avoid division by zero uint32 parameterOffset = ((cfInstruction->exportArrayBase * 4) % bytesPerVertex); // for geometry shaders with streamout, MEM_RING_WRITE is used to pass the data to the copy shader, which then uses STREAM*_WRITE if (shaderContext->shaderType == LatteConst::ShaderType::Geometry && shaderContext->analyzer.hasStreamoutEnable) { // if streamout is enabled, we generate transform feedback output code instead of the normal gs output for (uint32 burstIndex = 0; burstIndex < (cfInstruction->exportBurstCount + 1); burstIndex++) { parameterOffset = ((cfInstruction->exportArrayBase * 4 + burstIndex*0x10) % bytesPerVertex); // find matching stream write in copy shader LatteGSCopyShaderStreamWrite_t* streamWrite = nullptr; for (auto& it : shaderContext->parsedGSCopyShader->list_streamWrites) { if (it.offset == 
parameterOffset) { streamWrite = ⁢ break; } } if (streamWrite == nullptr) { cemu_assert_suspicious(); return; } for (sint32 i = 0; i < 4; i++) { if ((cfInstruction->memWriteCompMask&(1 << i)) == 0) continue; uint32 u32Offset = streamWrite->exportArrayBase + i; src->addFmt("sb[sbBase{} + {}]", streamWrite->bufferIndex, u32Offset); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, shaderContext->typeTracker.defaultDataType, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->addFmt("{}.", _getRegisterVarName(shaderContext, cfInstruction->exportSourceGPR+burstIndex)); if (i == 0) src->add("x"); else if (i == 1) src->add("y"); else if (i == 2) src->add("z"); else if (i == 3) src->add("w"); _emitTypeConversionSuffixMSL(shaderContext, shaderContext->typeTracker.defaultDataType, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(";" _CRLF); } } return; } if (shaderContext->shaderType == LatteConst::ShaderType::Vertex) { if (!shaderContext->contextRegistersNew->IsRasterizationEnabled()) { src->add("// Rasterization disabled" _CRLF); return; } if (cfInstruction->memWriteElemSize != 3) cemu_assert_unimplemented(); if ((cfInstruction->exportArrayBase & 3) != 0) cemu_assert_unimplemented(); for (sint32 burstIndex = 0; burstIndex < (sint32)(cfInstruction->exportBurstCount + 1); burstIndex++) { src->addFmt("out.passParameterSem{}.", (cfInstruction->exportArrayBase) / 4 + burstIndex); _emitXYZWByMask(src, cfInstruction->memWriteCompMask); src->addFmt(" = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_SIGNED_INT, burstIndex); src->add(";" _CRLF); } } else if (shaderContext->shaderType == LatteConst::ShaderType::Geometry) { cemu_assert_debug(cfInstruction->memWriteElemSize == 3); //if (cfInstruction->memWriteElemSize != 3) // debugBreakpoint(); cemu_assert_debug((cfInstruction->exportArrayBase & 3) == 0); for (uint32 burstIndex = 0; burstIndex < (cfInstruction->exportBurstCount + 1); burstIndex++) { uint32 parameterExportType = 0; uint32 
parameterExportBase = 0; if (LatteGSCopyShaderParser_getExportTypeByOffset(shaderContext->parsedGSCopyShader, parameterOffset + burstIndex * (cfInstruction->memWriteElemSize+1)*4, ¶meterExportType, ¶meterExportBase) == false) { cemu_assert_debug(false); shaderContext->hasError = true; return; } if (parameterExportType == 1 && parameterExportBase == GPU7_DECOMPILER_CF_EXPORT_BASE_POSITION) { src->add("{" _CRLF); src->addFmt("float4 pos = float4(0.0,0.0,0.0,1.0);" _CRLF); src->addFmt("pos."); _emitXYZWByMask(src, cfInstruction->memWriteCompMask); src->addFmt(" = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex); src->add(";" _CRLF); src->add("SET_POSITION(pos);" _CRLF); src->add("}" _CRLF); } else if (parameterExportType == 2 && parameterExportBase < 16) { src->addFmt("out.passParameterSem{}.", parameterExportBase); _emitXYZWByMask(src, cfInstruction->memWriteCompMask); src->addFmt(" = "); _emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex); src->add(";" _CRLF); } else cemu_assert_debug(false); } } else debugBreakpoint(); // todo } static void _emitStreamWriteCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { StringBuf* src = shaderContext->shaderSource; if (shaderContext->analyzer.hasStreamoutEnable == false) { #ifdef CEMU_DEBUG_ASSERT src->add("// omitted streamout write" _CRLF); #endif return; } uint32 streamoutBufferIndex; if (cfInstruction->type == GPU7_CF_INST_MEM_STREAM0_WRITE) streamoutBufferIndex = 0; else if (cfInstruction->type == GPU7_CF_INST_MEM_STREAM1_WRITE) streamoutBufferIndex = 1; else cemu_assert_unimplemented(); if (shaderContext->shaderType == LatteConst::ShaderType::Vertex) { uint32 arraySize = cfInstruction->memWriteArraySize + 1; for (sint32 i = 0; i < (sint32)arraySize; i++) { if ((cfInstruction->memWriteCompMask&(1 << i)) == 0) continue; uint32 u32Offset = cfInstruction->exportArrayBase + i; 
src->addFmt("sb[sbBase{} + {}]", streamoutBufferIndex, u32Offset); src->add(" = "); _emitTypeConversionPrefixMSL(shaderContext, shaderContext->typeTracker.defaultDataType, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(_getRegisterVarName(shaderContext, cfInstruction->exportSourceGPR)); _appendChannelAccess(src, i); _emitTypeConversionSuffixMSL(shaderContext, shaderContext->typeTracker.defaultDataType, LATTE_DECOMPILER_DTYPE_SIGNED_INT); src->add(";" _CRLF); } } else cemu_assert_debug(false); } static void _emitCFCall(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction) { StringBuf* src = shaderContext->shaderSource; uint32 subroutineAddr = cfInstruction->addr; LatteDecompilerSubroutineInfo* subroutineInfo = nullptr; // find subroutine for (auto& subroutineItr : shaderContext->list_subroutines) { if (subroutineItr.cfAddr == subroutineAddr) { subroutineInfo = &subroutineItr; break; } } if (subroutineInfo == nullptr) { cemu_assert_debug(false); return; } // inline function if (shaderContext->isSubroutine) { cemu_assert_debug(false); // inlining with cascaded function calls not supported return; } // init CF stack variables src->addFmt("activeMaskStackSub{:04x}[0] = true;" _CRLF, subroutineInfo->cfAddr); src->addFmt("activeMaskStackCSub{:04x}[0] = true;" _CRLF, subroutineInfo->cfAddr); src->addFmt("activeMaskStackCSub{:04x}[1] = true;" _CRLF, subroutineInfo->cfAddr); shaderContext->isSubroutine = true; shaderContext->subroutineInfo = subroutineInfo; for(auto& cfInstruction : subroutineInfo->instructions) LatteDecompiler_emitClauseCodeMSL(shaderContext, &cfInstruction, true); shaderContext->isSubroutine = false; shaderContext->subroutineInfo = nullptr; } void LatteDecompiler_emitClauseCodeMSL(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, bool isSubroutine) { StringBuf* src = shaderContext->shaderSource; if( cfInstruction->type == GPU7_CF_INST_ALU || cfInstruction->type == 
GPU7_CF_INST_ALU_PUSH_BEFORE || cfInstruction->type == GPU7_CF_INST_ALU_POP_AFTER || cfInstruction->type == GPU7_CF_INST_ALU_POP2_AFTER || cfInstruction->type == GPU7_CF_INST_ALU_BREAK || cfInstruction->type == GPU7_CF_INST_ALU_ELSE_AFTER ) { // emit ALU code if (shaderContext->analyzer.modifiesPixelActiveState) { if(cfInstruction->type == GPU7_CF_INST_ALU_PUSH_BEFORE) src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - 1)); else src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1)); } if (cfInstruction->type == GPU7_CF_INST_ALU_PUSH_BEFORE) { src->addFmt("{} = {};" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth-1)); src->addFmt("{} = {};" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth)); } _emitALUClauseCode(shaderContext, cfInstruction); if( shaderContext->analyzer.modifiesPixelActiveState ) src->add("}" _CRLF); cemu_assert_debug(!(shaderContext->analyzer.modifiesPixelActiveState == false && cfInstruction->type != GPU7_CF_INST_ALU)); // handle ELSE case of PUSH_BEFORE if( cfInstruction->type == GPU7_CF_INST_ALU_PUSH_BEFORE ) { src->add("else {" _CRLF); src->addFmt("{} = false;" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth)); src->addFmt("{} = false;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1)); src->add("}" _CRLF); } // post clause handler if( cfInstruction->type == GPU7_CF_INST_ALU_POP_AFTER ) { src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - 1), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth - 1), _getActiveMaskCVarName(shaderContext, 
cfInstruction->activeStackDepth - 1)); } else if( cfInstruction->type == GPU7_CF_INST_ALU_POP2_AFTER ) { src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - 2), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth - 2), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth - 2)); } else if( cfInstruction->type == GPU7_CF_INST_ALU_ELSE_AFTER ) { // no condition test // pop stack if( cfInstruction->popCount != 0 ) debugBreakpoint(); // else operation src->addFmt("{} = {} == false;" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth)); src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth)); } } else if( cfInstruction->type == GPU7_CF_INST_TEX ) { // emit TEX code if (shaderContext->analyzer.modifiesPixelActiveState) { src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth+1)); } _emitTEXClauseCode(shaderContext, cfInstruction); if (shaderContext->analyzer.modifiesPixelActiveState) { src->add("}" _CRLF); } } else if( cfInstruction->type == GPU7_CF_INST_EXPORT || cfInstruction->type == GPU7_CF_INST_EXPORT_DONE ) { // emit export code _emitExportCode(shaderContext, cfInstruction); } else if( cfInstruction->type == GPU7_CF_INST_ELSE ) { // todo: Condition test, popCount? 
src->addFmt("{} = {} == false;" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth)); src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth)); } else if( cfInstruction->type == GPU7_CF_INST_POP ) { src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - cfInstruction->popCount), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth - cfInstruction->popCount), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth - cfInstruction->popCount)); } else if( cfInstruction->type == GPU7_CF_INST_LOOP_START_DX10 || cfInstruction->type == GPU7_CF_INST_LOOP_START_NO_AL) { // start of loop // if pixel is disabled, then skip loop if (ActiveSettings::ShaderPreventInfiniteLoopsEnabled()) { // with iteration limit to prevent infinite loops src->addFmt("int loopCounter{} = 0;" _CRLF, (sint32)cfInstruction->cfAddr); src->addFmt("while( {} == true && loopCounter{} < 500 )" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), (sint32)cfInstruction->cfAddr); src->add("{" _CRLF); src->addFmt("loopCounter{}++;" _CRLF, (sint32)cfInstruction->cfAddr); } else { src->addFmt("while( {} == true )" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1)); src->add("{" _CRLF); } } else if( cfInstruction->type == GPU7_CF_INST_LOOP_END ) { // this might not always work if( cfInstruction->popCount != 0 ) debugBreakpoint(); src->add("}" _CRLF); } else if( cfInstruction->type == GPU7_CF_INST_LOOP_BREAK ) { if( cfInstruction->popCount != 0 ) debugBreakpoint(); if (shaderContext->analyzer.modifiesPixelActiveState) { src->addFmt("if( {} == 
true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1)); } // note: active stack level is set to the same level as the loop begin. popCount is ignored src->add("break;" _CRLF); if (shaderContext->analyzer.modifiesPixelActiveState) src->add("}" _CRLF); } else if( cfInstruction->type == GPU7_CF_INST_MEM_STREAM0_WRITE || cfInstruction->type == GPU7_CF_INST_MEM_STREAM1_WRITE ) { _emitStreamWriteCode(shaderContext, cfInstruction); } else if( cfInstruction->type == GPU7_CF_INST_MEM_RING_WRITE ) { _emitCFRingWriteCode(shaderContext, cfInstruction); } else if( cfInstruction->type == GPU7_CF_INST_EMIT_VERTEX ) { if( shaderContext->analyzer.modifiesPixelActiveState ) src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1)); // write point size if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false) src->add("out.pointSize = supportBuffer.pointSize;" _CRLF); src->add("mesh.set_vertex(vertexIndex, out);" _CRLF); src->add("vertexIndex++;" _CRLF); // increment transform feedback pointer for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) { if (!shaderContext->output->streamoutBufferWriteMask[i]) continue; cemu_assert_debug((shaderContext->output->streamoutBufferStride[i] & 3) == 0); src->addFmt("sbBase{} += {};" _CRLF, i, shaderContext->output->streamoutBufferStride[i] / 4); } if( shaderContext->analyzer.modifiesPixelActiveState ) src->add("}" _CRLF); } else if (cfInstruction->type == GPU7_CF_INST_CALL) { _emitCFCall(shaderContext, cfInstruction); } else if (cfInstruction->type == GPU7_CF_INST_RETURN) { // todo (handle properly) } else { cemu_assert_debug(false); } } void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderContext, StringBuf* fCStr_shaderSource) { if( shaderContext->analyzer.hasRedcCUBE ) { fCStr_shaderSource->add("void redcCUBE(float4 src0, float4 src1, thread float3& stm, thread int& faceId)\r\n" 
"{\r\n" "// stm -> x .. s, y .. t, z .. MajorAxis*2.0\r\n" "float3 inputCoord = normalize(float3(src1.y, src1.x, src0.x));\r\n" "float rx = inputCoord.x;\r\n" "float ry = inputCoord.y;\r\n" "float rz = inputCoord.z;\r\n" "if( abs(rx) > abs(ry) && abs(rx) > abs(rz) )\r\n" "{\r\n" "stm.z = rx*2.0;\r\n" "stm.xy = float2(ry,rz); \r\n" "if( rx >= 0.0 )\r\n" "{\r\n" "faceId = 0;\r\n" "}\r\n" "else\r\n" "{\r\n" "faceId = 1;\r\n" "}\r\n" "}\r\n" "else if( abs(ry) > abs(rx) && abs(ry) > abs(rz) )\r\n" "{\r\n" "stm.z = ry*2.0;\r\n" "stm.xy = float2(rx,rz); \r\n" "if( ry >= 0.0 )\r\n" "{\r\n" "faceId = 2;\r\n" "}\r\n" "else\r\n" "{\r\n" "faceId = 3;\r\n" "}\r\n" "}\r\n" "else //if( abs(rz) > abs(ry) && abs(rz) > abs(rx) )\r\n" "{\r\n" "stm.z = rz*2.0;\r\n" "stm.xy = float2(rx,ry); \r\n" "if( rz >= 0.0 )\r\n" "{\r\n" "faceId = 4;\r\n" "}\r\n" "else\r\n" "{\r\n" "faceId = 5;\r\n" "}\r\n" "}\r\n" "}\r\n"); } if( shaderContext->analyzer.hasCubeMapTexture ) { fCStr_shaderSource->add("float3 redcCUBEReverse(float2 st, int faceId)\r\n" "{\r\n" "st.yx = st.xy;\r\n" "float3 v;\r\n" "float majorAxis = 1.0;\r\n" "if( faceId == 0 )\r\n" "{\r\n" "v.yz = (st-float2(1.5))*(majorAxis*2.0);\r\n" "v.x = 1.0;\r\n" "}\r\n" "else if( faceId == 1 )\r\n" "{\r\n" "v.yz = (st-float2(1.5))*(majorAxis*2.0);\r\n" "v.x = -1.0;\r\n" "}\r\n" "else if( faceId == 2 )\r\n" "{\r\n" "v.xz = (st-float2(1.5))*(majorAxis*2.0);\r\n" "v.y = 1.0;\r\n" "}\r\n" "else if( faceId == 3 )\r\n" "{\r\n" "v.xz = (st-float2(1.5))*(majorAxis*2.0);\r\n" "v.y = -1.0;\r\n" "}\r\n" "else if( faceId == 4 )\r\n" "{\r\n" "v.xy = (st-float2(1.5))*(majorAxis*2.0);\r\n" "v.z = 1.0;\r\n" "}\r\n" "else\r\n" "{\r\n" "v.xy = (st-float2(1.5))*(majorAxis*2.0);\r\n" "v.z = -1.0;\r\n" "}\r\n" "return v;\r\n" "}\r\n"); } // Sample compare emulate // TODO: only add when needed // TODO: lod_options overload // TODO: when the sampler has linear min mag filter, use gather and filter manually // TODO: offset? 
fCStr_shaderSource->add("" "template\r\n" "float sampleCompareEmulate(TextureT tex, sampler samplr, CoordT coord, float compareValue) {\r\n" "return compareValue < tex.sample(samplr, coord).x ? 1.0 : 0.0;\r\n" "}\r\n" ); // Texture calculate lod // TODO: only add when needed fCStr_shaderSource->add("" "template\r\n" "float2 textureCalculateLod(TextureT tex, sampler samplr, CoordT coord) {\r\n" "float lod = tex.calculate_unclamped_lod(samplr, coord);\r\n" "return float2(floor(lod), fract(lod));\r\n" "}\r\n"); // clamp fCStr_shaderSource->add("" "int clampFI32(int v)\r\n" "{\r\n" "if( v == 0x7FFFFFFF )\r\n" " return as_type(1.0);\r\n" "else if( v == 0xFFFFFFFF )\r\n" " return as_type(0.0);\r\n" "return as_type(clamp(as_type(v), 0.0, 1.0));\r\n" "}\r\n"); // mul non-ieee way (0*NaN/INF => 0.0) if (shaderContext->options->strictMul) { // things we tried: //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return mix(a*b,0.0,a==0.0||b==0.0); }" STR_LINEBREAK); //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return mix(vec2(a*b,0.0),vec2(0.0,0.0),(equal(vec2(a),vec2(0.0,0.0))||equal(vec2(b),vec2(0.0,0.0)))).x; }" STR_LINEBREAK); //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" STR_LINEBREAK); //fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){float r = a*b;r = intBitsToFloat(floatBitsToInt(r)&(((floatBitsToInt(a) != 0) && (floatBitsToInt(b) != 0))?0xFFFFFFFF:0));return r;}" STR_LINEBREAK); works // for "min" it used to be: float mul_nonIEEE(float a, float b){ return min(a*b,min(abs(a)*3.40282347E+38F,abs(b)*3.40282347E+38F)); } if( LatteGPUState.glVendor == GLVENDOR_NVIDIA && !ActiveSettings::DumpShadersEnabled()) fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){return mix(0.0, a*b, (a != 0.0) && (b != 0.0));}" _CRLF); // compiles faster on Nvidia and also results in lower RAM usage (OpenGL) else fCStr_shaderSource->add("float mul_nonIEEE(float a, 
float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" _CRLF); // DXKV-like: fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return (b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b); }" _CRLF); } } #include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp" static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* shaderContext, LatteParsedFetchShaderAttribute_t& attrib) { auto src = shaderContext->shaderSource; static const char* dsMappingTableFloat[6] = { "int(attrDecoder.x)", "int(attrDecoder.y)", "int(attrDecoder.z)", "int(attrDecoder.w)", /*"floatBitsToInt(0.0)"*/ "0", /*"floatBitsToInt(1.0)"*/ "0x3f800000" }; static const char* dsMappingTableInt[6] = { "int(attrDecoder.x)", "int(attrDecoder.y)", "int(attrDecoder.z)", "int(attrDecoder.w)", "0", "1" }; // get register index based on vtx semantic table uint32 attributeShaderLoc = 0xFFFFFFFF; for (sint32 f = 0; f < 32; f++) { if (shaderContext->contextRegisters[mmSQ_VTX_SEMANTIC_0 + f] == attrib.semanticId) { attributeShaderLoc = f; break; } } if (attributeShaderLoc == 0xFFFFFFFF) return; // attribute is not mapped to VS input uint32 registerIndex = attributeShaderLoc + 1; // R0 is skipped // is register used? 
if ((shaderContext->analyzer.gprUseMask[registerIndex / 8] & (1 << (registerIndex % 8))) == 0) { src->addFmt("// skipped unused attribute for r{}" _CRLF, registerIndex); return; } LatteDecompiler_emitAttributeDecodeMSL(shaderContext->shader, src, &attrib); if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt("{} = int4(", _getRegisterVarName(shaderContext, registerIndex)); else src->addFmt("{} = float4(", _getRegisterVarName(shaderContext, registerIndex)); for (sint32 f = 0; f < 4; f++) { uint8 ds = attrib.ds[f]; if (f > 0) src->add(", "); _emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType); if (ds >= 6) { cemu_assert_unimplemented(); ds = 4; // read as 0.0 } if (attrib.nfa != 1) { src->add(dsMappingTableFloat[ds]); } else { src->add(dsMappingTableInt[ds]); } _emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType); } src->add(");" _CRLF); } void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader) { bool isRectVertexShader = UseRectEmulation(*shaderContext->contextRegistersNew); bool usesGeometryShader = UseGeometryShader(*shaderContext->contextRegistersNew, shaderContext->options->usesGeometryShader); bool fetchVertexManually = (usesGeometryShader || (shaderContext->fetchShader && shaderContext->fetchShader->mtlFetchVertexManually)); StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end) shaderContext->shaderSource = src; // debug info src->addFmt("// shader {:016x}" _CRLF, shaderContext->shaderBaseHash); #ifdef CEMU_DEBUG_ASSERT src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues ? 
"true" : "false"); src->addFmt(_CRLF); #endif // include metal standard library src->add("#include " _CRLF); src->add("using namespace metal;" _CRLF); // header part (definitions for inputs and outputs) LatteDecompiler::emitHeader(shaderContext, isRectVertexShader, usesGeometryShader, fetchVertexManually); // helper functions LatteDecompiler_emitHelperFunctions(shaderContext, src); const char* functionType = ""; const char* outputTypeName = ""; switch (shader->shaderType) { case LatteConst::ShaderType::Vertex: if (fetchVertexManually) { // TODO: clean this up // fetchVertex will modify vid in case of an object shader and an indexed draw // Vertex buffers std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS "; std::string vertexBuffers = "#define VERTEX_BUFFERS "; std::string inputFetchDefinition = "VertexIn fetchVertex("; if (usesGeometryShader) inputFetchDefinition += "thread uint&"; else inputFetchDefinition += "uint"; inputFetchDefinition += " vid, uint iid"; if (usesGeometryShader) inputFetchDefinition += ", device uint* indexBuffer, uchar indexType"; inputFetchDefinition += " VERTEX_BUFFER_DEFINITIONS) {\n"; // Index buffer if (usesGeometryShader) { inputFetchDefinition += "if (indexType == 1) // UShort\n"; inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n"; inputFetchDefinition += "else if (indexType == 2) // UInt\n"; inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid];\n"; } inputFetchDefinition += "VertexIn in;\n"; for (auto& bufferGroup : shaderContext->fetchShader->bufferGroups) { std::optional fetchType; uint32 bufferIndex = bufferGroup.attributeBufferIndex; uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7; uint32 bufferStride = (shaderContext->contextRegisters[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF; for (sint32 j = 0; j < bufferGroup.attribCount; ++j) { auto& attr = bufferGroup.attrib[j]; uint32 semanticId = 
shaderContext->output->resourceMappingMTL.attributeMapping[attr.semanticId]; if (semanticId == (uint32)-1) continue; // attribute not used? std::string formatName; uint8 componentCount = 0; switch (GetMtlVertexFormat(attr.format)) { case MTL::VertexFormatUChar: formatName = "uchar"; componentCount = 1; break; case MTL::VertexFormatUChar2: formatName = "uchar2"; componentCount = 2; break; case MTL::VertexFormatUChar3: formatName = "uchar3"; componentCount = 3; break; case MTL::VertexFormatUChar4: formatName = "uchar4"; componentCount = 4; break; case MTL::VertexFormatUShort: formatName = "ushort"; componentCount = 1; break; case MTL::VertexFormatUShort2: formatName = "ushort2"; componentCount = 2; break; case MTL::VertexFormatUShort3: formatName = "ushort3"; componentCount = 3; break; case MTL::VertexFormatUShort4: formatName = "ushort4"; componentCount = 4; break; case MTL::VertexFormatUInt: formatName = "uint"; componentCount = 1; break; case MTL::VertexFormatUInt2: formatName = "uint2"; componentCount = 2; break; case MTL::VertexFormatUInt3: formatName = "uint3"; componentCount = 3; break; case MTL::VertexFormatUInt4: formatName = "uint4"; componentCount = 4; break; } // Get the fetch type std::string fetchTypeStr; if (attr.fetchType == LatteConst::VertexFetchType2::VERTEX_DATA) fetchTypeStr = "vid"; else if (attr.fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA) fetchTypeStr = "iid"; else if (attr.fetchType == LatteConst::VertexFetchType2::NO_INDEX_OFFSET_DATA) fetchTypeStr = "0"; // TODO: correct? 
// Fetch the attribute inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = uint4(uint", semanticId); if (componentCount != 1) inputFetchDefinition += fmt::format("{}", componentCount); inputFetchDefinition += fmt::format("(*(device {}*)", formatName); inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex); inputFetchDefinition += fmt::format(" + {} * {} + {}))", fetchTypeStr, bufferStride, attr.offset); for (uint8 i = 0; i < (4 - componentCount); i++) inputFetchDefinition += ", 0"; inputFetchDefinition += ");\n"; if (fetchType.has_value()) cemu_assert_debug(fetchType == attr.fetchType); else fetchType = attr.fetchType; if (attr.fetchType == LatteConst::INSTANCE_DATA) { cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported } } // TODO: fetch type vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex)); vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex); } inputFetchDefinition += "return in;\n"; inputFetchDefinition += "}\n"; src->add(vertexBufferDefinitions.c_str()); src->add("\n"); src->add(vertexBuffers.c_str()); src->add("\n"); src->add(inputFetchDefinition.c_str()); } if (usesGeometryShader) { functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]"; outputTypeName = "void"; } else { functionType = "vertex"; if (shaderContext->contextRegistersNew->IsRasterizationEnabled()) outputTypeName = "VertexOut"; else outputTypeName = "void"; } break; case LatteConst::ShaderType::Geometry: functionType = "[[mesh, max_total_threads_per_threadgroup(1)]]"; outputTypeName = "void"; break; case LatteConst::ShaderType::Pixel: functionType = "fragment"; outputTypeName = "FragmentOut"; break; } // start of main src->addFmt("{} {} main0(", functionType, outputTypeName); LatteDecompiler::emitInputs(shaderContext, isRectVertexShader, usesGeometryShader, 
fetchVertexManually); src->add(") {" _CRLF); if (fetchVertexManually && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) { if (shader->shaderType == LatteConst::ShaderType::Vertex) { if (usesGeometryShader) { // Calculate the imaginary vertex id LattePrimitiveMode vsOutPrimType = shaderContext->contextRegistersNew->VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE(); if (PrimitiveRequiresConnection(vsOutPrimType)) src->add("uint vid = tig + tid;" _CRLF); else src->add("uint vid = tig * VERTICES_PER_VERTEX_PRIMITIVE + tid;" _CRLF); src->add("uint iid = vid / supportBuffer.verticesPerInstance;" _CRLF); src->add("vid %= supportBuffer.verticesPerInstance;" _CRLF); // Fetch the input src->add("VertexIn in = fetchVertex(vid, iid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF); // Output is defined as object payload src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF); } else { // Fetch the input src->add("VertexIn in = fetchVertex(vid, iid VERTEX_BUFFERS);" _CRLF); } } else if (shader->shaderType == LatteConst::ShaderType::Geometry) { src->add("GeometryOut out;" _CRLF); // The index of the current vertex that is being emitted src->add("uint vertexIndex = 0;" _CRLF); } } if (shader->shaderType == LatteConst::ShaderType::Pixel || (shaderContext->contextRegistersNew->IsRasterizationEnabled() && !usesGeometryShader)) { src->addFmt("{} out;" _CRLF, outputTypeName); } // variable definition if (shaderContext->typeTracker.useArrayGPRs == false) { // each register is a separate variable for (sint32 i = 0; i < 128; i++) { if (shaderContext->analyzer.usesRelativeGPRRead || (shaderContext->analyzer.gprUseMask[i / 8] & (1 << (i & 7))) != 0) { if (shaderContext->typeTracker.genIntReg) src->addFmt("int4 R{}i = int4(0);" _CRLF, i); else if (shaderContext->typeTracker.genFloatReg) src->addFmt("float4 R{}f = float4(0.0);" _CRLF, i); } } } else { // registers are represented using a single large array if 
(shaderContext->typeTracker.genIntReg) src->addFmt("int4 Ri[128];" _CRLF); else if (shaderContext->typeTracker.genFloatReg) src->addFmt("float4 Rf[128];" _CRLF); for (sint32 i = 0; i < 128; i++) { if (shaderContext->typeTracker.genIntReg) src->addFmt("Ri[{}] = int4(0);" _CRLF, i); else if (shaderContext->typeTracker.genFloatReg) src->addFmt("Rf[{}] = float4(0.0);" _CRLF, i); } } if( shader->shaderType == LatteConst::ShaderType::Vertex ) src->addFmt("uint4 attrDecoder;" _CRLF); if (shaderContext->typeTracker.genIntReg) src->addFmt("int backupReg0i, backupReg1i, backupReg2i, backupReg3i, backupReg4i;" _CRLF); if (shaderContext->typeTracker.genFloatReg) src->addFmt("float backupReg0f, backupReg1f, backupReg2f, backupReg3f, backupReg4f;" _CRLF); if (shaderContext->typeTracker.genIntReg) { src->addFmt("int PV0ix = 0, PV0iy = 0, PV0iz = 0, PV0iw = 0, PV1ix = 0, PV1iy = 0, PV1iz = 0, PV1iw = 0;" _CRLF); src->addFmt("int PS0i = 0, PS1i = 0;" _CRLF); src->addFmt("int4 tempi = int4(0);" _CRLF); } if (shaderContext->typeTracker.genFloatReg) { src->addFmt("float PV0fx = 0.0, PV0fy = 0.0, PV0fz = 0.0, PV0fw = 0.0, PV1fx = 0.0, PV1fy = 0.0, PV1fz = 0.0, PV1fw = 0.0;" _CRLF); src->addFmt("float PS0f = 0.0, PS1f = 0.0;" _CRLF); src->addFmt("float4 tempf = float4(0.0);" _CRLF); } if (shaderContext->analyzer.hasGradientLookup) { src->add("float4 gradH;" _CRLF); src->add("float4 gradV;" _CRLF); } src->add("float tempResultf;" _CRLF); src->add("int tempResulti;" _CRLF); src->add("int4 ARi = int4(0);" _CRLF); src->add("bool predResult = true;" _CRLF); if(shaderContext->analyzer.modifiesPixelActiveState ) { src->addFmt("bool activeMaskStack[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+1); src->addFmt("bool activeMaskStackC[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+2); for (sint32 i = 0; i < shaderContext->analyzer.activeStackMaxDepth; i++) { src->addFmt("activeMaskStack[{}] = false;" _CRLF, i); } for (sint32 i = 0; i < 
shaderContext->analyzer.activeStackMaxDepth+1; i++) { src->addFmt("activeMaskStackC[{}] = false;" _CRLF, i); } src->addFmt("activeMaskStack[0] = true;" _CRLF); src->addFmt("activeMaskStackC[0] = true;" _CRLF); src->addFmt("activeMaskStackC[1] = true;" _CRLF); // generate vars for each subroutine for (auto& subroutineInfo : shaderContext->list_subroutines) { sint32 subroutineMaxStackDepth = 0; src->addFmt("bool activeMaskStackSub{:04x}[{}];" _CRLF, subroutineInfo.cfAddr, subroutineMaxStackDepth + 1); src->addFmt("bool activeMaskStackCSub{:04x}[{}];" _CRLF, subroutineInfo.cfAddr, subroutineMaxStackDepth + 2); } } // helper variables for cube maps (todo: Only emit when used) if (shaderContext->analyzer.hasRedcCUBE) { src->add("float3 cubeMapSTM;" _CRLF); src->add("int cubeMapFaceId;" _CRLF); } for(sint32 i=0; ioutput->textureUnitMask[i]) continue; if( shader->textureUnitDim[i] != Latte::E_DIM::DIM_CUBEMAP ) continue; src->addFmt("float cubeMapArrayIndex{} = 0.0;" _CRLF, i); } // init base offset for streamout buffer writes if (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry) { for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++) { if(!shaderContext->output->streamoutBufferWriteMask[i]) continue; cemu_assert_debug((shaderContext->output->streamoutBufferStride[i]&3) == 0); if (shader->shaderType == LatteConst::ShaderType::Vertex) // vertex shader src->addFmt("int sbBase{} = supportBuffer.streamoutBufferBase{}/4 + (vid + supportBuffer.verticesPerInstance * iid)*{};" _CRLF, i, i, shaderContext->output->streamoutBufferStride[i] / 4); else // geometry shader { uint32 gsOutPrimType = shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE]; uint32 bytesPerVertex = shaderContext->contextRegisters[mmSQ_GS_VERT_ITEMSIZE] * 4; uint32 maxVerticesInGS = ((shaderContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF) * 4) / bytesPerVertex; cemu_assert_debug(gsOutPrimType == 0); // currently we only properly 
handle GS output primitive points src->addFmt("int sbBase{} = supportBuffer.streamoutBufferBase{}/4 + (gl_PrimitiveIDIn * {})*{};" _CRLF, i, i, maxVerticesInGS, shaderContext->output->streamoutBufferStride[i] / 4); } } } // code to load inputs from previous stage if( shader->shaderType == LatteConst::ShaderType::Vertex ) { if( (shaderContext->analyzer.gprUseMask[0/8]&(1<<(0%8))) != 0 ) { if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt("{} = int4(vid, 0, 0, iid);" _CRLF, _getRegisterVarName(shaderContext, 0)); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{} = float4(vid, 0, 0, iid);" _CRLF, _getRegisterVarName(shaderContext, 0)); // TODO: as_type(float4(vid, 0, 0, iid))? else cemu_assert_unimplemented(); } LatteFetchShader* parsedFetchShader = shaderContext->fetchShader; for(auto& bufferGroup : parsedFetchShader->bufferGroups) { for(sint32 i=0; ibufferGroupsInvalid) { // these attributes point to non-existent buffers // todo - figure out how the hardware actually handles this, currently we assume the input values are zero for (sint32 i = 0; i < bufferGroup.attribCount; i++) LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]); } } else if (shader->shaderType == LatteConst::ShaderType::Pixel) { LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable(); uint32 psControl0 = shaderContext->contextRegisters[mmSPI_PS_IN_CONTROL_0]; uint32 psControl1 = shaderContext->contextRegisters[mmSPI_PS_IN_CONTROL_1]; uint32 spiInterpControl = shaderContext->contextRegisters[mmSPI_INTERP_CONTROL_0]; uint8 spriteEnable = (spiInterpControl >> 1) & 1; cemu_assert_debug(spriteEnable == 0); uint8 frontFace_enabled = (psControl1 >> 8) & 1; uint8 frontFace_chan = (psControl1 >> 9) & 3; uint8 frontFace_allBits = (psControl1 >> 11) & 1; uint8 frontFace_regIndex = (psControl1 >> 12) & 0x1F; // handle param_gen if (psInputTable->paramGen != 0) { 
cemu_assert_debug((psInputTable->paramGen) == 1); // handle the other bits (the same set of coordinates with different perspective/projection settings?) uint32 paramGenGPRIndex = psInputTable->paramGenGPR; if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{} = pointCoord.xyxy;" _CRLF, _getRegisterVarName(shaderContext, paramGenGPRIndex)); else src->addFmt("{} = as_type(pointCoord.xyxy);" _CRLF, _getRegisterVarName(shaderContext, paramGenGPRIndex)); } for (sint32 i = 0; i < psInputTable->count; i++) { uint32 psControl0 = shaderContext->contextRegisters[mmSPI_PS_IN_CONTROL_0]; uint32 spi0_paramGen = (psControl0 >> 15) & 0xF; sint32 gprIndex = i;// +spi0_paramGen + paramRegOffset; if ((shaderContext->analyzer.gprUseMask[gprIndex / 8] & (1 << (gprIndex % 8))) == 0 && shaderContext->analyzer.usesRelativeGPRRead == false) continue; uint32 psInputSemanticId = psInputTable->import[i].semanticId; if (psInputSemanticId == LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION) { if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{} = GET_FRAGCOORD();" _CRLF, _getRegisterVarName(shaderContext, gprIndex)); else src->addFmt("{} = as_type(GET_FRAGCOORD());" _CRLF, _getRegisterVarName(shaderContext, gprIndex)); continue; } if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt("{} = as_type(in.passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{} = in.passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId); else cemu_assert_unimplemented(); } // front facing attribute if (frontFace_enabled) { if ((shaderContext->analyzer.gprUseMask[0 / 8] & (1 << (0 % 8))) != 0) { if (frontFace_allBits) cemu_assert_debug(false); if (shaderContext->typeTracker.defaultDataType == 
LATTE_DECOMPILER_DTYPE_SIGNED_INT) src->addFmt("{}.{} = as_type(frontFacing ? 1.0 : 0.0);" _CRLF, _getRegisterVarName(shaderContext, frontFace_regIndex), _getElementStrByIndex(frontFace_chan)); else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT) src->addFmt("{}.{} = frontFacing ? 1.0 : 0.0;" _CRLF, _getRegisterVarName(shaderContext, frontFace_regIndex), _getElementStrByIndex(frontFace_chan)); else cemu_assert_debug(false); } } } for(auto& cfInstruction : shaderContext->cfInstructions) LatteDecompiler_emitClauseCodeMSL(shaderContext, &cfInstruction, false); //if(shader->shaderType == LatteConst::ShaderType::Geometry) // src->add("EndPrimitive();" _CRLF); // vertex shader should write renderstate point size at the end if required but not modified by shader if (shaderContext->analyzer.outputPointSize && !shaderContext->analyzer.writesPointSize) { if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader && shaderContext->contextRegistersNew->IsRasterizationEnabled()) src->add("out.pointSize = supportBuffer.pointSize;" _CRLF); } if (usesGeometryShader && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)) { if (shader->shaderType == LatteConst::ShaderType::Vertex) { src->add("if (tid == 0) {" _CRLF); src->add("meshGridProperties.set_threadgroups_per_grid(uint3(1, 1, 1));" _CRLF); src->add("}" _CRLF); } else if (shader->shaderType == LatteConst::ShaderType::Geometry) { src->add("mesh.set_primitive_count(GET_PRIMITIVE_COUNT(vertexIndex));" _CRLF); // Set indices if (shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE] == 1) // Line strip { src->add("for (uint8_t i = 0; i < GET_PRIMITIVE_COUNT(vertexIndex) * 2; i++) {" _CRLF); src->add("mesh.set_index(i, (i 2 3) + i % 2);" _CRLF); src->add("}" _CRLF); } else if (shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE] == 2) // Triangle strip { src->add("for (uint8_t i = 0; i < 
GET_PRIMITIVE_COUNT(vertexIndex) * 3; i++) {" _CRLF); src->add("mesh.set_index(i, (i / 3) + i % 3);" _CRLF); src->add("}" _CRLF); } else { src->add("for (uint8_t i = 0; i < vertexIndex; i++) {" _CRLF); src->add("mesh.set_index(i, i);" _CRLF); src->add("}" _CRLF); } } } if (shader->shaderType == LatteConst::ShaderType::Pixel || (shaderContext->contextRegistersNew->IsRasterizationEnabled() && !usesGeometryShader)) { // Return src->add("return out;" _CRLF); } // end of shader main src->add("}" _CRLF); src->shrink_to_fit(); shader->strBuf_shaderSource = src; }