mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-04 14:01:17 +12:00
4447 lines
171 KiB
C++
4447 lines
171 KiB
C++
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
|
#include "Cafe/HW/Latte/Core/LatteShaderAssembly.h"
|
|
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
|
#include "Cafe/OS/libs/gx2/GX2.h" // todo - remove dependency
|
|
#include "Cafe/HW/Latte/Core/Latte.h"
|
|
#include "Cafe/HW/Latte/Core/LatteDraw.h"
|
|
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
|
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
|
|
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInternal.h"
|
|
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
|
|
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
|
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
|
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
|
#include "config/ActiveSettings.h"
|
|
#include "util/helpers/StringBuf.h"
|
|
|
|
#include <bitset>
|
|
#include <boost/container/small_vector.hpp>
|
|
|
|
#define _CRLF "\r\n"
|
|
|
|
void LatteDecompiler_emitAttributeDecodeMSL(LatteDecompilerShader* shaderContext, StringBuf* src, LatteParsedFetchShaderAttribute_t* attrib);
|
|
|
|
/*
|
|
* Variable names:
|
|
* R0-R127 temp
|
|
* Most variables are multi-typed and the respective type is appended to the name
|
|
* Type suffixes are: f (float), i (32bit int), ui (unsigned 32bit int)
|
|
* Examples: R13ui.x, tempf.z
|
|
*/
|
|
|
|
// local prototypes
|
|
void _emitTypeConversionPrefixMSL(LatteDecompilerShaderContext* shaderContext, sint32 sourceType, sint32 destinationType, sint32 componentCount = 1);
|
|
void _emitTypeConversionSuffixMSL(LatteDecompilerShaderContext* shaderContext, sint32 sourceType, sint32 destinationType);
|
|
void LatteDecompiler_emitClauseCodeMSL(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, bool isSubroutine);
|
|
|
|
// Maps a channel index to its swizzle letter: 0 -> "x", 1 -> "y", 2 -> "z", 3 -> "w".
// Any other index yields the placeholder "UNDEFINED".
static const char* _getElementStrByIndex(uint32 channel)
{
	static const char* const kChannelNames[4] = { "x", "y", "z", "w" };
	if (channel < 4)
		return kChannelNames[channel];
	return "UNDEFINED";
}
|
|
|
|
static char _tempGenString[64][256];
|
|
static uint32 _tempGenStringIndex = 0;
|
|
|
|
static char* _getTempString()
|
|
{
|
|
char* str = _tempGenString[_tempGenStringIndex];
|
|
_tempGenStringIndex = (_tempGenStringIndex+1)%64;
|
|
return str;
|
|
}
|
|
|
|
// Formats the name of the active mask stack element at 'index' into a scratch string.
// Subroutines use a stack array whose name carries the subroutine's cfAddr as 4+ hex digits.
static char* _getActiveMaskVarName(LatteDecompilerShaderContext* shaderContext, sint32 index)
{
	char* const nameBuf = _getTempString();
	if (!shaderContext->isSubroutine)
	{
		sprintf(nameBuf, "activeMaskStack[%d]", index);
		return nameBuf;
	}
	sprintf(nameBuf, "activeMaskStackSub%04x[%d]", shaderContext->subroutineInfo->cfAddr, index);
	return nameBuf;
}
|
|
|
|
// Formats the name of the "C" active mask stack element at 'index' into a scratch string.
// Subroutines use a stack array whose name carries the subroutine's cfAddr as 4+ hex digits.
static char* _getActiveMaskCVarName(LatteDecompilerShaderContext* shaderContext, sint32 index)
{
	char* const nameBuf = _getTempString();
	if (!shaderContext->isSubroutine)
	{
		sprintf(nameBuf, "activeMaskStackC[%d]", index);
		return nameBuf;
	}
	sprintf(nameBuf, "activeMaskStackCSub%04x[%d]", shaderContext->subroutineInfo->cfAddr, index);
	return nameBuf;
}
|
|
|
|
/*
 * Builds the name of the variable that backs GPR 'index', using the shader's default data type
 * Without array GPRs the result is "R<index>i" or "R<index>f"
 * With array GPRs the result is "Ri[<index>]" / "Rf[<index>]", or "Ri[<index>+ARi.c]" / "Rf[<index>+ARi.c]"
 * when destRelIndexMode (one of GPU7_INDEX_AR_X/Y/Z/W) selects a relative index. Pass -1 for no relative index
 * The returned pointer is a recycled scratch string from _getTempString()
 * For default data types other than signed int and float the returned string is empty
 */
static char* _getRegisterVarName(LatteDecompilerShaderContext* shaderContext, uint32 index, sint32 destRelIndexMode=-1)
{
	auto type = shaderContext->typeTracker.defaultDataType;
	char* tempStr = _getTempString();
	// scratch strings are recycled, clear the slot so an unsupported type never returns stale text
	tempStr[0] = '\0';
	if (shaderContext->typeTracker.useArrayGPRs == false)
	{
		if (type == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
			sprintf(tempStr, "R%di", index);
		else if (type == LATTE_DECOMPILER_DTYPE_FLOAT)
			sprintf(tempStr, "R%df", index);
		return tempStr;
	}
	if (destRelIndexMode < 0)
	{
		// array GPRs, static index
		if (type == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
			sprintf(tempStr, "Ri[%d]", index);
		else if (type == LATTE_DECOMPILER_DTYPE_FLOAT)
			sprintf(tempStr, "Rf[%d]", index);
		return tempStr;
	}
	// array GPRs, relative index
	// default to an empty offset so an unknown index mode never formats uninitialized memory
	const char* destRelOffset = "";
	if (destRelIndexMode == GPU7_INDEX_AR_X)
		destRelOffset = "ARi.x";
	else if (destRelIndexMode == GPU7_INDEX_AR_Y)
		destRelOffset = "ARi.y";
	else if (destRelIndexMode == GPU7_INDEX_AR_Z)
		destRelOffset = "ARi.z";
	else if (destRelIndexMode == GPU7_INDEX_AR_W)
		destRelOffset = "ARi.w";
	else
		debugBreakpoint();
	if (type == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
		sprintf(tempStr, "Ri[%d+%s]", index, destRelOffset);
	else if (type == LATTE_DECOMPILER_DTYPE_FLOAT)
		sprintf(tempStr, "Rf[%d+%s]", index, destRelOffset);
	return tempStr;
}
|
|
|
|
// Appends the type suffix used in variable names: "i" (signed int), "ui" (unsigned int) or "f" (float).
// Any other data type trips cemu_assert_unimplemented().
static void _appendRegisterTypeSuffix(StringBuf* src, sint32 dataType)
{
	if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
	{
		src->add("i");
		return;
	}
	if (dataType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT)
	{
		src->add("ui");
		return;
	}
	if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT)
	{
		src->add("f");
		return;
	}
	cemu_assert_unimplemented();
}
|
|
|
|
// appends x/y/z/w
|
|
static void _appendChannel(StringBuf* src, sint32 channelIndex)
|
|
{
|
|
cemu_assert_debug(channelIndex >= 0 && channelIndex <= 3);
|
|
switch (channelIndex)
|
|
{
|
|
case 0:
|
|
src->add("x");
|
|
return;
|
|
case 1:
|
|
src->add("y");
|
|
return;
|
|
case 2:
|
|
src->add("z");
|
|
return;
|
|
case 3:
|
|
src->add("w");
|
|
return;
|
|
}
|
|
}
|
|
|
|
// appends .x/.y/.z/.w
|
|
static void _appendChannelAccess(StringBuf* src, sint32 channelIndex)
|
|
{
|
|
cemu_assert_debug(channelIndex >= 0 && channelIndex <= 3);
|
|
switch (channelIndex)
|
|
{
|
|
case 0:
|
|
src->add(".x");
|
|
return;
|
|
case 1:
|
|
src->add(".y");
|
|
return;
|
|
case 2:
|
|
src->add(".z");
|
|
return;
|
|
case 3:
|
|
src->add(".w");
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Appends the name of a PV/PS temporary: "PS<n><type>" for ALU unit 4, otherwise "PV<n><type><channel>"
// <n> is the lowest bit of groupIndex, <type> is the suffix of the shader's default data type
static void _appendPVPS(LatteDecompilerShaderContext* shaderContext, StringBuf* src, uint32 groupIndex, uint8 aluUnit)
{
	cemu_assert_debug(aluUnit < 5);
	const bool isPS = (aluUnit == 4);
	if (isPS)
		src->addFmt("PS{}", (groupIndex & 1));
	else
		src->addFmt("PV{}", (groupIndex & 1));
	_appendRegisterTypeSuffix(src, shaderContext->typeTracker.defaultDataType);
	// only PV carries a channel
	if (!isPS)
		_appendChannel(src, aluUnit);
}
|
|
|
|
std::string _FormatFloatAsConstant(float f)
|
|
{
|
|
char floatAsStr[64];
|
|
size_t floatAsStrLen = fmt::format_to_n(floatAsStr, 64, "{:#}", f).size;
|
|
size_t floatAsStrLenOrg = floatAsStrLen;
|
|
if(floatAsStrLen > 0 && floatAsStr[floatAsStrLen-1] == '.')
|
|
{
|
|
floatAsStr[floatAsStrLen] = '0';
|
|
floatAsStrLen++;
|
|
}
|
|
cemu_assert(floatAsStrLen < 50); // constant suspiciously long?
|
|
floatAsStr[floatAsStrLen] = '\0';
|
|
cemu_assert_debug(floatAsStrLen >= 3); // shortest possible form is "0.0"
|
|
return floatAsStr;
|
|
}
|
|
|
|
// tracks PV/PS and register backups
|
|
struct ALUClauseTemporariesState
|
|
{
|
|
struct PVPSAlias
|
|
{
|
|
enum class LOCATION_TYPE : uint8
|
|
{
|
|
LOCATION_NONE,
|
|
LOCATION_GPR,
|
|
LOCATION_PVPS,
|
|
};
|
|
|
|
LOCATION_TYPE location{ LOCATION_TYPE::LOCATION_NONE };
|
|
uint8 index; // GPR index or temporary index
|
|
uint8 aluUnit; // x,y,z,w (or 5 for PS)
|
|
|
|
void SetLocationGPR(uint8 gprIndex, uint8 channel)
|
|
{
|
|
cemu_assert_debug(channel < 4);
|
|
this->location = LOCATION_TYPE::LOCATION_GPR;
|
|
this->index = gprIndex;
|
|
this->aluUnit = channel;
|
|
}
|
|
|
|
void SetLocationPSPVTemporary(uint8 aluUnit, uint32 groupIndex)
|
|
{
|
|
cemu_assert_debug(aluUnit < 5);
|
|
this->location = LOCATION_TYPE::LOCATION_PVPS;
|
|
this->index = groupIndex & 1;
|
|
this->aluUnit = aluUnit;
|
|
}
|
|
};
|
|
|
|
struct GPRTemporary
|
|
{
|
|
GPRTemporary(uint8 gprIndex, uint8 channel, uint8 backupVarIndex) : gprIndex(gprIndex), channel(channel), backupVarIndex(backupVarIndex) {}
|
|
|
|
uint8 gprIndex;
|
|
uint8 channel;
|
|
uint8 backupVarIndex;
|
|
};
|
|
|
|
void TrackGroupOutputPVPS(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstr, size_t numInstr)
|
|
{
|
|
// unset current
|
|
for (auto& it : m_pvps)
|
|
it.location = PVPSAlias::LOCATION_TYPE::LOCATION_NONE;
|
|
for (size_t i = 0; i < numInstr; i++)
|
|
{
|
|
LatteDecompilerALUInstruction& inst = aluInstr[i];
|
|
if (!inst.isOP3 && inst.opcode == ALU_OP2_INST_NOP)
|
|
continue; // skip NOP instruction
|
|
|
|
if (inst.writeMask == 0)
|
|
{
|
|
// map to temporary
|
|
m_pvps[inst.aluUnit].SetLocationPSPVTemporary(inst.aluUnit, aluInstr->instructionGroupIndex);
|
|
}
|
|
else
|
|
{
|
|
// map to GPR
|
|
if(inst.destRel == 0) // is PV/PS set for indexed writes?
|
|
m_pvps[inst.aluUnit].SetLocationGPR(inst.destGpr, inst.destElem);
|
|
}
|
|
}
|
|
}
|
|
|
|
bool HasPVPS(uint8 aluUnitIndex) const
|
|
{
|
|
cemu_assert_debug(aluUnitIndex < 5);
|
|
return m_pvps[aluUnitIndex].location != PVPSAlias::LOCATION_TYPE::LOCATION_NONE;
|
|
}
|
|
|
|
void EmitPVPSAccess(LatteDecompilerShaderContext* shaderContext, uint8 aluUnitIndex, uint32 currentGroupIndex) const
|
|
{
|
|
switch (m_pvps[aluUnitIndex].location)
|
|
{
|
|
case PVPSAlias::LOCATION_TYPE::LOCATION_GPR:
|
|
{
|
|
sint32 temporaryIndex = GetTemporaryForGPR(m_pvps[aluUnitIndex].index, m_pvps[aluUnitIndex].aluUnit);
|
|
if (temporaryIndex < 0)
|
|
{
|
|
shaderContext->shaderSource->add(_getRegisterVarName(shaderContext, m_pvps[aluUnitIndex].index, -1));
|
|
_appendChannelAccess(shaderContext->shaderSource, m_pvps[aluUnitIndex].aluUnit);
|
|
}
|
|
else
|
|
{
|
|
// use temporary instead of GPR
|
|
shaderContext->shaderSource->addFmt("backupReg{}", temporaryIndex);
|
|
_appendRegisterTypeSuffix(shaderContext->shaderSource, shaderContext->typeTracker.defaultDataType);
|
|
}
|
|
break;
|
|
}
|
|
case PVPSAlias::LOCATION_TYPE::LOCATION_PVPS:
|
|
_appendPVPS(shaderContext, shaderContext->shaderSource, currentGroupIndex-1, m_pvps[aluUnitIndex].aluUnit);
|
|
break;
|
|
default:
|
|
cemuLog_log(LogType::Force, "Shader {:016x} accesses PV/PS without writing to it", shaderContext->shaderBaseHash);
|
|
cemu_assert_suspicious();
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Check for GPR channels which are modified before they are read within the same group
|
|
* These registers need to be copied to a temporary
|
|
*/
|
|
void CreateGPRTemporaries(LatteDecompilerShaderContext* shaderContext, std::span<LatteDecompilerALUInstruction> aluInstructions)
|
|
{
|
|
uint8 registerChannelWriteMask[(LATTE_NUM_GPR * 4 + 7) / 8] = { 0 };
|
|
|
|
m_gprTemporaries.clear();
|
|
for (auto& aluInstruction : aluInstructions)
|
|
{
|
|
// ignore NOP instructions
|
|
if (aluInstruction.isOP3 == false && aluInstruction.opcode == ALU_OP2_INST_NOP)
|
|
continue;
|
|
cemu_assert_debug(aluInstruction.destElem <= 3);
|
|
// check if any previously written register is read
|
|
for (sint32 f = 0; f < 3; f++)
|
|
{
|
|
uint32 readGPRIndex;
|
|
uint32 readGPRChannel;
|
|
if (GPU7_ALU_SRC_IS_GPR(aluInstruction.sourceOperand[f].sel))
|
|
{
|
|
readGPRIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction.sourceOperand[f].sel);
|
|
cemu_assert_debug(aluInstruction.sourceOperand[f].chan <= 3);
|
|
readGPRChannel = aluInstruction.sourceOperand[f].chan;
|
|
}
|
|
else if (GPU7_ALU_SRC_IS_PV(aluInstruction.sourceOperand[f].sel) || GPU7_ALU_SRC_IS_PS(aluInstruction.sourceOperand[f].sel))
|
|
{
|
|
uint8 aluUnitIndex = 0;
|
|
if (GPU7_ALU_SRC_IS_PV(aluInstruction.sourceOperand[f].sel))
|
|
aluUnitIndex = aluInstruction.sourceOperand[f].chan;
|
|
else
|
|
aluUnitIndex = 4;
|
|
// if aliased to a GPR, then consider it a GPR read
|
|
if(m_pvps[aluUnitIndex].location != PVPSAlias::LOCATION_TYPE::LOCATION_GPR)
|
|
continue;
|
|
readGPRIndex = m_pvps[aluUnitIndex].index;
|
|
readGPRChannel = m_pvps[aluUnitIndex].aluUnit;
|
|
}
|
|
else
|
|
continue;
|
|
// track GPR read
|
|
if ((registerChannelWriteMask[(readGPRIndex * 4 + aluInstruction.sourceOperand[f].chan) / 8] & (1 << ((readGPRIndex * 4 + aluInstruction.sourceOperand[f].chan) % 8))) != 0)
|
|
{
|
|
// register is overwritten by previous instruction, a temporary variable is required
|
|
if (GetTemporaryForGPR(readGPRIndex, readGPRChannel) < 0)
|
|
m_gprTemporaries.emplace_back(readGPRIndex, readGPRChannel, m_gprTemporaries.size());
|
|
}
|
|
}
|
|
// track write
|
|
if (aluInstruction.writeMask != 0)
|
|
registerChannelWriteMask[(aluInstruction.destGpr * 4 + aluInstruction.destElem) / 8] |= (1 << ((aluInstruction.destGpr * 4 + aluInstruction.destElem) % 8));
|
|
}
|
|
// output code to move GPRs into temporaries
|
|
StringBuf* src = shaderContext->shaderSource;
|
|
for (auto& it : m_gprTemporaries)
|
|
{
|
|
src->addFmt("backupReg{}", it.backupVarIndex);
|
|
_appendRegisterTypeSuffix(src, shaderContext->typeTracker.defaultDataType);
|
|
src->add(" = ");
|
|
src->add(_getRegisterVarName(shaderContext, it.gprIndex));
|
|
_appendChannelAccess(src, it.channel);
|
|
src->add(";" _CRLF);
|
|
}
|
|
}
|
|
|
|
// returns -1 if none present
|
|
sint32 GetTemporaryForGPR(uint8 gprIndex, uint8 channel) const
|
|
{
|
|
for (auto& it : m_gprTemporaries)
|
|
{
|
|
if (it.gprIndex == gprIndex && it.channel == channel)
|
|
return (sint32)it.backupVarIndex;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
private:
|
|
PVPSAlias m_pvps[5]{};
|
|
boost::container::small_vector<GPRTemporary, 4> m_gprTemporaries;
|
|
};
|
|
|
|
sint32 _getVertexShaderOutParamSemanticId(uint32* contextRegisters, sint32 index);
|
|
sint32 _getInputRegisterDataType(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex);
|
|
sint32 _getALUInstructionOutputDataType(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction);
|
|
bool _isReductionInstruction(LatteDecompilerALUInstruction* aluInstruction);
|
|
|
|
/*
|
|
* Writes the name of the output variable and channel
|
|
* E.g. R5f.x or tempf.x if writeMask is 0
|
|
*/
|
|
static void _emitInstructionOutputVariableName(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction)
|
|
{
|
|
auto src = shaderContext->shaderSource;
|
|
sint32 outputDataType = _getALUInstructionOutputDataType(shaderContext, aluInstruction);
|
|
if( aluInstruction->writeMask == 0 )
|
|
{
|
|
// does not output to GPR
|
|
if( !_isReductionInstruction(aluInstruction) )
|
|
{
|
|
// output to PV/PS
|
|
_appendPVPS(shaderContext, src, aluInstruction->instructionGroupIndex, aluInstruction->aluUnit);
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
// output to temp
|
|
src->add("temp");
|
|
_appendRegisterTypeSuffix(src, outputDataType);
|
|
}
|
|
_appendChannelAccess(src, aluInstruction->aluUnit);
|
|
}
|
|
else
|
|
{
|
|
// output to GPR. Aliasing to PV/PS happens at the end of the group
|
|
src->add(_getRegisterVarName(shaderContext, aluInstruction->destGpr, aluInstruction->destRel==0?-1:aluInstruction->indexMode));
|
|
_appendChannelAccess(src, aluInstruction->destElem);
|
|
}
|
|
}
|
|
|
|
// Writes the name of the PV/PS variable which belongs to the instruction's group and ALU unit
static void _emitInstructionPVPSOutputVariableName(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction)
{
	StringBuf* src = shaderContext->shaderSource;
	_appendPVPS(shaderContext, src, aluInstruction->instructionGroupIndex, aluInstruction->aluUnit);
}
|
|
|
|
/*
 * Emits a swizzled read of GPR 'gprIndex', e.g. R5i.xyz or Ri[5].xyz when array GPRs are used
 * channel0..channel3 select the source channels (0-3), -1 marks an unused slot
 * If dataType is >= 0 the access is wrapped in a conversion from the default data type to dataType
 */
static void _emitRegisterAccessCode(LatteDecompilerShaderContext* shaderContext, sint32 gprIndex, sint32 channel0, sint32 channel1, sint32 channel2, sint32 channel3, sint32 dataType = -1)
{
	StringBuf* src = shaderContext->shaderSource;
	sint32 registerElementDataType = shaderContext->typeTracker.defaultDataType;
	cemu_assert_debug(gprIndex >= 0 && gprIndex <= 127);

	const sint32 channelArray[4] = { channel0, channel1, channel2, channel3 };

	// the component count determines the vector width of the conversion
	sint32 numComponents = 0;
	for (sint32 channel : channelArray)
	{
		if (channel >= 0 && channel <= 3)
			numComponents++;
	}

	const bool needsConversion = (dataType >= 0);
	if (needsConversion)
		_emitTypeConversionPrefixMSL(shaderContext, registerElementDataType, dataType, numComponents);

	if (shaderContext->typeTracker.useArrayGPRs)
	{
		src->add("R");
		_appendRegisterTypeSuffix(src, registerElementDataType);
		src->addFmt("[{}]", gprIndex);
	}
	else
	{
		src->addFmt("R{}", gprIndex);
		_appendRegisterTypeSuffix(src, registerElementDataType);
	}

	src->add(".");

	for (sint32 channel : channelArray)
	{
		if (channel == -1)
			continue; // channel not used
		if (channel >= 0 && channel <= 3)
			src->add(_getElementStrByIndex(channel));
		else
			cemu_assert_unimplemented();
	}
	if (needsConversion)
		_emitTypeConversionSuffixMSL(shaderContext, registerElementDataType, dataType);
}
|
|
|
|
// optimized variant of _emitRegisterAccessCode for raw one channel reads
|
|
static void _emitRegisterChannelAccessCode(LatteDecompilerShaderContext* shaderContext, sint32 gprIndex, sint32 channel, sint32 dataType)
|
|
{
|
|
cemu_assert_debug(gprIndex >= 0 && gprIndex <= 127);
|
|
cemu_assert_debug(channel >= 0 && channel < 4);
|
|
StringBuf* src = shaderContext->shaderSource;
|
|
sint32 registerElementDataType = shaderContext->typeTracker.defaultDataType;
|
|
_emitTypeConversionPrefixMSL(shaderContext, registerElementDataType, dataType);
|
|
if (shaderContext->typeTracker.useArrayGPRs)
|
|
src->add("R");
|
|
else
|
|
src->addFmt("R{}", gprIndex);
|
|
_appendRegisterTypeSuffix(src, registerElementDataType);
|
|
if (shaderContext->typeTracker.useArrayGPRs)
|
|
src->addFmt("[{}]", gprIndex);
|
|
src->add(".");
|
|
src->add(_getElementStrByIndex(channel));
|
|
_emitTypeConversionSuffixMSL(shaderContext, registerElementDataType, dataType);
|
|
}
|
|
|
|
// Emits the read of a GPR source operand without type conversion
// If a backup variable exists for the GPR channel, backupReg<n><type> is read instead of the register
static void _emitALURegisterInputAccessCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex)
{
	StringBuf* src = shaderContext->shaderSource;
	sint32 currentRegisterElementType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
	cemu_assert_debug(GPU7_ALU_SRC_IS_GPR(aluInstruction->sourceOperand[operandIndex].sel));
	sint32 gprIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction->sourceOperand[operandIndex].sel);
	sint32 temporaryIndex = shaderContext->aluPVPSState->GetTemporaryForGPR(gprIndex, aluInstruction->sourceOperand[operandIndex].chan);
	if (temporaryIndex < 0)
	{
		// access via register variable
		_emitRegisterAccessCode(shaderContext, gprIndex, aluInstruction->sourceOperand[operandIndex].chan, -1, -1, -1);
		return;
	}
	// access via backup variable
	src->addFmt("backupReg{}", temporaryIndex);
	_appendRegisterTypeSuffix(src, currentRegisterElementType);
}
|
|
|
|
// Emits the read of PV/PS for the given ALU unit by forwarding to the PV/PS state tracker
// operandIndex is not used by this function
static void _emitPVPSAccessCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex, uint8 aluUnitIndex)
{
	// PV/PS is uninitialized for group 0
	cemu_assert_debug(aluInstruction->instructionGroupIndex > 0);
	// PV/PS vars are currently always using the default type (shaderContext->typeTracker.defaultDataType)
	auto* pvpsState = shaderContext->aluPVPSState;
	pvpsState->EmitPVPSAccess(shaderContext, aluUnitIndex, aluInstruction->instructionGroupIndex);
}
|
|
|
|
/*
|
|
* Emits the expression used for calculating the index for uniform access
|
|
* For static access, this is a number
|
|
* For dynamic access, this is AR.* + base
|
|
*/
|
|
static void _emitUniformAccessIndexCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex)
|
|
{
|
|
StringBuf* src = shaderContext->shaderSource;
|
|
bool isUniformRegister = GPU7_ALU_SRC_IS_CFILE(aluInstruction->sourceOperand[operandIndex].sel);
|
|
sint32 uniformOffset = 0; // index into array, for relative accesses this is the base offset
|
|
if( isUniformRegister )
|
|
{
|
|
uniformOffset = GPU7_ALU_SRC_GET_CFILE_INDEX(aluInstruction->sourceOperand[operandIndex].sel);
|
|
}
|
|
else
|
|
{
|
|
if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction->sourceOperand[operandIndex].sel) )
|
|
{
|
|
uniformOffset = GPU7_ALU_SRC_GET_CBANK0_INDEX(aluInstruction->sourceOperand[operandIndex].sel) + aluInstruction->cfInstruction->cBank0AddrBase;
|
|
}
|
|
else
|
|
{
|
|
uniformOffset = GPU7_ALU_SRC_GET_CBANK1_INDEX(aluInstruction->sourceOperand[operandIndex].sel) + aluInstruction->cfInstruction->cBank1AddrBase;
|
|
}
|
|
}
|
|
if( aluInstruction->sourceOperand[operandIndex].rel != 0 )
|
|
{
|
|
if (aluInstruction->indexMode == GPU7_INDEX_AR_X)
|
|
src->addFmt("ARi.x+{}", uniformOffset);
|
|
else if (aluInstruction->indexMode == GPU7_INDEX_AR_Y)
|
|
src->addFmt("ARi.y+{}", uniformOffset);
|
|
else if (aluInstruction->indexMode == GPU7_INDEX_AR_Z)
|
|
src->addFmt("ARi.z+{}", uniformOffset);
|
|
else if (aluInstruction->indexMode == GPU7_INDEX_AR_W)
|
|
src->addFmt("ARi.w+{}", uniformOffset);
|
|
else
|
|
cemu_assert_unimplemented();
|
|
}
|
|
else
|
|
{
|
|
src->addFmt("{}", uniformOffset);
|
|
}
|
|
}
|
|
|
|
/*
 * Emits the read of a uniform source operand, converted to requiredType
 * Depending on the shader's uniform mode the value is read from
 *   supportBuffer.remapped[n]        (remapped, storage treated as signed int)
 *   supportBuffer.uniformRegister[i] (full cfile, storage treated as signed int)
 *   ubuff<n>.d[i]                    (full cbank, storage treated as float)
 * Any other uniform mode triggers debugBreakpoint()
 */
static void _emitUniformAccessCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex, sint32 requiredType)
{
	StringBuf* src = shaderContext->shaderSource;
	const auto& operand = aluInstruction->sourceOperand[operandIndex];
	if (shaderContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_REMAPPED)
	{
		// uniform registers or buffers are accessed statically with predictable offsets
		// find entry in remapped uniform
		if (operand.rel != 0)
			debugBreakpoint();
		const bool isUniformRegister = GPU7_ALU_SRC_IS_CFILE(operand.sel);
		sint32 uniformOffset = 0; // index into array
		sint32 uniformBufferIndex = 0;
		if (isUniformRegister)
		{
			uniformOffset = GPU7_ALU_SRC_GET_CFILE_INDEX(operand.sel);
			uniformBufferIndex = 0;
		}
		else if (GPU7_ALU_SRC_IS_CBANK0(operand.sel))
		{
			uniformOffset = GPU7_ALU_SRC_GET_CBANK0_INDEX(operand.sel) + aluInstruction->cfInstruction->cBank0AddrBase;
			uniformBufferIndex = aluInstruction->cfInstruction->cBank0Index;
		}
		else
		{
			uniformOffset = GPU7_ALU_SRC_GET_CBANK1_INDEX(operand.sel) + aluInstruction->cfInstruction->cBank1AddrBase;
			uniformBufferIndex = aluInstruction->cfInstruction->cBank1Index;
		}
		// linear search, the first matching entry wins
		LatteDecompilerRemappedUniformEntry_t* remappedUniformEntry = NULL;
		for (size_t i = 0; i < shaderContext->shader->list_remappedUniformEntries.size(); i++)
		{
			LatteDecompilerRemappedUniformEntry_t* candidate = shaderContext->shader->list_remappedUniformEntries.data() + i;
			bool isMatch;
			if (candidate->isRegister && isUniformRegister)
				isMatch = (candidate->index == uniformOffset);
			else
				isMatch = (candidate->kcacheBankId == uniformBufferIndex && candidate->index == uniformOffset);
			if (isMatch)
			{
				remappedUniformEntry = candidate;
				break;
			}
		}
		cemu_assert_debug(remappedUniformEntry);
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
		src->addFmt("supportBuffer.remapped[{}]", remappedUniformEntry->mappedIndex);
		_appendChannelAccess(src, operand.chan);
		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
	}
	else if (shaderContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CFILE)
	{
		// uniform registers are accessed with unpredictable (dynamic) offset
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
		src->add("supportBuffer.uniformRegister[");
		_emitUniformAccessIndexCode(shaderContext, aluInstruction, operandIndex);
		src->add("]");

		_appendChannelAccess(src, operand.chan);
		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
	}
	else if (shaderContext->shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
	{
		// uniform buffers are available as a whole
		if (GPU7_ALU_SRC_IS_CFILE(operand.sel))
			debugBreakpoint();
		sint32 uniformBufferIndex = 0;
		if (GPU7_ALU_SRC_IS_CBANK0(operand.sel))
			uniformBufferIndex = aluInstruction->cfInstruction->cBank0Index;
		else
			uniformBufferIndex = aluInstruction->cfInstruction->cBank1Index;
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
		src->addFmt("ubuff{}.d[", uniformBufferIndex);
		_emitUniformAccessIndexCode(shaderContext, aluInstruction, operandIndex);
		src->addFmt("]");

		_appendChannelAccess(src, operand.chan);
		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
	}
	else
		debugBreakpoint();
}
|
|
|
|
// Generates (slow) code to read an indexed GPR
|
|
static void _emitCodeToReadRelativeGPR(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex, sint32 requiredType)
|
|
{
|
|
StringBuf* src = shaderContext->shaderSource;
|
|
uint32 gprBaseIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction->sourceOperand[operandIndex].sel);
|
|
cemu_assert_debug(aluInstruction->sourceOperand[operandIndex].rel != 0);
|
|
|
|
if( shaderContext->typeTracker.useArrayGPRs )
|
|
{
|
|
_emitTypeConversionPrefixMSL(shaderContext, shaderContext->typeTracker.defaultDataType, requiredType);
|
|
src->add(_getRegisterVarName(shaderContext, gprBaseIndex, aluInstruction->indexMode));
|
|
_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
|
|
_emitTypeConversionSuffixMSL(shaderContext, shaderContext->typeTracker.defaultDataType, requiredType);
|
|
return;
|
|
}
|
|
|
|
char indexAccessCode[64];
|
|
if (aluInstruction->indexMode == GPU7_INDEX_AR_X)
|
|
sprintf(indexAccessCode, "ARi.x");
|
|
else if (aluInstruction->indexMode == GPU7_INDEX_AR_Y)
|
|
sprintf(indexAccessCode, "ARi.y");
|
|
else if (aluInstruction->indexMode == GPU7_INDEX_AR_Z)
|
|
sprintf(indexAccessCode, "ARi.z");
|
|
else if (aluInstruction->indexMode == GPU7_INDEX_AR_W)
|
|
sprintf(indexAccessCode, "ARi.w");
|
|
else
|
|
cemu_assert_unimplemented();
|
|
|
|
if( LATTE_DECOMPILER_DTYPE_SIGNED_INT != requiredType )
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
|
|
|
|
// generated code looks like this:
|
|
// result = ((lookupIndex==0)?GPR5:(lookupIndex==1)?GPR6:(lookupIndex==2)?GPR7:...:(lookupIndex==122)?GPR127:0)
|
|
src->add("(");
|
|
for(sint32 i=gprBaseIndex; i<LATTE_NUM_GPR; i++)
|
|
{
|
|
// only emit access code for registers which are potentially written
|
|
if((shaderContext->analyzer.gprUseMask[i / 8] & (1 << (i % 8))) == 0 )
|
|
continue;
|
|
src->addFmt("({}=={})?", indexAccessCode, i-gprBaseIndex);
|
|
// code to access gpr
|
|
uint32 gprIndex = i;
|
|
src->add(_getRegisterVarName(shaderContext, i));
|
|
_appendChannelAccess(src, aluInstruction->sourceOperand[operandIndex].chan);
|
|
src->add(":");
|
|
}
|
|
src->add("0)");
|
|
if( LATTE_DECOMPILER_DTYPE_SIGNED_INT != requiredType )
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, requiredType);
|
|
}
|
|
|
|
static void _emitOperandInputCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, sint32 operandIndex, sint32 requiredType)
|
|
{
|
|
StringBuf* src = shaderContext->shaderSource;
|
|
if( operandIndex < 0 || operandIndex >= 3 )
|
|
debugBreakpoint();
|
|
sint32 requiredTypeOut = requiredType;
|
|
if( requiredType != LATTE_DECOMPILER_DTYPE_FLOAT && (aluInstruction->sourceOperand[operandIndex].abs != 0 || aluInstruction->sourceOperand[operandIndex].neg != 0) )
|
|
{
|
|
// we need to apply float operations on the input but it's not read as a float
|
|
// force internal required type to float and then cast it back to whatever type is actually required
|
|
requiredType = LATTE_DECOMPILER_DTYPE_FLOAT;
|
|
}
|
|
|
|
if( requiredTypeOut != requiredType )
|
|
_emitTypeConversionPrefixMSL(shaderContext, requiredType, requiredTypeOut);
|
|
|
|
if( aluInstruction->sourceOperand[operandIndex].neg != 0 )
|
|
src->add("-(");
|
|
if( aluInstruction->sourceOperand[operandIndex].abs != 0 )
|
|
src->add("abs(");
|
|
|
|
if( GPU7_ALU_SRC_IS_GPR(aluInstruction->sourceOperand[operandIndex].sel) )
|
|
{
|
|
if( aluInstruction->sourceOperand[operandIndex].rel != 0 )
|
|
{
|
|
_emitCodeToReadRelativeGPR(shaderContext, aluInstruction, operandIndex, requiredType);
|
|
}
|
|
else
|
|
{
|
|
uint32 gprIndex = GPU7_ALU_SRC_GET_GPR_INDEX(aluInstruction->sourceOperand[operandIndex].sel);
|
|
if( requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
|
|
{
|
|
// signed int 32bit
|
|
sint32 currentRegisterElementType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
|
|
// write code for register input
|
|
_emitTypeConversionPrefixMSL(shaderContext, currentRegisterElementType, requiredType);
|
|
_emitALURegisterInputAccessCode(shaderContext, aluInstruction, operandIndex);
|
|
_emitTypeConversionSuffixMSL(shaderContext, currentRegisterElementType, requiredType);
|
|
}
|
|
else if( requiredType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT )
|
|
{
|
|
// unsigned int 32bit
|
|
sint32 currentRegisterElementType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
|
|
if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
|
|
{
|
|
// need to convert from int to uint
|
|
src->add("uint(");
|
|
}
|
|
else if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT )
|
|
{
|
|
// no extra work necessary
|
|
}
|
|
else
|
|
debugBreakpoint();
|
|
// write code for register input
|
|
_emitALURegisterInputAccessCode(shaderContext, aluInstruction, operandIndex);
|
|
if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
|
|
{
|
|
src->add(")");
|
|
}
|
|
}
|
|
else if( requiredType == LATTE_DECOMPILER_DTYPE_FLOAT )
|
|
{
|
|
// float 32bit
|
|
sint32 currentRegisterElementType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
|
|
if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
|
|
{
|
|
// need to convert (not cast) from int bits to float
|
|
src->add("as_type<float>("); // TODO: correct?
|
|
}
|
|
else if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_FLOAT )
|
|
{
|
|
// no extra work necessary
|
|
}
|
|
else
|
|
debugBreakpoint();
|
|
// write code for register input
|
|
_emitALURegisterInputAccessCode(shaderContext, aluInstruction, operandIndex);
|
|
if( currentRegisterElementType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
|
|
{
|
|
src->add(")");
|
|
}
|
|
}
|
|
else
|
|
debugBreakpoint();
|
|
}
|
|
}
|
|
else if( GPU7_ALU_SRC_IS_CONST_0F(aluInstruction->sourceOperand[operandIndex].sel) )
|
|
{
|
|
if(requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT || requiredType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT)
|
|
src->add("0");
|
|
else if( requiredType == LATTE_DECOMPILER_DTYPE_FLOAT )
|
|
src->add("0.0");
|
|
}
|
|
else if( GPU7_ALU_SRC_IS_CONST_1F(aluInstruction->sourceOperand[operandIndex].sel) )
|
|
{
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
|
|
src->add("1.0");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
|
|
}
|
|
else if( GPU7_ALU_SRC_IS_CONST_0_5F(aluInstruction->sourceOperand[operandIndex].sel) )
|
|
{
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
|
|
src->add("0.5");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, requiredType);
|
|
}
|
|
else if( GPU7_ALU_SRC_IS_CONST_1I(aluInstruction->sourceOperand[operandIndex].sel) )
|
|
{
|
|
if (requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
|
|
src->add("int(1)");
|
|
else if (requiredType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT)
|
|
src->add("uint(1)");
|
|
else
|
|
cemu_assert_suspicious();
|
|
}
|
|
else if( GPU7_ALU_SRC_IS_CONST_M1I(aluInstruction->sourceOperand[operandIndex].sel) )
|
|
{
|
|
if( requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
|
|
src->add("int(-1)");
|
|
else
|
|
cemu_assert_suspicious();
|
|
}
|
|
else if( GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[operandIndex].sel) )
|
|
{
|
|
if( requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
|
|
src->addFmt("int(0x{:x})", aluInstruction->literalData.w[aluInstruction->sourceOperand[operandIndex].chan]);
|
|
else if( requiredType == LATTE_DECOMPILER_DTYPE_UNSIGNED_INT )
|
|
src->addFmt("uint(0x{:x})", aluInstruction->literalData.w[aluInstruction->sourceOperand[operandIndex].chan]);
|
|
else if (requiredType == LATTE_DECOMPILER_DTYPE_FLOAT)
|
|
{
|
|
uint32 constVal = aluInstruction->literalData.w[aluInstruction->sourceOperand[operandIndex].chan];
|
|
sint32 exponent = (constVal >> 23) & 0xFF;
|
|
exponent -= 127;
|
|
if ((constVal & 0xFF) == 0 && exponent >= -10 && exponent <= 10)
|
|
{
|
|
src->add(_FormatFloatAsConstant(*(float*)&constVal));
|
|
}
|
|
else
|
|
src->addFmt("as_type<float>(0x{:08x})", constVal);
|
|
}
|
|
}
|
|
else if( GPU7_ALU_SRC_IS_CFILE(aluInstruction->sourceOperand[operandIndex].sel) )
|
|
{
|
|
_emitUniformAccessCode(shaderContext, aluInstruction, operandIndex, requiredType);
|
|
}
|
|
else if( GPU7_ALU_SRC_IS_CBANK0(aluInstruction->sourceOperand[operandIndex].sel) ||
|
|
GPU7_ALU_SRC_IS_CBANK1(aluInstruction->sourceOperand[operandIndex].sel) )
|
|
{
|
|
_emitUniformAccessCode(shaderContext, aluInstruction, operandIndex, requiredType);
|
|
}
|
|
else if( GPU7_ALU_SRC_IS_PV(aluInstruction->sourceOperand[operandIndex].sel) )
|
|
{
|
|
sint32 currentPVDataType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
|
|
_emitTypeConversionPrefixMSL(shaderContext, currentPVDataType, requiredType);
|
|
_emitPVPSAccessCode(shaderContext, aluInstruction, operandIndex, aluInstruction->sourceOperand[operandIndex].chan);
|
|
_emitTypeConversionSuffixMSL(shaderContext, currentPVDataType, requiredType);
|
|
}
|
|
else if( GPU7_ALU_SRC_IS_PS(aluInstruction->sourceOperand[operandIndex].sel) )
|
|
{
|
|
sint32 currentPSDataType = _getInputRegisterDataType(shaderContext, aluInstruction, operandIndex);
|
|
_emitTypeConversionPrefixMSL(shaderContext, currentPSDataType, requiredType);
|
|
_emitPVPSAccessCode(shaderContext, aluInstruction, operandIndex, 4);
|
|
_emitTypeConversionSuffixMSL(shaderContext, currentPSDataType, requiredType);
|
|
}
|
|
else
|
|
{
|
|
cemuLog_log(LogType::Force, "Unsupported shader ALU operand sel {:#x}\n", aluInstruction->sourceOperand[operandIndex].sel);
|
|
debugBreakpoint();
|
|
}
|
|
|
|
if( aluInstruction->sourceOperand[operandIndex].abs != 0 )
|
|
src->add(")");
|
|
if( aluInstruction->sourceOperand[operandIndex].neg != 0 )
|
|
src->add(")");
|
|
|
|
if( requiredTypeOut != requiredType )
|
|
_emitTypeConversionSuffixMSL(shaderContext, requiredType, requiredTypeOut);
|
|
}
|
|
|
|
// Emits the opening part of a bit-reinterpreting cast ("as_type<T>(" or
// "as_type<TN>(" for N components) into the shader source.
//
// shaderContext   - decompiler context; text is appended to its shaderSource
// sourceType      - LATTE_DECOMPILER_DTYPE_* of the expression being wrapped
// destinationType - LATTE_DECOMPILER_DTYPE_* the expression must be viewed as
// componentCount  - 1 for a scalar, otherwise appended to the type name (e.g. "int4")
//
// Nothing is emitted when both types are equal. An unrecognized destination
// type triggers cemu_assert_debug and emits nothing.
void _emitTypeConversionPrefixMSL(LatteDecompilerShaderContext* shaderContext, sint32 sourceType, sint32 destinationType, sint32 componentCount)
{
	// same type on both sides -> no reinterpretation required
	if (sourceType == destinationType)
		return;

	StringBuf* out = shaderContext->shaderSource;

	// map the destination data type onto the matching MSL scalar type name
	const char* scalarTypeName = nullptr;
	switch (destinationType)
	{
	case LATTE_DECOMPILER_DTYPE_SIGNED_INT:
		scalarTypeName = "int";
		break;
	case LATTE_DECOMPILER_DTYPE_UNSIGNED_INT:
		scalarTypeName = "uint";
		break;
	case LATTE_DECOMPILER_DTYPE_FLOAT:
		scalarTypeName = "float";
		break;
	default:
		// unknown data type, there is nothing sensible to emit
		cemu_assert_debug(false);
		return;
	}

	// scalars use the plain type name, vectors get the component count appended
	if (componentCount == 1)
		out->addFmt("as_type<{}>(", scalarTypeName);
	else
		out->addFmt("as_type<{}{}>(", scalarTypeName, componentCount);
}
|
|
|
|
// Emits the closing parenthesis that pairs with the cast opened by
// _emitTypeConversionPrefixMSL. Like the prefix, it emits nothing when the
// source and destination data types are equal.
void _emitTypeConversionSuffixMSL(LatteDecompilerShaderContext* shaderContext, sint32 sourceType, sint32 destinationType)
{
	// only a differing type pair means a cast was opened
	if (sourceType != destinationType)
		shaderContext->shaderSource->add(")");
}
|
|
|
|
template<int TDataType>
|
|
static void _emitALUOperationBinary(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluInstruction, const char* operandStr)
|
|
{
|
|
StringBuf* src = shaderContext->shaderSource;
|
|
sint32 outputType = _getALUInstructionOutputDataType(shaderContext, aluInstruction);
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, TDataType, outputType);
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, TDataType);
|
|
src->add((char*)operandStr);
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, TDataType);
|
|
_emitTypeConversionSuffixMSL(shaderContext, TDataType, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
|
|
// Returns true when source operands opIndexA and opIndexB of the instruction
// refer to the same GPR input: both have the same selector, that selector is
// a GPR, and channel, abs, neg and rel are equal on both operands.
static bool _isSameGPROperand(LatteDecompilerALUInstruction* aluInstruction, sint32 opIndexA, sint32 opIndexB)
{
	const auto& operandA = aluInstruction->sourceOperand[opIndexA];
	const auto& operandB = aluInstruction->sourceOperand[opIndexB];

	// the selectors are compared first, so testing one of them for GPR is sufficient
	return operandA.sel == operandB.sel &&
		GPU7_ALU_SRC_IS_GPR(operandA.sel) &&
		operandA.chan == operandB.chan &&
		operandA.abs == operandB.abs &&
		operandA.neg == operandB.neg &&
		operandA.rel == operandB.rel;
}
|
|
|
|
// Returns true when source operand opIndex of the instruction has its abs
// and/or neg modifier set (non-zero).
static bool _operandHasModifiers(LatteDecompilerALUInstruction* aluInstruction, sint32 opIndex)
{
	const auto& operand = aluInstruction->sourceOperand[opIndex];
	const bool hasAbs = operand.abs != 0;
	const bool hasNeg = operand.neg != 0;
	return hasAbs || hasNeg;
}
|
|
|
|
static void _emitALUOP2InstructionCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, LatteDecompilerALUInstruction* aluInstruction)
|
|
{
|
|
StringBuf* src = shaderContext->shaderSource;
|
|
sint32 outputType = _getALUInstructionOutputDataType(shaderContext, aluInstruction); // data type of output
|
|
if( aluInstruction->opcode == ALU_OP2_INST_MOV )
|
|
{
|
|
bool requiresFloatMove = false;
|
|
requiresFloatMove = aluInstruction->sourceOperand[0].abs != 0 || aluInstruction->sourceOperand[0].neg != 0;
|
|
if( requiresFloatMove )
|
|
{
|
|
// abs/neg operations are applied to source operand, do float based move
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_MOVA_FLOOR )
|
|
{
|
|
cemu_assert_debug(aluInstruction->writeMask == 0);
|
|
cemu_assert_debug(aluInstruction->omod == 0);
|
|
src->add("tempResultf = ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(";" _CRLF);
|
|
src->add("tempResultf = floor(tempResultf);" _CRLF);
|
|
src->add("tempResultf = clamp(tempResultf, -256.0, 255.0);" _CRLF);
|
|
// set AR
|
|
if( aluInstruction->destElem == 0 )
|
|
src->add("ARi.x = int(tempResultf);" _CRLF);
|
|
else if( aluInstruction->destElem == 1 )
|
|
src->add("ARi.y = int(tempResultf);" _CRLF);
|
|
else if( aluInstruction->destElem == 2 )
|
|
src->add("ARi.z = int(tempResultf);" _CRLF);
|
|
else
|
|
src->add("ARi.w = int(tempResultf);" _CRLF);
|
|
// set output
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
if( outputType != LATTE_DECOMPILER_DTYPE_SIGNED_INT )
|
|
debugBreakpoint(); // todo
|
|
src->add("as_type<int>(tempResultf)");
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_MOVA_INT )
|
|
{
|
|
cemu_assert_debug(aluInstruction->writeMask == 0);
|
|
cemu_assert_debug(aluInstruction->omod == 0);
|
|
src->add("tempResulti = ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
|
|
src->add(";" _CRLF);
|
|
src->add("tempResulti = clamp(tempResulti, -256, 255);" _CRLF);
|
|
// set AR
|
|
if( aluInstruction->destElem == 0 )
|
|
src->add("ARi.x = tempResulti;" _CRLF);
|
|
else if( aluInstruction->destElem == 1 )
|
|
src->add("ARi.y = tempResulti;" _CRLF);
|
|
else if( aluInstruction->destElem == 2 )
|
|
src->add("ARi.z = tempResulti;" _CRLF);
|
|
else
|
|
src->add("ARi.w = tempResulti;" _CRLF);
|
|
// set output
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
if( outputType != LATTE_DECOMPILER_DTYPE_SIGNED_INT )
|
|
debugBreakpoint(); // todo
|
|
src->add("tempResulti");
|
|
src->add(";" _CRLF);
|
|
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_ADD )
|
|
{
|
|
_emitALUOperationBinary<LATTE_DECOMPILER_DTYPE_FLOAT>(shaderContext, aluInstruction, " + ");
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_MUL )
|
|
{
|
|
// 0*anything is always 0
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
|
|
// if any operand is a non-zero literal or constant we can use standard multiplication
|
|
bool useDefaultMul = false;
|
|
if (GPU7_ALU_SRC_IS_CONST_0F(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_CONST_0F(aluInstruction->sourceOperand[1].sel))
|
|
{
|
|
// result is always zero
|
|
src->add("0.0");
|
|
}
|
|
else
|
|
{
|
|
// multiply
|
|
if (GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[1].sel) ||
|
|
GPU7_ALU_SRC_IS_ANY_CONST(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_ANY_CONST(aluInstruction->sourceOperand[1].sel))
|
|
{
|
|
useDefaultMul = true;
|
|
}
|
|
if (shaderContext->options->strictMul && useDefaultMul == false)
|
|
{
|
|
src->add("mul_nonIEEE(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(", ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(")");
|
|
}
|
|
else
|
|
{
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(" * ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
}
|
|
}
|
|
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_MUL_IEEE )
|
|
{
|
|
// 0*anything according to IEEE rules
|
|
_emitALUOperationBinary<LATTE_DECOMPILER_DTYPE_FLOAT>(shaderContext, aluInstruction, " * ");
|
|
}
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_RECIP_IEEE)
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add("1.0");
|
|
src->add(" / ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_RECIP_FF)
|
|
{
|
|
// untested (BotW bombs)
|
|
src->add("tempResultf = 1.0 / (");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(");" _CRLF);
|
|
// INF becomes 0.0
|
|
src->add("if( isinf(tempResultf) == true && (as_type<int>(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF);
|
|
// -INF becomes -0.0
|
|
src->add("else if( isinf(tempResultf) == true && (as_type<int>(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF);
|
|
// assign result to output
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add("tempResultf");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_IEEE ||
|
|
aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_CLAMPED ||
|
|
aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_FF )
|
|
{
|
|
// todo: This should be correct but testing is needed
|
|
src->add("tempResultf = 1.0 / sqrt(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(");" _CRLF);
|
|
if (aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_CLAMPED)
|
|
{
|
|
// note: if( -INF < 0.0 ) does not resolve to true
|
|
src->add("if( isinf(tempResultf) == true && (as_type<int>(tempResultf)&0x80000000) != 0 ) tempResultf = -3.40282347E+38F;" _CRLF);
|
|
src->add("else if( isinf(tempResultf) == true && (as_type<int>(tempResultf)&0x80000000) == 0 ) tempResultf = 3.40282347E+38F;" _CRLF);
|
|
}
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_RECIPSQRT_FF)
|
|
{
|
|
// untested (BotW bombs)
|
|
src->add("if( isinf(tempResultf) == true && (as_type<int>(tempResultf)&0x80000000) != 0 ) tempResultf = -0.0;" _CRLF);
|
|
src->add("else if( isinf(tempResultf) == true && (as_type<int>(tempResultf)&0x80000000) == 0 ) tempResultf = 0.0;" _CRLF);
|
|
}
|
|
// assign result to output
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add("tempResultf");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_MAX ||
|
|
aluInstruction->opcode == ALU_OP2_INST_MIN ||
|
|
aluInstruction->opcode == ALU_OP2_INST_MAX_DX10 ||
|
|
aluInstruction->opcode == ALU_OP2_INST_MIN_DX10 )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
if( aluInstruction->opcode == ALU_OP2_INST_MAX )
|
|
src->add("max");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_MIN )
|
|
src->add("min");
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_MAX_DX10)
|
|
src->add("max");
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_MIN_DX10)
|
|
src->add("min");
|
|
src->add("(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(", ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(")");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_FLOOR ||
|
|
aluInstruction->opcode == ALU_OP2_INST_FRACT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_TRUNC )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
if( aluInstruction->opcode == ALU_OP2_INST_FLOOR )
|
|
src->add("floor");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_FRACT )
|
|
src->add("fract");
|
|
else
|
|
src->add("trunc");
|
|
src->add("(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(")");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_LOG_CLAMPED ||
|
|
aluInstruction->opcode == ALU_OP2_INST_LOG_IEEE )
|
|
{
|
|
src->add("tempResultf = max(0.0, ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(");" _CRLF);
|
|
|
|
src->add("tempResultf = log2(tempResultf);" _CRLF);
|
|
if( aluInstruction->opcode == ALU_OP2_INST_LOG_CLAMPED )
|
|
{
|
|
src->add("if( isinf(tempResultf) == true ) tempResultf = -3.40282347E+38F;" _CRLF);
|
|
}
|
|
// assign result to output
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add("tempResultf");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_RNDNE )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add("rint(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(")");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_EXP_IEEE )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add("exp2");
|
|
src->add("(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(")");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SQRT_IEEE )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add("sqrt");
|
|
src->add("(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(")");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SIN ||
|
|
aluInstruction->opcode == ALU_OP2_INST_COS )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
if( aluInstruction->opcode == ALU_OP2_INST_SIN )
|
|
src->add("sin");
|
|
else
|
|
src->add("cos");
|
|
src->add("((");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(")/0.1591549367)");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_FLT_TO_INT )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
|
|
src->add("int");
|
|
src->add("(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(")");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_FLT_TO_UINT )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT, outputType);
|
|
src->add("uint");
|
|
src->add("(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(")");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_INT_TO_FLOAT )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add("float(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
|
|
src->add(")");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_UINT_TO_FLOAT )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add("float(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT);
|
|
src->add(")");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_AND_INT)
|
|
_emitALUOperationBinary<LATTE_DECOMPILER_DTYPE_SIGNED_INT>(shaderContext, aluInstruction, " & ");
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_OR_INT)
|
|
_emitALUOperationBinary<LATTE_DECOMPILER_DTYPE_SIGNED_INT>(shaderContext, aluInstruction, " | ");
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_XOR_INT)
|
|
_emitALUOperationBinary<LATTE_DECOMPILER_DTYPE_SIGNED_INT>(shaderContext, aluInstruction, " ^ ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_NOT_INT )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
|
|
src->add("~(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
|
|
src->add(")");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_ADD_INT )
|
|
_emitALUOperationBinary<LATTE_DECOMPILER_DTYPE_SIGNED_INT>(shaderContext, aluInstruction, " + ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_MAX_INT || aluInstruction->opcode == ALU_OP2_INST_MIN_INT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_MAX_UINT || aluInstruction->opcode == ALU_OP2_INST_MIN_UINT)
|
|
{
|
|
// not verified
|
|
bool isUnsigned = aluInstruction->opcode == ALU_OP2_INST_MAX_UINT || aluInstruction->opcode == ALU_OP2_INST_MIN_UINT;
|
|
auto opType = isUnsigned ? LATTE_DECOMPILER_DTYPE_UNSIGNED_INT : LATTE_DECOMPILER_DTYPE_SIGNED_INT;
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, opType, outputType);
|
|
if( aluInstruction->opcode == ALU_OP2_INST_MAX_INT || aluInstruction->opcode == ALU_OP2_INST_MAX_UINT )
|
|
src->add("max(");
|
|
else
|
|
src->add("min(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, opType);
|
|
src->add(", ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, opType);
|
|
_emitTypeConversionSuffixMSL(shaderContext, opType, outputType);
|
|
src->add(");" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SUB_INT )
|
|
{
|
|
// note: The AMD doc says src1 is on the left side but tests indicate otherwise. It's src0 - src1.
|
|
_emitALUOperationBinary<LATTE_DECOMPILER_DTYPE_SIGNED_INT>(shaderContext, aluInstruction, " - ");
|
|
}
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_MULLO_INT)
|
|
_emitALUOperationBinary<LATTE_DECOMPILER_DTYPE_SIGNED_INT>(shaderContext, aluInstruction, " * ");
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_MULLO_UINT)
|
|
_emitALUOperationBinary<LATTE_DECOMPILER_DTYPE_UNSIGNED_INT>(shaderContext, aluInstruction, " * ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_LSHL_INT )
|
|
_emitALUOperationBinary<LATTE_DECOMPILER_DTYPE_SIGNED_INT>(shaderContext, aluInstruction, " << ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_LSHR_INT )
|
|
_emitALUOperationBinary<LATTE_DECOMPILER_DTYPE_SIGNED_INT>(shaderContext, aluInstruction, " >> ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_ASHR_INT )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
|
|
src->add(" >> ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SETGT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_SETGE ||
|
|
aluInstruction->opcode == ALU_OP2_INST_SETNE ||
|
|
aluInstruction->opcode == ALU_OP2_INST_SETE )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add("(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
if( aluInstruction->opcode == ALU_OP2_INST_SETGT )
|
|
src->add(" > ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SETGE )
|
|
src->add(" >= ");
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_SETNE)
|
|
src->add(" != ");
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_SETE)
|
|
src->add(" == ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(")?1.0:0.0");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SETGT_DX10 ||
|
|
aluInstruction->opcode == ALU_OP2_INST_SETE_DX10 ||
|
|
aluInstruction->opcode == ALU_OP2_INST_SETNE_DX10 ||
|
|
aluInstruction->opcode == ALU_OP2_INST_SETGE_DX10 )
|
|
{
|
|
if( aluInstruction->omod != 0 )
|
|
debugBreakpoint();
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
|
|
src->add("((");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
if( aluInstruction->opcode == ALU_OP2_INST_SETE_DX10 )
|
|
src->add(" == ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SETNE_DX10 )
|
|
src->add(" != ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SETGT_DX10 )
|
|
src->add(" > ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SETGE_DX10 )
|
|
src->add(" >= ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(")?-1:0)");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
|
|
src->add(";");
|
|
src->add(_CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SETE_INT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_SETNE_INT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_SETGT_INT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_SETGE_INT )
|
|
{
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
|
|
src->add("(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
|
|
if( aluInstruction->opcode == ALU_OP2_INST_SETE_INT )
|
|
src->add(" == ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SETNE_INT )
|
|
src->add(" != ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SETGT_INT )
|
|
src->add(" > ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SETGE_INT )
|
|
src->add(" >= ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
|
|
src->add(")?-1:0");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SETGE_UINT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_SETGT_UINT )
|
|
{
|
|
// todo: Unsure if the result is unsigned or signed
|
|
_emitInstructionOutputVariableName(shaderContext, aluInstruction);
|
|
src->add(" = ");
|
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
|
|
src->add("(");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT);
|
|
if( aluInstruction->opcode == ALU_OP2_INST_SETGE_UINT )
|
|
src->add(" >= ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_SETGT_UINT )
|
|
src->add(" > ");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_UNSIGNED_INT);
|
|
src->add(")?int(0xFFFFFFFF):int(0x0)");
|
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
|
|
src->add(";" _CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE ||
|
|
aluInstruction->opcode == ALU_OP2_INST_PRED_SETE ||
|
|
aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE ||
|
|
aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE_INT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_PRED_SETE_INT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE_INT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT_INT )
|
|
{
|
|
cemu_assert_debug(aluInstruction->writeMask == 0);
|
|
bool isIntPred = (aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE_INT) || (aluInstruction->opcode == ALU_OP2_INST_PRED_SETE_INT) || (aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE_INT) || (aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT_INT);
|
|
|
|
src->add("predResult");
|
|
src->add(" = (");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, isIntPred?LATTE_DECOMPILER_DTYPE_SIGNED_INT:LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
|
|
if (aluInstruction->opcode == ALU_OP2_INST_PRED_SETE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETE_INT)
|
|
src->add(" == ");
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT || aluInstruction->opcode == ALU_OP2_INST_PRED_SETGT_INT)
|
|
src->add(" > ");
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETGE_INT)
|
|
src->add(" >= ");
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE || aluInstruction->opcode == ALU_OP2_INST_PRED_SETNE_INT)
|
|
src->add(" != ");
|
|
else
|
|
cemu_assert_debug(false);
|
|
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, isIntPred?LATTE_DECOMPILER_DTYPE_SIGNED_INT:LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(");" _CRLF);
|
|
// handle result of predicate instruction based on current ALU clause type
|
|
if( cfInstruction->type == GPU7_CF_INST_ALU_PUSH_BEFORE )
|
|
{
|
|
src->addFmt("{} = predResult;" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth));
|
|
src->addFmt("{} = predResult == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth));
|
|
}
|
|
else if( cfInstruction->type == GPU7_CF_INST_ALU_BREAK )
|
|
{
|
|
// leave current loop
|
|
src->add("if( predResult == false ) break;" _CRLF);
|
|
}
|
|
else
|
|
cemu_assert_debug(false);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_KILLE_INT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_KILLNE_INT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_KILLGT_INT)
|
|
{
|
|
src->add("if( ");
|
|
src->add(" (");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
|
|
if( aluInstruction->opcode == ALU_OP2_INST_KILLE_INT )
|
|
src->add(" == ");
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_KILLNE_INT)
|
|
src->add(" != ");
|
|
else if (aluInstruction->opcode == ALU_OP2_INST_KILLGT_INT)
|
|
src->add(" > ");
|
|
else
|
|
debugBreakpoint();
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
|
|
src->add(")");
|
|
src->add(") discard_fragment();");
|
|
src->add(_CRLF);
|
|
}
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_KILLGT ||
|
|
aluInstruction->opcode == ALU_OP2_INST_KILLGE ||
|
|
aluInstruction->opcode == ALU_OP2_INST_KILLE )
|
|
{
|
|
src->add("if( ");
|
|
src->add(" (");
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
if( aluInstruction->opcode == ALU_OP2_INST_KILLGT )
|
|
src->add(" > ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_KILLGE )
|
|
src->add(" >= ");
|
|
else if( aluInstruction->opcode == ALU_OP2_INST_KILLE )
|
|
src->add(" == ");
|
|
else
|
|
debugBreakpoint();
|
|
_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
src->add(")");
|
|
src->add(") discard_fragment();");
|
|
src->add(_CRLF);
|
|
}
|
|
else
|
|
{
|
|
src->add("Unsupported instruction;" _CRLF);
|
|
debug_printf("Unsupported ALU op2 instruction 0x%x\n", aluInstruction->opcode);
|
|
shaderContext->shader->hasError = true;
|
|
}
|
|
}
|
|
|
|
/*
 * Emits the MSL statement for one three-operand (OP3) ALU instruction.
 * Covered opcodes: the MULADD family and the integer / float conditional selects.
 * Any other opcode emits an "Unsupported instruction" marker and flags the shader as erroneous.
 * cfInstruction is not used by this function.
 */
static void _emitALUOP3InstructionCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, LatteDecompilerALUInstruction* aluInstruction)
{
	StringBuf* src = shaderContext->shaderSource;
	cemu_assert_debug(aluInstruction->destRel == 0); // todo
	const sint32 outputType = _getALUInstructionOutputDataType(shaderContext, aluInstruction);
	const auto opcode = aluInstruction->opcode;

	// classify the opcode once, the flags are reused by the shortcut and by the generic handlers
	const bool isMulAdd =
		opcode == ALU_OP3_INST_MULADD ||
		opcode == ALU_OP3_INST_MULADD_D2 ||
		opcode == ALU_OP3_INST_MULADD_M2 ||
		opcode == ALU_OP3_INST_MULADD_M4 ||
		opcode == ALU_OP3_INST_MULADD_IEEE;
	const bool isIntSelect =
		opcode == ALU_OP3_INST_CNDE_INT ||
		opcode == ALU_OP3_INST_CNDGT_INT ||
		opcode == ALU_OP3_INST_CMOVGE_INT;
	const bool isFloatSelect =
		opcode == ALU_OP3_INST_CMOVGE ||
		opcode == ALU_OP3_INST_CMOVE ||
		opcode == ALU_OP3_INST_CMOVGT;

	/* shortcut: a select whose two candidate operands are identical is just a move */
	if ((isIntSelect || isFloatSelect) && _isSameGPROperand(aluInstruction, 1, 2) && !_operandHasModifiers(aluInstruction, 1))
	{
		// whichever way the condition goes, the same value is picked
		_emitInstructionOutputVariableName(shaderContext, aluInstruction);
		src->add(" = ");
		_emitOperandInputCode(shaderContext, aluInstruction, 1, outputType);
		src->add(";" _CRLF);
		return;
	}

	/* generic handlers */
	if (isMulAdd)
	{
		// todo: The difference between MULADD and MULADD IEEE is that the former has 0*anything=0 rule similar to MUL/MUL_IEEE?
		// plain MULADD has no trailing scale, so the wrapping parenthesis is omitted to keep the output readable
		const bool wrapInParens = (opcode != ALU_OP3_INST_MULADD);
		_emitInstructionOutputVariableName(shaderContext, aluInstruction);
		src->add(" = ");
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
		if (wrapInParens)
			src->add("(");

		// the regular '*' operator is used when either factor is a literal/constant or when the opcode is the IEEE variant
		const bool factorIsLiteralOrConst =
			GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_LITERAL(aluInstruction->sourceOperand[1].sel) ||
			GPU7_ALU_SRC_IS_ANY_CONST(aluInstruction->sourceOperand[0].sel) || GPU7_ALU_SRC_IS_ANY_CONST(aluInstruction->sourceOperand[1].sel);
		const bool useDefaultMul = factorIsLiteralOrConst || (opcode == ALU_OP3_INST_MULADD_IEEE);

		if (shaderContext->options->strictMul && !useDefaultMul)
		{
			src->add("mul_nonIEEE(");
			_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
			src->add(",");
			_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
			src->add(")");
		}
		else
		{
			_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
			src->add(" * ");
			_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
		}

		src->add(" + ");
		_emitOperandInputCode(shaderContext, aluInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT);
		if (wrapInParens)
			src->add(")");
		// trailing scale of the D2 / M2 / M4 variants
		if (opcode == ALU_OP3_INST_MULADD_D2)
			src->add("/2.0");
		else if (opcode == ALU_OP3_INST_MULADD_M2)
			src->add("*2.0");
		else if (opcode == ALU_OP3_INST_MULADD_M4)
			src->add("*4.0");
		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
		src->add(";" _CRLF);
		return;
	}

	if (isIntSelect)
	{
		// a negate modifier on either candidate makes the candidates be read as floats instead of ints
		const bool requiresFloatResult = (aluInstruction->sourceOperand[1].neg != 0) || (aluInstruction->sourceOperand[2].neg != 0);
		const sint32 valueType = requiresFloatResult ? LATTE_DECOMPILER_DTYPE_FLOAT : LATTE_DECOMPILER_DTYPE_SIGNED_INT;

		const char* comparison = " >= "; // ALU_OP3_INST_CMOVGE_INT
		if (opcode == ALU_OP3_INST_CNDE_INT)
			comparison = " == ";
		else if (opcode == ALU_OP3_INST_CNDGT_INT)
			comparison = " > ";

		_emitInstructionOutputVariableName(shaderContext, aluInstruction);
		src->add(" = ");
		_emitTypeConversionPrefixMSL(shaderContext, valueType, outputType);
		src->add("((");
		_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
		src->add(comparison);
		src->add("0)?(");
		_emitOperandInputCode(shaderContext, aluInstruction, 1, valueType);
		src->add("):(");
		_emitOperandInputCode(shaderContext, aluInstruction, 2, valueType);
		src->add("))");
		_emitTypeConversionSuffixMSL(shaderContext, valueType, outputType);
		src->add(";" _CRLF);
		return;
	}

	if (isFloatSelect)
	{
		const char* comparison = " > "; // ALU_OP3_INST_CMOVGT
		if (opcode == ALU_OP3_INST_CMOVE)
			comparison = " == ";
		else if (opcode == ALU_OP3_INST_CMOVGE)
			comparison = " >= ";

		// the condition operand is compared as float, the two candidates are read as signed int
		_emitInstructionOutputVariableName(shaderContext, aluInstruction);
		src->add(" = ");
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
		src->add("((");
		_emitOperandInputCode(shaderContext, aluInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
		src->add(comparison);
		src->add("0.0)?(");
		_emitOperandInputCode(shaderContext, aluInstruction, 1, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
		src->add("):(");
		_emitOperandInputCode(shaderContext, aluInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
		src->add("))");
		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, outputType);
		src->add(";" _CRLF);
		return;
	}

	// no handler for this opcode
	src->add("Unsupported instruction;" _CRLF);
	debug_printf("Unsupported ALU op3 instruction 0x%x\n", aluInstruction->opcode);
	shaderContext->shader->hasError = true;
}
|
|
|
|
/*
 * Emits MSL for a reduction operation, which spans four consecutive ALU instructions (one per lane).
 * Supported: DOT4 / DOT4_IEEE (a single dot() whose result goes to the output of lane 0) and
 * CUBE (a redcCUBE(...) call followed by one assignment per lane).
 * Anything else hits cemu_assert_unimplemented().
 */
static void _emitALUReductionInstructionCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction* aluRedcInstruction[4])
{
	StringBuf* src = shaderContext->shaderSource;
	LatteDecompilerALUInstruction* const leadInstruction = aluRedcInstruction[0];

	// only OP2-encoded reductions are handled
	if (!(leadInstruction->isOP3 == false))
	{
		cemu_assert_unimplemented();
		return;
	}

	// emits float4(a,b,c,d) where each component is the given source operand of the respective lane instruction
	auto emitOperandAsFloat4 = [&](sint32 operandIndex)
	{
		src->add("float4(");
		for (sint32 lane = 0; lane < 4; lane++)
		{
			if (lane != 0)
				src->add(",");
			_emitOperandInputCode(shaderContext, aluRedcInstruction[lane], operandIndex, LATTE_DECOMPILER_DTYPE_FLOAT);
		}
		src->add(")");
	};

	if (leadInstruction->opcode == ALU_OP2_INST_DOT4 || leadInstruction->opcode == ALU_OP2_INST_DOT4_IEEE)
	{
		// todo: Figure out and implement the difference between normal DOT4 and DOT4_IEEE
		const sint32 outputType = _getALUInstructionOutputDataType(shaderContext, leadInstruction);
		_emitInstructionOutputVariableName(shaderContext, leadInstruction);
		src->add(" = ");
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);

		// dot(float4(op0),float4(op1))
		src->add("dot(");
		emitOperandAsFloat4(0);
		src->add(",");
		emitOperandAsFloat4(1);
		src->add(")");

		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, outputType);
		src->add(";" _CRLF);
	}
	else if (leadInstruction->opcode == ALU_OP2_INST_CUBE)
	{
		/*
		 * CUBE semantics (mostly guessed, based on the DirectX/OpenGL specs):
		 *
		 * The input is a float4 holding a 3d direction vector (may be unnormalized) plus a w component,
		 * which can be ignored because it only scales the vector and does not change the direction.
		 *
		 * The major axis is the closest axis-aligned vector. The six candidates and their ids:
		 *   +rx 0, -rx 1, +ry 2, -ry 3, +rz 4, -rz 5
		 * It is found by taking the component with the largest absolute value and zeroing the others.
		 * The value left in that vector is what the AMD documentation calls 'MajorAxis'.
		 * The S,T coordinates come from the two remaining components.
		 *   Example: -0.5,0.2,0.4 -> -rx -> -0.5,0.0,0.0   MajorAxis: -0.5, S: 0.2 T: 0.4
		 *
		 * The instruction expects its inputs in a specific arrangement:
		 *   src0 = Rn.zzxy
		 *   src1 = Rn.yxzz
		 * This is probably tied to how the instruction works internally. Per ALU unit the pairs are:
		 *   z y -> compare y/z
		 *   z x -> compare x/z
		 *   x z -> compare x/z
		 *   y z -> compare y/z
		 */

		// redcCUBE(float4(src0), float4(src1), cubeMapSTM, cubeMapFaceId);
		src->add("redcCUBE(");
		emitOperandAsFloat4(0);
		src->add(",");
		emitOperandAsFloat4(1);
		src->add(",");
		src->add("cubeMapSTM,cubeMapFaceId);" _CRLF);

		// per lane: the expression holding the result and the type it is produced in
		struct CubeLaneResult
		{
			const char* expression;
			sint32 dataType;
		};
		const CubeLaneResult laneResults[4] =
		{
			{ "cubeMapSTM.x", LATTE_DECOMPILER_DTYPE_FLOAT },       // dst.X (S)
			{ "cubeMapSTM.y", LATTE_DECOMPILER_DTYPE_FLOAT },       // dst.Y (T)
			{ "cubeMapSTM.z", LATTE_DECOMPILER_DTYPE_FLOAT },       // dst.Z (MajorAxis)
			{ "cubeMapFaceId", LATTE_DECOMPILER_DTYPE_SIGNED_INT }, // dst.W (FaceId)
		};
		for (sint32 lane = 0; lane < 4; lane++)
		{
			const sint32 outputType = _getALUInstructionOutputDataType(shaderContext, aluRedcInstruction[lane]);
			_emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[lane]);
			src->add(" = ");
			_emitTypeConversionPrefixMSL(shaderContext, laneResults[lane].dataType, outputType);
			src->add(laneResults[lane].expression);
			_emitTypeConversionSuffixMSL(shaderContext, laneResults[lane].dataType, outputType);
			src->add(";" _CRLF);
		}
	}
	else
		cemu_assert_unimplemented();
}
|
|
|
|
/*
 * Determines the extent of the instruction group that begins at startIndex (all following
 * instructions that share its instructionGroupIndex) and hands that range to
 * aluPVPSState->CreateGPRTemporaries().
 */
static void _emitALUClauseRegisterBackupCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, sint32 startIndex)
{
	const sint32 groupIndex = cfInstruction->instructionsALU[startIndex].instructionGroupIndex;
	// advance past every instruction that still belongs to the same group
	size_t endIndex = (size_t)startIndex + 1;
	for (; endIndex < cfInstruction->instructionsALU.size(); endIndex++)
	{
		if (groupIndex != cfInstruction->instructionsALU[endIndex].instructionGroupIndex)
			break;
	}
	const size_t groupSize = endIndex - (size_t)startIndex;
	shaderContext->aluPVPSState->CreateGPRTemporaries(shaderContext, { cfInstruction->instructionsALU.data() + startIndex, groupSize });
}
|
|
|
|
/*
|
|
bool _isPVUsedInNextGroup(LatteDecompilerCFInstruction* cfInstruction, sint32 startIndex, sint32 pvUnit)
|
|
{
|
|
sint32 currentGroupIndex = cfInstruction->instructionsALU[startIndex].instructionGroupIndex;
|
|
for (sint32 i = startIndex + 1; i < (sint32)cfInstruction->instructionsALU.size(); i++)
|
|
{
|
|
LatteDecompilerALUInstruction& aluInstructionItr = cfInstruction->instructionsALU[i];
|
|
if(aluInstructionItr.instructionGroupIndex == currentGroupIndex )
|
|
continue;
|
|
if ((sint32)aluInstructionItr.instructionGroupIndex > currentGroupIndex + 1)
|
|
return false;
|
|
// check OP code type
|
|
if (aluInstructionItr.isOP3)
|
|
{
|
|
// op0
|
|
if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[0].sel))
|
|
{
|
|
uint32 chan = aluInstructionItr.sourceOperand[0].chan;
|
|
if (pvUnit == chan)
|
|
return true;
|
|
}
|
|
// op1
|
|
if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[1].sel))
|
|
{
|
|
uint32 chan = aluInstructionItr.sourceOperand[1].chan;
|
|
if (pvUnit == chan)
|
|
return true;
|
|
}
|
|
// op2
|
|
if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[2].sel))
|
|
{
|
|
uint32 chan = aluInstructionItr.sourceOperand[2].chan;
|
|
if (pvUnit == chan)
|
|
return true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// op0
|
|
if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[0].sel))
|
|
{
|
|
uint32 chan = aluInstructionItr.sourceOperand[0].chan;
|
|
if (pvUnit == chan)
|
|
return true;
|
|
}
|
|
// op1
|
|
if (GPU7_ALU_SRC_IS_PV(aluInstructionItr.sourceOperand[1].sel))
|
|
{
|
|
uint32 chan = aluInstructionItr.sourceOperand[1].chan;
|
|
if (pvUnit == chan)
|
|
return true;
|
|
}
|
|
// todo: Not all operations use both operands
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
*/
|
|
|
|
/*
 * Emits a three-component vector constructor built from one source operand of each of three ALU instructions.
 * dataType selects between "float3(...)" and "int3(...)"; every other type hits cemu_assert_unimplemented()
 * and emits nothing.
 */
static void _emitFloat3(LatteDecompilerShaderContext* shaderContext, uint32 dataType, LatteDecompilerALUInstruction* aluInst0, sint32 opIdx0, LatteDecompilerALUInstruction* aluInst1, sint32 opIdx1, LatteDecompilerALUInstruction* aluInst2, sint32 opIdx2)
{
	StringBuf* src = shaderContext->shaderSource;

	// pick the constructor and the type the three operands are read as
	const char* constructorOpen;
	sint32 componentType;
	if (dataType == LATTE_DECOMPILER_DTYPE_FLOAT)
	{
		constructorOpen = "float3(";
		componentType = LATTE_DECOMPILER_DTYPE_FLOAT;
	}
	else if (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
	{
		constructorOpen = "int3(";
		componentType = LATTE_DECOMPILER_DTYPE_SIGNED_INT;
	}
	else
	{
		cemu_assert_unimplemented();
		return;
	}

	src->add(constructorOpen);
	_emitOperandInputCode(shaderContext, aluInst0, opIdx0, componentType);
	src->add(",");
	_emitOperandInputCode(shaderContext, aluInst1, opIdx1, componentType);
	src->add(",");
	_emitOperandInputCode(shaderContext, aluInst2, opIdx2, componentType);
	src->add(")");
}
|
|
|
|
/*
 * Emits the left-hand side of a vector assignment: "<register>.<swizzle> = ".
 * The register is taken from the destination GPR of the first instruction and the swizzle is
 * the concatenation of the destination element of each of the count instructions.
 */
static void _emitGPRVectorAssignment(LatteDecompilerShaderContext* shaderContext, LatteDecompilerALUInstruction** aluInstructions, sint32 count)
{
	StringBuf* src = shaderContext->shaderSource;

	// target register (GPR of the first instruction)
	LatteDecompilerALUInstruction* firstInstruction = aluInstructions[0];
	src->add(_getRegisterVarName(shaderContext, firstInstruction->destGpr, -1));
	src->add(".");

	// one swizzle letter per instruction
	sint32 lane = 0;
	while (lane < count)
	{
		LatteDecompilerALUInstruction* laneInstruction = aluInstructions[lane];
		src->add(_getElementStrByIndex(laneInstruction->destElem));
		lane++;
	}
	src->add(" = ");
}
|
|
|
|
/*
 * Emits MSL for every ALU instruction of an ALU clause.
 *
 * For each instruction group (detected via indexInGroup == 0) this:
 *  - emits a "// <groupIndex>" marker comment
 *  - feeds the previous group into the PV/PS tracker
 *  - lets _emitALUClauseRegisterBackupCode() create GPR temporaries for the new group
 * Each instruction is then dispatched to the reduction, OP3 or OP2 emitter, followed by the
 * output modifier (omod), the destination clamp and, for reductions, the result broadcast.
 *
 * shaderContext->aluPVPSState points at a local tracker for the duration of the call and is
 * reset to nullptr before returning.
 */
static void _emitALUClauseCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction)
{
	ALUClauseTemporariesState pvpsState;
	shaderContext->aluPVPSState = &pvpsState;
	StringBuf* src = shaderContext->shaderSource;
	LatteDecompilerALUInstruction* aluRedcInstruction[4]; // only valid while isReductionOperation is true
	size_t groupStartIndex = 0;
	for(size_t i=0; i<cfInstruction->instructionsALU.size(); i++)
	{
		LatteDecompilerALUInstruction& aluInstruction = cfInstruction->instructionsALU[i];
		if( aluInstruction.indexInGroup == 0 )
		{
			src->addFmt("// {}" _CRLF, aluInstruction.instructionGroupIndex);
			// apply PV/PS updates for previous group
			if (i > 0)
			{
				pvpsState.TrackGroupOutputPVPS(shaderContext, cfInstruction->instructionsALU.data() + groupStartIndex, i - groupStartIndex);
			}
			groupStartIndex = i;
			// backup registers which are read after being written
			_emitALUClauseRegisterBackupCode(shaderContext, cfInstruction, i);
		}
		// detect reduction instructions and use a special handler
		bool isReductionOperation = _isReductionInstruction(&aluInstruction);
		if( isReductionOperation )
		{
			// a reduction always occupies four consecutive instructions
			// the check must also hold when debug asserts are compiled out, otherwise the accesses below read past the end of the instruction list
			if ((i + 4) > cfInstruction->instructionsALU.size())
			{
				cemu_assert_debug(false);
				shaderContext->shader->hasError = true;
				break;
			}
			aluRedcInstruction[0] = &aluInstruction;
			aluRedcInstruction[1] = &cfInstruction->instructionsALU[i + 1];
			aluRedcInstruction[2] = &cfInstruction->instructionsALU[i + 2];
			aluRedcInstruction[3] = &cfInstruction->instructionsALU[i + 3];
			// all four instructions are expected to agree on encoding, opcode, omod and clamp
			if( aluRedcInstruction[0]->isOP3 != aluRedcInstruction[1]->isOP3 || aluRedcInstruction[1]->isOP3 != aluRedcInstruction[2]->isOP3 || aluRedcInstruction[2]->isOP3 != aluRedcInstruction[3]->isOP3 )
				debugBreakpoint();
			if( aluRedcInstruction[0]->opcode != aluRedcInstruction[1]->opcode || aluRedcInstruction[1]->opcode != aluRedcInstruction[2]->opcode || aluRedcInstruction[2]->opcode != aluRedcInstruction[3]->opcode )
				debugBreakpoint();
			if( aluRedcInstruction[0]->omod != aluRedcInstruction[1]->omod || aluRedcInstruction[1]->omod != aluRedcInstruction[2]->omod || aluRedcInstruction[2]->omod != aluRedcInstruction[3]->omod )
				debugBreakpoint();
			if( aluRedcInstruction[0]->destClamp != aluRedcInstruction[1]->destClamp || aluRedcInstruction[1]->destClamp != aluRedcInstruction[2]->destClamp || aluRedcInstruction[2]->destClamp != aluRedcInstruction[3]->destClamp )
				debugBreakpoint();
			_emitALUReductionInstructionCode(shaderContext, aluRedcInstruction);
			i += 3; // skip the instructions that are part of the reduction operation
		}
		else /* not a reduction operation */
		{
			if( aluInstruction.isOP3 )
			{
				// op3
				_emitALUOP3InstructionCode(shaderContext, cfInstruction, &aluInstruction);
			}
			else
			{
				// op2
				if( aluInstruction.opcode == ALU_OP2_INST_NOP )
					continue; // skip NOP instruction
				_emitALUOP2InstructionCode(shaderContext, cfInstruction, &aluInstruction);
			}
		}
		// handle omod
		// note: for reductions aluInstruction still refers to the first of the four instructions
		sint32 outputDataType = _getALUInstructionOutputDataType(shaderContext, &aluInstruction);
		if( aluInstruction.omod != ALU_OMOD_NONE )
		{
			if( outputDataType == LATTE_DECOMPILER_DTYPE_FLOAT )
			{
				_emitInstructionOutputVariableName(shaderContext, &aluInstruction);
				if( aluInstruction.omod == ALU_OMOD_MUL2 )
					src->add(" *= 2.0;" _CRLF);
				else if( aluInstruction.omod == ALU_OMOD_MUL4 )
					src->add(" *= 4.0;" _CRLF);
				else if( aluInstruction.omod == ALU_OMOD_DIV2 )
					src->add(" /= 2.0;" _CRLF);
			}
			else if( outputDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
			{
				// the value is stored as int bits, so it is reinterpreted as float, scaled and reinterpreted back
				_emitInstructionOutputVariableName(shaderContext, &aluInstruction);
				src->add(" = ");
				src->add("as_type<int>(as_type<float>("); // TODO: correct?
				_emitInstructionOutputVariableName(shaderContext, &aluInstruction);
				src->add(")");
				// same named omod constants as the float path above
				if( aluInstruction.omod == ALU_OMOD_MUL2 )
					src->add(" * 2.0");
				else if( aluInstruction.omod == ALU_OMOD_MUL4 )
					src->add(" * 4.0");
				else if( aluInstruction.omod == ALU_OMOD_DIV2 )
					src->add(" / 2.0");
				src->add(");" _CRLF);
			}
			else
			{
				cemu_assert_unimplemented();
			}
		}
		// handle clamp
		if( aluInstruction.destClamp != 0 )
		{
			if( outputDataType == LATTE_DECOMPILER_DTYPE_FLOAT )
			{
				_emitInstructionOutputVariableName(shaderContext, &aluInstruction);
				src->add(" = clamp(");
				_emitInstructionOutputVariableName(shaderContext, &aluInstruction);
				src->add(", 0.0, 1.0);" _CRLF);
			}
			else if( outputDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
			{
				_emitInstructionOutputVariableName(shaderContext, &aluInstruction);
				src->add(" = clampFI32(");
				_emitInstructionOutputVariableName(shaderContext, &aluInstruction);
				src->add(");" _CRLF);
			}
			else
			{
				cemu_assert_unimplemented();
			}
		}
		// handle result broadcasting for reduction instructions
		if( isReductionOperation )
		{
			// reduction operations set all four PV components (todo: Needs further research. According to AMD docs, dot4 only sets PV.x? update: Unlike DOT4, CUBE sets all PV elements accordingly to their GPR output?)
			if( aluRedcInstruction[0]->opcode == ALU_OP2_INST_CUBE )
			{
				// CUBE
				// only lanes without a GPR write (writeMask == 0) get their PV/PS variable assigned here
				for (sint32 f = 0; f < 4; f++)
				{
					if (aluRedcInstruction[f]->writeMask != 0)
						continue;
					_emitInstructionPVPSOutputVariableName(shaderContext, aluRedcInstruction[f]);
					src->add(" = ");
					_emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[0]);
					src->add(";" _CRLF);
				}
			}
			else
			{
				// DOT4, DOT4_IEEE, etc.
				// reduction operation result is only set for output in redc[0], we also need to update redc[1] to redc[3]
				for(sint32 f=0; f<4; f++)
				{
					if( aluRedcInstruction[f]->writeMask == 0 )
						_emitInstructionPVPSOutputVariableName(shaderContext, aluRedcInstruction[f]);
					else
					{
						// redc[0] with a GPR write already holds the result
						if (f == 0)
							continue;
						_emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[f]);
					}
					src->add(" = ");
					_emitInstructionOutputVariableName(shaderContext, aluRedcInstruction[0]);
					src->add(";" _CRLF);
				}
			}
		}
	}
	shaderContext->aluPVPSState = nullptr;
}
|
|
|
|
/*
|
|
* Emits code to access one component (xyzw) of the texture coordinate input vector
|
|
*/
|
|
static void _emitTEXSampleCoordInputComponent(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction, sint32 componentIndex, sint32 interpretSrcAsType)
|
|
{
|
|
cemu_assert(componentIndex >= 0 && componentIndex < 4);
|
|
cemu_assert_debug(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_SIGNED_INT || interpretSrcAsType == LATTE_DECOMPILER_DTYPE_FLOAT);
|
|
StringBuf* src = shaderContext->shaderSource;
|
|
sint32 elementSel = texInstruction->textureFetch.srcSel[componentIndex];
|
|
if (elementSel < 4)
|
|
{
|
|
_emitRegisterChannelAccessCode(shaderContext, texInstruction->srcGpr, elementSel, interpretSrcAsType);
|
|
return;
|
|
}
|
|
const char* resultElemTable[4] = {"x","y","z","w"};
|
|
if(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
|
|
{
|
|
if( elementSel == 4 )
|
|
src->add("as_type<int>(0.0)");
|
|
else if( elementSel == 5 )
|
|
src->add("as_type<int>(1.0)");
|
|
}
|
|
else if(interpretSrcAsType == LATTE_DECOMPILER_DTYPE_FLOAT )
|
|
{
|
|
if( elementSel == 4 )
|
|
src->add("0.0");
|
|
else if( elementSel == 5 )
|
|
src->add("1.0");
|
|
}
|
|
}
|
|
|
|
// Swizzle letter per selector value, indexed by the selX/selY/selZ/selW arguments of _getTexGPRAccess.
// Entries 0..3 are the channels x,y,z,w; entries 4..7 are filled with the placeholder "_".
static const char* _texGprAccessElemTable[8] = {"x","y","z","w","_","_","_","_"};
|
|
|
|
/*
 * Builds a swizzled register access expression in tempBuffer and returns tempBuffer.
 * Example with int as the default register type and float requested: as_type<float>(R{}i.w)
 *
 * Each of selX..selW contributes one swizzle letter (looked up in _texGprAccessElemTable) when it is >= 0.
 * When the register default type is int and dataType is float, the access is wrapped in
 * as_type<float>(...) or as_type<floatN>(...) depending on the number of selected elements.
 * Unsupported type combinations hit cemu_assert_unimplemented().
 * The buffer is written with strcat and no size is passed in, so the caller has to provide enough room.
 */
static char* _getTexGPRAccess(LatteDecompilerShaderContext* shaderContext, sint32 gprIndex, uint32 dataType, sint8 selX, sint8 selY, sint8 selZ, sint8 selW, char* tempBuffer)
{
	*tempBuffer = '\0';
	const sint8 selectors[4] = { selX, selY, selZ, selW };
	uint8 elemCount = 0;
	for (sint8 selector : selectors)
	{
		if (selector >= 0)
			elemCount++;
	}

	const auto registerType = shaderContext->typeTracker.defaultDataType;
	const bool registersAreInt = (registerType == LATTE_DECOMPILER_DTYPE_SIGNED_INT);
	const bool registersAreFloat = (registerType == LATTE_DECOMPILER_DTYPE_FLOAT);
	if (!registersAreInt && !registersAreFloat)
	{
		cemu_assert_unimplemented();
		return tempBuffer;
	}

	// only int registers read as float need a bit cast, a request matching the register type needs nothing
	const bool needsFloatCast = registersAreInt && (dataType == LATTE_DECOMPILER_DTYPE_FLOAT);
	const bool matchesRegisterType = registersAreInt ? (dataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) : (dataType == LATTE_DECOMPILER_DTYPE_FLOAT);

	// opening part of the conversion
	if (needsFloatCast)
	{
		if (elemCount == 1)
			strcat(tempBuffer, "as_type<float>(");
		else
		{
			const std::string castOpen = "as_type<float" + std::to_string(elemCount) + ">(";
			strcat(tempBuffer, castOpen.c_str());
		}
	}
	else if (!matchesRegisterType)
		cemu_assert_unimplemented();

	// register name followed by the swizzle
	strcat(tempBuffer, _getRegisterVarName(shaderContext, gprIndex));
	strcat(tempBuffer, ".");
	for (sint8 selector : selectors)
	{
		if (selector >= 0)
			strcat(tempBuffer, _texGprAccessElemTable[selector]);
	}

	// closing part of the conversion
	if (needsFloatCast)
		strcat(tempBuffer, ")");
	else if (!matchesRegisterType)
		cemu_assert_unimplemented();

	return tempBuffer;
}
|
|
|
|
// Emits one MSL statement for a texture sampling instruction (the SAMPLE*, SAMPLE_C*,
// SAMPLE_G, FETCH4 and LD opcodes dispatched here by _emitTEXClauseCode).
//
// The generated statement has the shape
//   <dstReg>.<writeMask> = <conversion>(<texture access>.<result swizzle>);
// where <conversion> depends on whether the texture unit uses an integer format and on
// the decompiler's default register data type.
//
// shaderContext  - decompiler state; the statement is appended to shaderContext->shaderSource
// texInstruction - the TEX instruction to translate
//
// Nothing is emitted if the instruction references a texture unit outside of
// [0, LATTE_NUM_MAX_TEX_UNITS).
static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction)
{
	StringBuf* src = shaderContext->shaderSource;
	if (texInstruction->textureFetch.textureIndex < 0 || texInstruction->textureFetch.textureIndex >= LATTE_NUM_MAX_TEX_UNITS)
	{
		// skip out of bounds texture unit access
		return;
	}

	auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex];

	char tempBuffer0[32];
	char tempBuffer1[32];

	// left-hand side: destination register followed by the write mask
	src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr));
	src->add(".");
	const char* resultElemTable[4] = {"x","y","z","w"};
	sint32 numWrittenElements = 0;
	for (sint32 f = 0; f < 4; f++)
	{
		if (texInstruction->dstSel[f] < 4)
		{
			src->add(resultElemTable[f]);
			numWrittenElements++;
		}
		else if (texInstruction->dstSel[f] == 7)
		{
			// masked and not written
		}
		else
		{
			debugBreakpoint();
		}
	}

	// texture sampler opcode
	uint32 texOpcode = texInstruction->opcode;
	// TODO: is this needed?
	if (shaderContext->shaderType == LatteConst::ShaderType::Vertex)
	{
		// vertex shader forces LOD to zero, but certain sampler types don't support textureLod(...) API
		if (texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ)
			texOpcode = GPU7_TEX_INST_SAMPLE_C;
	}

	// check if offset is used
	const bool hasOffset = (texInstruction->textureFetch.offsetX != 0 || texInstruction->textureFetch.offsetY != 0 || texInstruction->textureFetch.offsetZ != 0);

	// emit the assignment and the opening of the type conversion
	// (the matching ')' is emitted together with the final ';')
	if (shaderContext->shader->textureIsIntegerFormat[texInstruction->textureFetch.textureIndex])
	{
		// integer samplers
		if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT) // uint to int
		{
			if (numWrittenElements == 1)
				src->add(" = int(");
			else
				src->addFmt(" = int{}(", numWrittenElements);
		}
		else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
		{
			if (numWrittenElements == 1)
				src->add(" = as_type<float>(");
			else
				src->addFmt(" = as_type<float{}>(", numWrittenElements);
		}
	}
	else
	{
		// float samplers
		if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
		{
			if (numWrittenElements == 1)
				src->add(" = as_type<int>(");
			else
				src->addFmt(" = as_type<int{}>(", numWrittenElements);
		}
		else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
			src->add(" = (");
	}

	bool isCompare = shaderContext->shader->textureUsesDepthCompare[texInstruction->textureFetch.textureIndex];
	bool emulateCompare = (isCompare && !IsValidDepthTextureType(texDim));
	bool isGather = (texOpcode == GPU7_TEX_INST_FETCH4);

	bool useTexelCoordinates = false;
	// LD, or SAMPLE with all four coordinates flagged as unnormalized, is emitted as a texel read
	bool isRead = ((texOpcode == GPU7_TEX_INST_SAMPLE && (texInstruction->textureFetch.unnormalized[0] && texInstruction->textureFetch.unnormalized[1] && texInstruction->textureFetch.unnormalized[2] && texInstruction->textureFetch.unnormalized[3])) || texOpcode == GPU7_TEX_INST_LD);

	// handle illegal combinations
	if (texOpcode == GPU7_TEX_INST_FETCH4 && (texDim == Latte::E_DIM::DIM_1D || texDim == Latte::E_DIM::DIM_1D_ARRAY))
	{
		// fetch4 is not allowed on 1D textures
		// seen in YWW during boss fight of Level 1-4
		// todo - investigate what this returns on actual HW
		if (numWrittenElements == 1)
			src->add("0.0");
		else
			src->addFmt("float{}(0.0)", numWrittenElements);
		src->add(");" _CRLF);
		return;
	}

	// Do a framebuffer fetch if possible
	uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex];
	if (static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && renderTargetIndex != 255)
	{
		// TODO: support comparison samplers
		// TODO: support swizzling
		src->addFmt("col{}", renderTargetIndex);
	}
	else
	{
		// sample_compare returns a float, need to convert to float4
		if (isCompare)
			src->add("float4(");

		if (emulateCompare)
		{
			cemu_assert_debug(!isGather);

			src->add("sampleCompareEmulate(");
		}

		src->addFmt("tex{}", texInstruction->textureFetch.textureIndex);
		if (!emulateCompare)
		{
			// member function call on the texture object
			src->add(".");
			if (isRead)
			{
				if (hasOffset)
					cemu_assert_unimplemented();
				src->add("read(");
				useTexelCoordinates = true;
			}
			else
			{
				if (isGather)
					src->add("gather");
				else
					src->add("sample");
				if (isCompare)
					src->add("_compare");
				src->addFmt("(samplr{}, ", texInstruction->textureFetch.textureIndex);
			}
		}
		else
		{
			// texture and sampler are passed as arguments to the emulation helper
			src->addFmt(", samplr{}, ", texInstruction->textureFetch.textureIndex);
		}

		// for textureGather() add shift (todo: depends on rounding mode set in sampler registers?)
		if (texOpcode == GPU7_TEX_INST_FETCH4)
		{
			if (texDim == Latte::E_DIM::DIM_2D)
			{
				//src->addFmt2("(vec2(-0.1) / vec2(textureSize(tex{},0).xy)) + ", texInstruction->textureIndex);

				// vec2(-0.00001) is minimum to break Nvidia
				// vec2(0.0001) is minimum to fix shadows on Intel, also fixes it on AMD (Windows and Linux)

				// todo - emulating coordinate rounding mode correctly is tricky
				// GX2 supports two modes: Truncate or rounding according to DX9 rules
				// Vulkan uses truncate mode when point sampling (min and mag is both nearest) otherwise it uses rounding

				// adding a small fixed bias is enough to avoid vendor-specific cases where small inaccuracies cause the number to get rounded down due to truncation
				src->add("float2(0.0001) + ");
			}
		}

		const sint32 texCoordDataType = (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT;
		if (useTexelCoordinates)
		{
			// handle integer coordinates for texelFetch
			if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA)
			{
				src->add("uint2(");
				src->add("float2(");
				_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, texCoordDataType);
				src->add(", ");
				_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, texCoordDataType);

				src->addFmt(")*supportBuffer.tex{}Scale", texInstruction->textureFetch.textureIndex); // close float2 and scale

				src->add("), 0"); // close uint2 and lod param
				// todo - lod
			}
			else if (texDim == Latte::E_DIM::DIM_1D)
			{
				// VC DS games forget to initialize textures and use texel fetch on an uninitialized texture (a dim of 0 maps to 1D)
				src->add("uint(");
				src->add("float(");
				_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, texCoordDataType);
				src->addFmt(")*supportBuffer.tex{}Scale.x", texInstruction->textureFetch.textureIndex);
				src->add("), 0");
				// todo - lod
			}
			else
				cemu_assert_debug(false);
		}
		else /* useTexelCoordinates == false */
		{
			// float coordinates
			if ((texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_L || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ))
			{
				// shadow sampler
				if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
				{
					// 3 coords + compare value
					src->add("float2(");
					_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
					src->add(", ");
					_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
					src->add("), uint(rint(");
					_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT);
					src->add("))");

					src->addFmt(", {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0));
				}
				else if (texDim == Latte::E_DIM::DIM_CUBEMAP)
				{
					// 2 coords + faceId
					if (texInstruction->textureFetch.srcSel[0] >= 4 || texInstruction->textureFetch.srcSel[1] >= 4)
					{
						debugBreakpoint();
					}
					src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0));
					_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
					src->add(")");
					src->addFmt(", uint(cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index
				}
				else if (texDim == Latte::E_DIM::DIM_1D)
				{
					// 1 coord + 1 unused coord (per spec) + compare value
					if (texInstruction->textureFetch.srcSel[0] >= 4)
					{
						debugBreakpoint();
					}
					src->addFmt("{}, {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1));
				}
				else
				{
					// 2 coords + compare value (as float3)
					// NOTE(review): the cubemap case above uses '||' for the same check - confirm whether '&&' is intended here
					if (texInstruction->textureFetch.srcSel[0] >= 4 && texInstruction->textureFetch.srcSel[1] >= 4)
					{
						debugBreakpoint();
					}
					src->addFmt("float2({}), {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1));
				}
			}
			else if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
			{
				// 3 coords
				src->add("float2(");
				_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
				src->add(", ");
				_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
				src->add("), uint(rint(");
				_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT);
				src->add("))");
			}
			else if (texDim == Latte::E_DIM::DIM_3D)
			{
				// 3 coords
				src->add("float3(");
				_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
				src->add(", ");
				_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
				src->add(", ");
				_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT);
				src->add(")");
			}
			else if (texDim == Latte::E_DIM::DIM_CUBEMAP)
			{
				// 2 coords + faceId
				cemu_assert_debug(texInstruction->textureFetch.srcSel[0] < 4);
				cemu_assert_debug(texInstruction->textureFetch.srcSel[1] < 4);
				src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0));
				_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
				src->add(")");
				src->addFmt(", uint(cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index
			}
			else if (texDim == Latte::E_DIM::DIM_1D)
			{
				// 1 coord
				src->add(_getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0));
			}
			else
			{
				// 2 coords
				src->add("float2(");
				_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
				src->add(",");
				_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
				src->add(")");
				// avoid truncate to effectively round downwards on texel edges
				if (ActiveSettings::ForceSamplerRoundToPrecision())
					src->addFmt("+ float2(1.0)/float2(tex{}.get_width(), tex{}.get_height())/512.0", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
			}
			// lod or lod bias parameter
			// 1D textures don't support lod
			if (texDim != Latte::E_DIM::DIM_1D && texDim != Latte::E_DIM::DIM_1D_ARRAY)
			{
				if (texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L)
				{
					src->add(", ");
					if (texOpcode == GPU7_TEX_INST_SAMPLE_LB)
					{
						src->addFmt("bias({})", _FormatFloatAsConstant((float)texInstruction->textureFetch.lodBias / 16.0f));
					}
					else
					{
						// TODO: is this correct?
						src->add("level(");
						_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 3, LATTE_DECOMPILER_DTYPE_FLOAT);
						src->add(")");
					}
				}
				else if (texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ)
				{
					src->add(", level(0.0)");
				}
			}
		}
		// gradient parameters
		if (texOpcode == GPU7_TEX_INST_SAMPLE_G)
		{
			if (texDim == Latte::E_DIM::DIM_2D ||
				texDim == Latte::E_DIM::DIM_1D)
			{
				src->add(", gradient2d(gradH.xy, gradV.xy)");
			}
			else
			{
				cemu_assert_unimplemented();
			}
		}

		// offset
		if (texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ || texOpcode == GPU7_TEX_INST_SAMPLE || texOpcode == GPU7_TEX_INST_SAMPLE_C)
		{
			if (hasOffset)
			{
				uint8 offsetComponentCount = 0;
				if (texDim == Latte::E_DIM::DIM_1D)
					offsetComponentCount = 1;
				else if (texDim == Latte::E_DIM::DIM_2D)
					offsetComponentCount = 2;
				else if (texDim == Latte::E_DIM::DIM_3D)
					offsetComponentCount = 3;
				else if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
					offsetComponentCount = 2;
				else
					cemu_assert_unimplemented();

				// the emitted offsets are the stored values divided by two; odd values are not handled
				if ((texInstruction->textureFetch.offsetX & 1))
					cemu_assert_unimplemented();
				if ((texInstruction->textureFetch.offsetY & 1))
					cemu_assert_unimplemented();
				if ((texInstruction->textureFetch.offsetZ & 1))
					cemu_assert_unimplemented();

				if (offsetComponentCount == 1)
					src->addFmt(",{}", texInstruction->textureFetch.offsetX/2);
				else if (offsetComponentCount == 2)
					src->addFmt(",int2({},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2);
				else if (offsetComponentCount == 3)
					src->addFmt(",int3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2);
			}
		}

		// lod bias (TODO: what?)

		src->add(")"); // close the read/sample/gather/sampleCompareEmulate call

		// close the float4( wrapper opened above. This has to stay inside this branch:
		// the framebuffer fetch path never opens the wrapper, so closing it there
		// would leave the generated expression with an unbalanced ')'
		if (isCompare)
			src->add(")");
	}

	// result swizzle
	if (texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ)
	{
		src->add(".");

		if (numWrittenElements > 1)
		{
			// result is copied into multiple channels
			for (sint32 f = 0; f < numWrittenElements; f++)
			{
				cemu_assert_debug(texInstruction->dstSel[f] == 0); // only x component is defined
				src->add("x");
			}
		}
		else
		{
			src->add("x");
		}
	}
	else
	{
		src->add(".");
		for (sint32 f = 0; f < 4; f++)
		{
			if (texInstruction->dstSel[f] < 4)
			{
				uint8 elemIndex = texInstruction->dstSel[f];
				if (isGather)
				{
					// textureGather() and GPU7's FETCH4 instruction have a different order of elements
					// xyzw: top-left, top-right, bottom-right, bottom-left
					// textureGather	xyzw
					// fetch4			yzxw
					// translate index from fetch4 to textureGather order
					static uint8 fetchToGather[4] =
					{
						2, // x -> z
						0, // y -> x
						1, // z -> y
						3, // w -> w
					};
					elemIndex = fetchToGather[elemIndex];
				}
				src->add(resultElemTable[elemIndex]);
			}
			else if (texInstruction->dstSel[f] == 7)
			{
				// masked and not written
			}
			else
			{
				cemu_assert_unimplemented();
			}
		}
	}
	src->add(");"); // closes the type conversion opened together with the assignment

	// debug
#ifdef CEMU_DEBUG_ASSERT
	if (texInstruction->opcode == GPU7_TEX_INST_LD)
		src->add(" // TEX_INST_LD");
	else if (texInstruction->opcode == GPU7_TEX_INST_SAMPLE)
		src->add(" // TEX_INST_SAMPLE");
	else if (texInstruction->opcode == GPU7_TEX_INST_SAMPLE_L)
		src->add(" // TEX_INST_SAMPLE_L");
	else if (texInstruction->opcode == GPU7_TEX_INST_SAMPLE_LZ)
		src->add(" // TEX_INST_SAMPLE_LZ");
	else if (texInstruction->opcode == GPU7_TEX_INST_SAMPLE_C)
		src->add(" // TEX_INST_SAMPLE_C");
	else if (texInstruction->opcode == GPU7_TEX_INST_SAMPLE_G)
		src->add(" // TEX_INST_SAMPLE_G");
	else
		src->addFmt(" // 0x{:02x}", texInstruction->opcode);
	if (texInstruction->opcode != texOpcode)
		src->addFmt(" (applied as 0x{:02x})", texOpcode);
	src->addFmt(" OffsetXYZ {:02x} {:02x} {:02x}", (uint8)texInstruction->textureFetch.offsetX&0xFF, (uint8)texInstruction->textureFetch.offsetY&0xFF, (uint8)texInstruction->textureFetch.offsetZ&0xFF);
#endif
	src->add("" _CRLF);
}
|
|
|
|
// Emits the MSL statement for GET_TEXTURE_RESINFO:
//   R<dst>i.<writeMask> = int4(<width>, <height or layers or 1>, <layers or 1>, 1).<swizzle>;
// The size expression depends on the dimensionality of the referenced texture unit.
// Textures that are accessed via framebuffer fetch get a fixed placeholder size.
static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction)
{
	const char* componentNames[4] = {"x","y","z","w"};
	StringBuf* out = shaderContext->shaderSource;

	// destination register (always addressed through its integer alias) and write mask
	out->addFmt("R{}i.", texInstruction->dstGpr);
	for (sint32 c = 0; c < 4; c++)
	{
		const auto sel = texInstruction->dstSel[c];
		if (sel == 7)
			continue; // masked and not written
		if (sel < 4)
			out->add(componentNames[c]);
		else
			cemu_assert_unimplemented();
	}

	// todo - mip index parameter?

	const bool viaFramebufferFetch = static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255;
	if (viaFramebufferFetch)
	{
		// TODO: use the render target size
		out->add(" = int4(1920, 1080, 1, 1).");
	}
	else
	{
		auto dim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex];

		if (dim == Latte::E_DIM::DIM_1D)
		{
			out->addFmt(" = int4(tex{}.get_width(), 1, 1, 1).", texInstruction->textureFetch.textureIndex);
		}
		else if (dim == Latte::E_DIM::DIM_1D_ARRAY)
		{
			out->addFmt(" = int4(tex{}.get_width(), tex{}.get_array_size(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
		}
		else if (dim == Latte::E_DIM::DIM_2D_ARRAY)
		{
			out->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), tex{}.get_array_size(), 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
		}
		else
		{
			// 2D and 2D MSAA are the expected cases here; any other dimension
			// is flagged in debug builds but still emitted like a 2D texture
			if (dim != Latte::E_DIM::DIM_2D && dim != Latte::E_DIM::DIM_2D_MSAA)
				cemu_assert_debug(false);
			out->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
		}
	}

	// swizzle applied to the int4 on the right-hand side
	for (sint32 c = 0; c < 4; c++)
	{
		const auto sel = texInstruction->dstSel[c];
		if (sel == 7)
			continue; // masked and not written
		if (sel < 4)
			out->add(componentNames[sel]);
		else
			debugBreakpoint();
	}
	out->add(";" _CRLF);
}
|
|
|
|
// Emits the MSL statement for GET_COMP_TEX_LOD:
//   <dstReg>.<writeMask> = <conversion>float4(textureCalculateLod(tex, samplr, <coords>), 0.0, 0.0)<conversion end>.<swizzle>;
// Cubemaps pass three source components as coordinates, everything else two.
// For textures accessed via framebuffer fetch a constant zero vector is emitted instead.
static void _emitTEXGetCompTexLodCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction)
{
	const char* componentNames[4] = {"x","y","z","w"};
	StringBuf* out = shaderContext->shaderSource;

	// destination register and write mask
	out->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr));
	out->add(".");
	for (sint32 c = 0; c < 4; c++)
	{
		const auto sel = texInstruction->dstSel[c];
		if (sel == 7)
			continue; // masked and not written
		if (sel < 4)
			out->add(componentNames[c]);
		else
			debugBreakpoint();
	}

	out->add(" = ");
	// NOTE(review): the conversion is requested with the default component count although
	// the operand emitted below is a float4 - confirm the helper produces a valid cast
	// when the default data type is not float
	_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType);

	const bool viaFramebufferFetch = static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255;
	if (viaFramebufferFetch)
	{
		// We assume that textures accessed as framebuffer fetch are always sampled at pixel coordinates, therefore the lod would always be 0.0
		out->add("float4(0.0, 0.0, 0.0, 0.0)");
	}
	else
	{
		const bool isCubemap = (shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex] == Latte::E_DIM::DIM_CUBEMAP);
		// registers that are not stored as float have to be reinterpreted before use as coordinates
		const bool needsReinterpret = !(shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT);

		out->addFmt("float4(textureCalculateLod(tex{}, samplr{}, ", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
		if (needsReinterpret)
			out->add(isCubemap ? "as_type<float3>(" : "as_type<float2>(");
		// coordinate swizzle: two components, plus a third one for cubemaps
		out->addFmt("{}.{}{}", _getRegisterVarName(shaderContext, texInstruction->srcGpr), componentNames[texInstruction->textureFetch.srcSel[0]], componentNames[texInstruction->textureFetch.srcSel[1]]);
		if (isCubemap)
			out->add(componentNames[texInstruction->textureFetch.srcSel[2]]);
		if (needsReinterpret)
			out->add(")");
		out->add("), 0.0, 0.0)");

		// the non-cubemap path is flagged for inspection in debug builds
		if (!isCubemap)
			debugBreakpoint();
	}

	_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType);
	out->add(".");

	// swizzle applied to the result vector
	for (sint32 c = 0; c < 4; c++)
	{
		const auto sel = texInstruction->dstSel[c];
		if (sel == 7)
			continue; // masked and not written
		if (sel < 4)
			out->add(componentNames[sel]);
		else
			debugBreakpoint();
	}
	out->add(";" _CRLF);
}
|
|
|
|
// Emits the MSL statement for SET_CUBEMAP_INDEX:
//   cubeMapArrayIndex<texUnit> = <first source component of the source register, as float>;
// Only the int and float default register types are handled.
static void _emitTEXSetCubemapIndexCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction)
{
	const char* componentNames[4] = {"x","y","z","w"};
	StringBuf* out = shaderContext->shaderSource;

	out->addFmt("cubeMapArrayIndex{}", texInstruction->textureFetch.textureIndex);

	const bool regsAreInt = (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT);
	const bool regsAreFloat = (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT);
	if (!regsAreInt && !regsAreFloat)
	{
		cemu_assert_unimplemented();
		return;
	}

	const char* srcComponent = componentNames[texInstruction->textureFetch.srcSel[0]];
	if (regsAreInt)
	{
		// integer register alias: reinterpret the bits as float
		out->addFmt(" = as_type<float>(R{}i.{});" _CRLF, texInstruction->srcGpr, srcComponent);
	}
	else
	{
		out->addFmt(" = R{}f.{};" _CRLF, texInstruction->srcGpr, srcComponent);
	}
}
|
|
|
|
// Emits the MSL statement for GET_GRADIENTS_H / GET_GRADIENTS_V:
//   <dstReg>.<writeMask> = <conversion>dfdx|dfdy(<source components>)<conversion end>;
// GET_GRADIENTS_H maps to dfdx, any other opcode routed here maps to dfdy.
static void _emitTEXGetGradientsHV(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction)
{
	const char* componentNames[4] = { "x","y","z","w" };
	StringBuf* out = shaderContext->shaderSource;

	out->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr));
	out->add(".");

	// a single pass emits the write mask and counts every destination slot that is not masked out
	sint32 unmaskedCount = 0;
	for (sint32 c = 0; c < 4; c++)
	{
		const auto sel = texInstruction->dstSel[c];
		if (sel == 7)
			continue; // masked and not written
		unmaskedCount++;
		if (sel < 4)
			out->add(componentNames[c]);
		else
			debugBreakpoint();
	}

	const char* derivativeFunc = (texInstruction->opcode == GPU7_TEX_INST_GET_GRADIENTS_H) ? "dfdx" : "dfdy";

	out->add(" = ");

	_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType, unmaskedCount);

	out->addFmt("{}(", derivativeFunc);

	// only the first unmaskedCount source selectors are forwarded, the rest are passed as -1
	sint32 srcSelectors[4];
	for (sint32 c = 0; c < 4; c++)
		srcSelectors[c] = (unmaskedCount > c) ? texInstruction->textureFetch.srcSel[c] : -1;
	_emitRegisterAccessCode(shaderContext, texInstruction->srcGpr, srcSelectors[0], srcSelectors[1], srcSelectors[2], srcSelectors[3], LATTE_DECOMPILER_DTYPE_FLOAT);

	out->add(")");

	_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType);

	out->add(";" _CRLF);
}
|
|
|
|
// Emits the MSL statement for SET_GRADIENTS_H / SET_GRADIENTS_V:
//   gradH|gradV = <all four source components as float>;
// SET_GRADIENTS_H writes gradH, any other opcode routed here writes gradV.
static void _emitTEXSetGradientsHV(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction)
{
	StringBuf* out = shaderContext->shaderSource;

	const bool isHorizontal = (texInstruction->opcode == GPU7_TEX_INST_SET_GRADIENTS_H);
	out->add(isHorizontal ? "gradH = " : "gradV = ");

	_emitRegisterAccessCode(shaderContext,
		texInstruction->srcGpr,
		texInstruction->textureFetch.srcSel[0],
		texInstruction->textureFetch.srcSel[1],
		texInstruction->textureFetch.srcSel[2],
		texInstruction->textureFetch.srcSel[3],
		LATTE_DECOMPILER_DTYPE_FLOAT);

	out->add(";" _CRLF);
}
|
|
|
|
// Emits the MSL statement that reads a vertex shader output inside a geometry shader:
//   <dstReg>.<writeMask> = <conversion>(objectPayload.vertexOut[vertexIndex].passParameterSem<N>.<swizzle>)<conversion end>;
// where N is the fetch offset divided by 16. The value is converted from int to the
// default register data type.
static void _emitGSReadInputVFetchCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction)
{
	const char* componentNames[4] = {"x","y","z","w"};
	StringBuf* out = shaderContext->shaderSource;

	out->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr));
	out->add(".");

	// write mask; the number of written components selects the width of the conversion
	sint32 writtenCount = 0;
	for (sint32 c = 0; c < 4; c++)
	{
		const auto sel = texInstruction->dstSel[c];
		if (sel == 7)
			continue; // masked and not written
		if (sel >= 4)
		{
			cemu_assert_unimplemented();
			continue;
		}
		out->add(componentNames[c]);
		writtenCount++;
	}

	out->add(" = ");
	_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType, writtenCount);
	out->add("(objectPayload.vertexOut[");
	// source selectors that do not refer to a register component are not supported
	if (texInstruction->textureFetch.srcSel[0] >= 4)
		cemu_assert_unimplemented();
	if (texInstruction->textureFetch.srcSel[1] >= 4)
		cemu_assert_unimplemented();
	out->add("vertexIndex");
	out->addFmt("].passParameterSem{}.", texInstruction->textureFetch.offset/16);

	// swizzle applied to the fetched parameter
	for (sint32 c = 0; c < 4; c++)
	{
		const auto sel = texInstruction->dstSel[c];
		if (sel == 7)
			continue; // masked and not written
		if (sel < 4)
			out->add(componentNames[sel]);
		else
			cemu_assert_unimplemented();
	}
	out->add(")");
	_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType);
	out->add(";" _CRLF);
}
|
|
|
|
// Appends the destination write mask for the four selectors in dstSel to the shader source.
// For every slot whose selector is below 4 the component name of that slot (x, y, z or w)
// is written. A selector of 7 marks a masked slot and writes nothing. Other selector
// values are not implemented.
// Returns the number of component names written.
static sint32 _writeDestMaskXYZW(LatteDecompilerShaderContext* shaderContext, sint8* dstSel)
{
	const char* componentNames[4] = { "x","y","z","w" };
	StringBuf* out = shaderContext->shaderSource;

	sint32 writtenCount = 0;
	for (sint32 slot = 0; slot < 4; slot++)
	{
		const sint8 sel = dstSel[slot];
		if (sel == 7)
			continue; // masked and not written
		if (sel >= 4)
		{
			cemu_assert_unimplemented();
			continue;
		}
		out->add(componentNames[slot]);
		writtenCount++;
	}
	return writtenCount;
}
|
|
|
|
// Emits the MSL statement for VFETCH:
//   <dstReg>.<writeMask> = <conversion>(ubuff<N>.d[<index>].<swizzle>);
// where N is the texture index minus 0x80 and <index> is the first source component
// interpreted as int.
// Geometry shader fetches with texture index 0x9F are forwarded to
// _emitGSReadInputVFetchCode instead.
static void _emitTEXVFetchCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction)
{
	// handle special case where geometry shader reads input attributes from vertex shader via ringbuffer
	StringBuf* out = shaderContext->shaderSource;
	const bool isGSInputRead = (texInstruction->textureFetch.textureIndex == 0x9F && shaderContext->shaderType == LatteConst::ShaderType::Geometry);
	if (isGSInputRead)
	{
		_emitGSReadInputVFetchCode(shaderContext, texInstruction);
		return;
	}

	const char* componentNames[4] = {"x","y","z","w"};
	const bool regsAreInt = (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT);

	out->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr));
	out->add(".");

	// the helper writes the mask and reports how many components are written
	const sint32 writtenCount = _writeDestMaskXYZW(shaderContext, texInstruction->dstSel);

	out->add(" = ");

	// the fetched data is reinterpreted when registers are stored as int
	if (!regsAreInt)
		out->add("(");
	else if (writtenCount == 1)
		out->add("as_type<int>(");
	else
		out->addFmt("as_type<int{}>(", writtenCount);

	out->addFmt("ubuff{}.d[", texInstruction->textureFetch.textureIndex - 0x80);

	// buffer index: registers not stored as int are reinterpreted as int first
	if (!regsAreInt)
		out->add("as_type<int>(");
	out->addFmt("{}.{}", _getRegisterVarName(shaderContext, texInstruction->srcGpr), componentNames[texInstruction->textureFetch.srcSel[0]]);
	if (!regsAreInt)
		out->add(")");
	out->add("].");

	// swizzle applied to the fetched element
	for (sint32 c = 0; c < 4; c++)
	{
		const auto sel = texInstruction->dstSel[c];
		if (sel == 7)
			continue; // masked and not written
		if (sel < 4)
			out->add(componentNames[sel]);
		else
			debugBreakpoint();
	}
	out->add(");" _CRLF);
}
|
|
|
|
// Emits the MSL statement for a MEM read instruction:
//   <dstReg>.<writeMask> = <conversion>(<value>[.<truncating swizzle>]);
// The actual memory read is not implemented yet (see the todo markers); a zero constant
// with as many components as the read format provides is emitted instead. If fewer
// components are written than the format provides, the constant is truncated with a swizzle.
// Only FMT_32_FLOAT, FMT_32_32_FLOAT and FMT_32_32_32_FLOAT are handled.
static void _emitTEXReadMemCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerTEXInstruction* texInstruction)
{
	StringBuf* src = shaderContext->shaderSource;
	src->add(_getRegisterVarName(shaderContext, texInstruction->dstGpr));
	src->add(".");
	sint32 count = _writeDestMaskXYZW(shaderContext, texInstruction->dstSel);

	src->add(" = ");

	if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
	{
		if (count == 1)
			src->add("as_type<int>(");
		else
			src->addFmt("as_type<int{}>(", count);
	}
	else
		src->add("(");

	// number of components provided by the read format
	// initialized so that the comparison below never reads an indeterminate value
	// in case the assert for unsupported formats does not terminate execution
	sint32 readCount = 0;

	if (texInstruction->memRead.format == FMT_32_FLOAT)
	{
		readCount = 1;
		// todo
		src->add("0.0");
	}
	else if (texInstruction->memRead.format == FMT_32_32_FLOAT)
	{
		readCount = 2;
		// todo
		src->add("float2(0.0,0.0)");
	}
	else if (texInstruction->memRead.format == FMT_32_32_32_FLOAT)
	{
		readCount = 3;
		// todo
		src->add("float3(0.0,0.0,0.0)");
	}
	else
	{
		cemu_assert_unimplemented();
	}

	// drop the components that are not written
	if (count < readCount)
	{
		if (count == 1)
			src->add(".x");
		else if (count == 2)
			src->add(".xy");
		else if (count == 3)
			src->add(".xyz");
	}
	src->add(");" _CRLF);
}
|
|
|
|
// Emits the code for every instruction of a TEX clause by dispatching each
// instruction to the emitter that handles its opcode.
// The clause is expected to contain no ALU instructions.
static void _emitTEXClauseCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction)
{
	cemu_assert_debug(cfInstruction->instructionsALU.empty());
	for (auto& texInstruction : cfInstruction->instructionsTEX)
	{
		const auto opcode = texInstruction.opcode;

		// all sampling style opcodes share one emitter
		const bool isSampleOpcode =
			opcode == GPU7_TEX_INST_SAMPLE ||
			opcode == GPU7_TEX_INST_SAMPLE_L ||
			opcode == GPU7_TEX_INST_SAMPLE_LB ||
			opcode == GPU7_TEX_INST_SAMPLE_LZ ||
			opcode == GPU7_TEX_INST_SAMPLE_C ||
			opcode == GPU7_TEX_INST_SAMPLE_C_L ||
			opcode == GPU7_TEX_INST_SAMPLE_C_LZ ||
			opcode == GPU7_TEX_INST_FETCH4 ||
			opcode == GPU7_TEX_INST_SAMPLE_G ||
			opcode == GPU7_TEX_INST_LD;
		if (isSampleOpcode)
		{
			_emitTEXSampleTextureCode(shaderContext, &texInstruction);
			continue;
		}
		if (opcode == GPU7_TEX_INST_GET_TEXTURE_RESINFO)
		{
			_emitTEXGetTextureResInfoCode(shaderContext, &texInstruction);
			continue;
		}
		if (opcode == GPU7_TEX_INST_GET_COMP_TEX_LOD)
		{
			_emitTEXGetCompTexLodCode(shaderContext, &texInstruction);
			continue;
		}
		if (opcode == GPU7_TEX_INST_SET_CUBEMAP_INDEX)
		{
			_emitTEXSetCubemapIndexCode(shaderContext, &texInstruction);
			continue;
		}
		if (opcode == GPU7_TEX_INST_GET_GRADIENTS_H || opcode == GPU7_TEX_INST_GET_GRADIENTS_V)
		{
			_emitTEXGetGradientsHV(shaderContext, &texInstruction);
			continue;
		}
		if (opcode == GPU7_TEX_INST_SET_GRADIENTS_H || opcode == GPU7_TEX_INST_SET_GRADIENTS_V)
		{
			_emitTEXSetGradientsHV(shaderContext, &texInstruction);
			continue;
		}
		if (opcode == GPU7_TEX_INST_VFETCH)
		{
			_emitTEXVFetchCode(shaderContext, &texInstruction);
			continue;
		}
		if (opcode == GPU7_TEX_INST_MEM)
		{
			_emitTEXReadMemCode(shaderContext, &texInstruction);
			continue;
		}
		// no emitter exists for this opcode
		cemu_assert_unimplemented();
	}
}
|
|
|
|
// generate the code for reading the source input GPR (or constants) for exports
|
|
static void _emitExportGPRReadCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, sint32 requiredType, uint32 burstIndex)
|
|
{
|
|
StringBuf* src = shaderContext->shaderSource;
|
|
uint32 numOutputs = 4;
|
|
if( cfInstruction->type == GPU7_CF_INST_MEM_RING_WRITE )
|
|
{
|
|
numOutputs = (cfInstruction->memWriteCompMask&1)?1:0;
|
|
numOutputs += (cfInstruction->memWriteCompMask&2)?1:0;
|
|
numOutputs += (cfInstruction->memWriteCompMask&4)?1:0;
|
|
numOutputs += (cfInstruction->memWriteCompMask&8)?1:0;
|
|
}
|
|
if (requiredType == LATTE_DECOMPILER_DTYPE_FLOAT)
|
|
{
|
|
if(numOutputs == 1)
|
|
src->add("float(");
|
|
else
|
|
src->addFmt("float{}(", numOutputs);
|
|
}
|
|
else if (requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
|
|
{
|
|
if (numOutputs == 1)
|
|
src->add("int(");
|
|
else
|
|
src->addFmt("int{}(", numOutputs);
|
|
}
|
|
else
|
|
cemu_assert_unimplemented();
|
|
sint32 actualOutputs = 0;
|
|
for(sint32 i=0; i<4; i++)
|
|
{
|
|
// todo: Use type of register element based on information from type tracker (currently we assume it's always a signed integer)
|
|
uint32 exportSel = 0;
|
|
if( cfInstruction->type == GPU7_CF_INST_MEM_RING_WRITE )
|
|
{
|
|
exportSel = i;
|
|
if( (cfInstruction->memWriteCompMask&(1<<i)) == 0 )
|
|
continue; // dont output
|
|
}
|
|
else
|
|
{
|
|
exportSel = cfInstruction->exportComponentSel[i];
|
|
}
|
|
if( actualOutputs > 0 )
|
|
src->add(", ");
|
|
actualOutputs++;
|
|
if( exportSel < 4 )
|
|
{
|
|
_emitRegisterAccessCode(shaderContext, cfInstruction->exportSourceGPR+burstIndex, exportSel, -1, -1, -1, requiredType);
|
|
}
|
|
else if (exportSel == 4)
|
|
{
|
|
// constant zero
|
|
src->add("0");
|
|
}
|
|
else if (exportSel == 5)
|
|
{
|
|
// constant one
|
|
src->add("1.0");
|
|
}
|
|
else if( exportSel == 7 )
|
|
{
|
|
// element masked (which means 0 is exported?)
|
|
src->add("0");
|
|
}
|
|
else
|
|
{
|
|
cemu_assert_debug(false);
|
|
src->add("0");
|
|
}
|
|
}
|
|
if( requiredType == LATTE_DECOMPILER_DTYPE_FLOAT )
|
|
src->add(")");
|
|
else if( requiredType == LATTE_DECOMPILER_DTYPE_SIGNED_INT )
|
|
src->add(")");
|
|
else
|
|
cemu_assert_unimplemented();
|
|
}
|
|
|
|
// Emits the MSL code for an EXPORT / EXPORT_DONE control flow instruction.
//
// Vertex shaders:
//  - nothing but a comment is emitted when rasterization is disabled
//  - exportType 1 + position base   -> SET_POSITION(...)
//  - exportType 1 + point size base -> out.pointSize (only if analyzer.outputPointSize is set)
//  - exportType 2 + base < 32       -> out.passParameterSem<semanticId>
// Pixel shaders:
//  - exportType 0 + base < 8        -> optional alpha test + out.passPixelColor<N> (one per burst element)
//  - exportType 0 + base == 61      -> out.passDepth (only if shader->depthMask is set)
// Any other combination hits cemu_assert_unimplemented(). Other shader types emit only the "// export" comment.
static void _emitExportCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction)
{
	StringBuf* src = shaderContext->shaderSource;
	src->add("// export" _CRLF);
	if (shaderContext->shaderType == LatteConst::ShaderType::Vertex)
	{
		if (!shaderContext->contextRegistersNew->IsRasterizationEnabled())
		{
			src->add("// Rasterization disabled" _CRLF);
			return;
		}

		// burst exports are not handled for vertex shaders
		if (cfInstruction->exportBurstCount != 0)
			debugBreakpoint();

		if (cfInstruction->exportType == 1 && cfInstruction->exportArrayBase == GPU7_DECOMPILER_CF_EXPORT_BASE_POSITION)
		{
			// export position
			// GX2 special state 0 disables rasterizer viewport offset and scaling (probably, exact mechanism is not known). Handle this here
			const auto& vteControl = shaderContext->contextRegistersNew->PA_CL_VTE_CNTL;
			const bool hasAnyViewportScaleDisabled =
				!vteControl.get_VPORT_X_SCALE_ENA() ||
				!vteControl.get_VPORT_Y_SCALE_ENA() ||
				!vteControl.get_VPORT_Z_SCALE_ENA();

			if (hasAnyViewportScaleDisabled)
			{
				// position is given in window space, convert it to clip space manually
				src->add("float4 finalPos = ");
				_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0);
				src->add(";" _CRLF);
				src->add("finalPos.xy = finalPos.xy * supportBuffer.windowSpaceToClipSpaceTransform - float2(1.0,1.0);" _CRLF);
				// terminate the line like every other emitted statement so the next statement starts on its own line
				src->add("SET_POSITION(finalPos);" _CRLF);
			}
			else
			{
				src->add("SET_POSITION(");
				_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0);
				src->add(");" _CRLF);
			}
		}
		else if (cfInstruction->exportType == 1 && cfInstruction->exportArrayBase == GPU7_DECOMPILER_CF_EXPORT_POINT_SIZE)
		{
			// export point size (skipped entirely when the shader output has no point size member)
			if (shaderContext->analyzer.outputPointSize)
			{
				cemu_assert_debug(shaderContext->analyzer.writesPointSize);
				src->add("out.pointSize = (");
				_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0);
				src->add(").x;" _CRLF);
			}
		}
		else if (cfInstruction->exportType == 2 && cfInstruction->exportArrayBase < 32)
		{
			// export parameter
			sint32 paramIndex = cfInstruction->exportArrayBase;
			uint32 vsSemanticId = _getVertexShaderOutParamSemanticId(shaderContext->contextRegisters, paramIndex);
			if (vsSemanticId != 0xFF)
			{
				src->addFmt("out.passParameterSem{} = ", vsSemanticId);
				_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0);
				src->add(";" _CRLF);
			}
			else
			{
				src->add("// skipped export to semanticId 255" _CRLF);
			}
		}
		else
		{
			cemu_assert_unimplemented();
		}
	}
	else if (shaderContext->shaderType == LatteConst::ShaderType::Pixel)
	{
		if (cfInstruction->exportType == 0 && cfInstruction->exportArrayBase < 8)
		{
			// the alpha test state does not change between burst elements, read it once
			const bool alphaTestEnable = shaderContext->contextRegistersNew->SX_ALPHA_TEST_CONTROL.get_ALPHA_TEST_ENABLE();
			const auto alphaTestFunc = shaderContext->contextRegistersNew->SX_ALPHA_TEST_CONTROL.get_ALPHA_FUNC();

			for (uint32 i = 0; i < (cfInstruction->exportBurstCount+1); i++)
			{
				sint32 pixelColorOutputIndex = LatteDecompiler_getColorOutputIndexFromExportIndex(shaderContext, cfInstruction->exportArrayBase+i);
				// if color output is for target 0, then also handle alpha test
				const bool appliesAlphaTest = (pixelColorOutputIndex == 0 && alphaTestEnable);
				if (appliesAlphaTest && alphaTestFunc == Latte::E_COMPAREFUNC::NEVER)
				{
					// never pass alpha test
					src->add("discard_fragment();" _CRLF);
				}
				else if (appliesAlphaTest && alphaTestFunc != Latte::E_COMPAREFUNC::ALWAYS)
				{
					// comparison operator placed between the exported alpha and the reference value
					const char* compareOp = "";
					switch (alphaTestFunc)
					{
					case Latte::E_COMPAREFUNC::LESS:
						compareOp = "<";
						break;
					case Latte::E_COMPAREFUNC::EQUAL:
						compareOp = "==";
						break;
					case Latte::E_COMPAREFUNC::LEQUAL:
						compareOp = "<=";
						break;
					case Latte::E_COMPAREFUNC::GREATER:
						compareOp = ">";
						break;
					case Latte::E_COMPAREFUNC::NOTEQUAL:
						compareOp = "!=";
						break;
					case Latte::E_COMPAREFUNC::GEQUAL:
						compareOp = ">=";
						break;
					default:
						// NEVER and ALWAYS are handled above; nothing is emitted for any other value (same as before)
						break;
					}
					src->add("if( ((");
					_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i);
					src->add(").a ");
					src->add(compareOp);
					src->add(" supportBuffer.alphaTestRef");
					src->add(") == false) discard_fragment();" _CRLF);
				}
				// pixel color output (skipped when the color buffer has no data type)
				auto dataType = GetColorBufferDataType(pixelColorOutputIndex, *shaderContext->contextRegistersNew);
				if (dataType != MetalDataType::NONE)
				{
					src->addFmt("out.passPixelColor{} = as_type<{}>(", pixelColorOutputIndex, GetDataTypeStr(dataType));
					_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, i);
					src->add(");" _CRLF);
				}

				// a burst that runs past color export 7 is not supported
				if (cfInstruction->exportArrayBase+i >= 8)
					cemu_assert_unimplemented();
			}
		}
		else if (cfInstruction->exportType == 0 && cfInstruction->exportArrayBase == 61)
		{
			// pixel depth or stencil reference export
			if (cfInstruction->exportBurstCount > 0)
				cemu_assert_unimplemented();

			// only "depth in component 0, everything else masked" is supported
			if (cfInstruction->exportComponentSel[0] == 7)
			{
				cemu_assert_unimplemented(); // depth component masked?
			}
			if (cfInstruction->exportComponentSel[1] != 7)
			{
				cemu_assert_unimplemented(); // exporting the stencil reference
			}
			if (cfInstruction->exportComponentSel[2] != 7)
			{
				cemu_assert_unimplemented(); // ukn
			}
			if (cfInstruction->exportComponentSel[3] != 7)
			{
				cemu_assert_unimplemented(); // ukn
			}

			// no depth export is emitted when depthMask is not set
			if (!shaderContext->shader->depthMask)
				return;

			src->add("out.passDepth = ");
			_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, 0);
			src->add(".x;" _CRLF);
		}
		else
		{
			cemu_assert_unimplemented();
		}
	}
}
|
|
|
|
// Appends the swizzle letters selected by the low four bits of mask
// (bit 0 -> x, bit 1 -> y, bit 2 -> z, bit 3 -> w), in that order.
static void _emitXYZWByMask(StringBuf* src, uint32 mask)
{
	static const char* const channelNames[4] = { "x", "y", "z", "w" };
	for (uint32 channel = 0; channel < 4; channel++)
	{
		if ((mask & (1 << channel)) != 0)
			src->add(channelNames[channel]);
	}
}
|
|
|
|
// Emits the MSL code for a MEM_RING_WRITE control flow instruction.
//
// Three cases are handled:
//  1) Geometry shader with streamout enabled: the ring write is matched (by byte offset) against the
//     stream writes of the parsed GS copy shader and emitted as direct stores into the streamout buffer "sb".
//  2) Vertex shader: the write is emitted as an assignment to out.passParameterSem<N>.
//  3) Geometry shader without streamout: the GS copy shader is consulted to find out whether the
//     offset corresponds to a position export or a parameter export.
// Any other shader type triggers debugBreakpoint().
static void _emitCFRingWriteCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction)
{
	StringBuf* src = shaderContext->shaderSource;
	// calculate parameter output (based on ring buffer output offset relative to GS unit)
	uint32 bytesPerVertex = shaderContext->contextRegisters[mmSQ_GS_VERT_ITEMSIZE] * 4;
	bytesPerVertex = std::max(bytesPerVertex, (uint32)1); // avoid division by zero
	uint32 parameterOffset = ((cfInstruction->exportArrayBase * 4) % bytesPerVertex);

	// for geometry shaders with streamout, MEM_RING_WRITE is used to pass the data to the copy shader, which then uses STREAM*_WRITE
	if (shaderContext->shaderType == LatteConst::ShaderType::Geometry && shaderContext->analyzer.hasStreamoutEnable)
	{
		static const char* const channelNames[4] = { "x", "y", "z", "w" };
		// if streamout is enabled, we generate transform feedback output code instead of the normal gs output
		for (uint32 burstIndex = 0; burstIndex < (cfInstruction->exportBurstCount + 1); burstIndex++)
		{
			// each burst element advances the offset by 0x10 bytes
			parameterOffset = ((cfInstruction->exportArrayBase * 4 + burstIndex*0x10) % bytesPerVertex);
			// find matching stream write in copy shader
			LatteGSCopyShaderStreamWrite_t* streamWrite = nullptr;
			for (auto& it : shaderContext->parsedGSCopyShader->list_streamWrites)
			{
				if (it.offset == parameterOffset)
				{
					streamWrite = &it;
					break;
				}
			}
			if (streamWrite == nullptr)
			{
				// no stream write consumes this offset; the remaining burst elements are skipped as well
				cemu_assert_suspicious();
				return;
			}

			// one store per enabled channel
			for (sint32 i = 0; i < 4; i++)
			{
				if ((cfInstruction->memWriteCompMask&(1 << i)) == 0)
					continue;

				uint32 u32Offset = streamWrite->exportArrayBase + i;
				src->addFmt("sb[sbBase{} + {}]", streamWrite->bufferIndex, u32Offset);
				src->add(" = ");

				// the register is read as signed int, converting from the default register type if necessary
				_emitTypeConversionPrefixMSL(shaderContext, shaderContext->typeTracker.defaultDataType, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
				src->addFmt("{}.", _getRegisterVarName(shaderContext, cfInstruction->exportSourceGPR+burstIndex));
				src->add(channelNames[i]);
				_emitTypeConversionSuffixMSL(shaderContext, shaderContext->typeTracker.defaultDataType, LATTE_DECOMPILER_DTYPE_SIGNED_INT);

				src->add(";" _CRLF);
			}
		}
		return;
	}

	if (shaderContext->shaderType == LatteConst::ShaderType::Vertex)
	{
		if (!shaderContext->contextRegistersNew->IsRasterizationEnabled())
		{
			src->add("// Rasterization disabled" _CRLF);
			return;
		}

		// only memWriteElemSize 3 with a 4-aligned array base is supported
		if (cfInstruction->memWriteElemSize != 3)
			cemu_assert_unimplemented();
		if ((cfInstruction->exportArrayBase & 3) != 0)
			cemu_assert_unimplemented();
		for (sint32 burstIndex = 0; burstIndex < (sint32)(cfInstruction->exportBurstCount + 1); burstIndex++)
		{
			src->addFmt("out.passParameterSem{}.", (cfInstruction->exportArrayBase) / 4 + burstIndex);
			_emitXYZWByMask(src, cfInstruction->memWriteCompMask);
			src->add(" = ");
			_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_SIGNED_INT, burstIndex);
			src->add(";" _CRLF);
		}
	}
	else if (shaderContext->shaderType == LatteConst::ShaderType::Geometry)
	{
		cemu_assert_debug(cfInstruction->memWriteElemSize == 3);
		cemu_assert_debug((cfInstruction->exportArrayBase & 3) == 0);

		for (uint32 burstIndex = 0; burstIndex < (cfInstruction->exportBurstCount + 1); burstIndex++)
		{
			// ask the GS copy shader what the data at this ring offset is exported as
			uint32 parameterExportType = 0;
			uint32 parameterExportBase = 0;
			if (LatteGSCopyShaderParser_getExportTypeByOffset(shaderContext->parsedGSCopyShader, parameterOffset + burstIndex * (cfInstruction->memWriteElemSize+1)*4, &parameterExportType, &parameterExportBase) == false)
			{
				cemu_assert_debug(false);
				shaderContext->hasError = true;
				return;
			}

			if (parameterExportType == 1 && parameterExportBase == GPU7_DECOMPILER_CF_EXPORT_BASE_POSITION)
			{
				// position export: components which are not written keep the default (0,0,0,1)
				src->add("{" _CRLF);
				src->add("float4 pos = float4(0.0,0.0,0.0,1.0);" _CRLF);
				src->add("pos.");
				_emitXYZWByMask(src, cfInstruction->memWriteCompMask);
				src->add(" = ");
				_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex);
				src->add(";" _CRLF);
				src->add("SET_POSITION(pos);" _CRLF);
				src->add("}" _CRLF);
			}
			else if (parameterExportType == 2 && parameterExportBase < 16)
			{
				// parameter export
				src->addFmt("out.passParameterSem{}.", parameterExportBase);
				_emitXYZWByMask(src, cfInstruction->memWriteCompMask);
				src->add(" = ");
				_emitExportGPRReadCode(shaderContext, cfInstruction, LATTE_DECOMPILER_DTYPE_FLOAT, burstIndex);
				src->add(";" _CRLF);
			}
			else
			{
				cemu_assert_debug(false);
			}
		}
	}
	else
	{
		debugBreakpoint(); // todo
	}
}
|
|
|
|
// Emits the MSL code for a MEM_STREAM0_WRITE / MEM_STREAM1_WRITE control flow instruction.
// Every channel enabled in memWriteCompMask is stored into the streamout buffer "sb" at
// sbBase<bufferIndex> + exportArrayBase + channel. Only vertex shaders are supported here.
// When streamout is not enabled for the shader nothing is emitted (debug builds leave a comment).
static void _emitStreamWriteCode(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction)
{
	StringBuf* src = shaderContext->shaderSource;
	if (shaderContext->analyzer.hasStreamoutEnable == false)
	{
#ifdef CEMU_DEBUG_ASSERT
		src->add("// omitted streamout write" _CRLF);
#endif
		return;
	}

	// initialized so that the value is well-defined even if the assert below returns
	uint32 streamoutBufferIndex = 0;
	if (cfInstruction->type == GPU7_CF_INST_MEM_STREAM0_WRITE)
		streamoutBufferIndex = 0;
	else if (cfInstruction->type == GPU7_CF_INST_MEM_STREAM1_WRITE)
		streamoutBufferIndex = 1;
	else
		cemu_assert_unimplemented();

	if (shaderContext->shaderType == LatteConst::ShaderType::Vertex)
	{
		uint32 arraySize = cfInstruction->memWriteArraySize + 1;

		for (sint32 i = 0; i < (sint32)arraySize; i++)
		{
			// skip channels which are not part of the write mask
			if ((cfInstruction->memWriteCompMask&(1 << i)) == 0)
				continue;

			uint32 u32Offset = cfInstruction->exportArrayBase + i;
			src->addFmt("sb[sbBase{} + {}]", streamoutBufferIndex, u32Offset);
			src->add(" = ");

			// the register is read as signed int, converting from the default register type if necessary
			_emitTypeConversionPrefixMSL(shaderContext, shaderContext->typeTracker.defaultDataType, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
			src->add(_getRegisterVarName(shaderContext, cfInstruction->exportSourceGPR));
			_appendChannelAccess(src, i);
			_emitTypeConversionSuffixMSL(shaderContext, shaderContext->typeTracker.defaultDataType, LATTE_DECOMPILER_DTYPE_SIGNED_INT);

			src->add(";" _CRLF);
		}
	}
	else
	{
		cemu_assert_debug(false);
	}
}
|
|
|
|
// Emits the code for a CALL control flow instruction by inlining the called subroutine.
// The subroutine is looked up by its CF address in shaderContext->list_subroutines.
// Calls issued from within a subroutine (cascaded calls) are not supported and emit nothing.
static void _emitCFCall(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction)
{
	StringBuf* src = shaderContext->shaderSource;
	const uint32 targetAddr = cfInstruction->addr;

	// locate the subroutine which starts at the call target
	LatteDecompilerSubroutineInfo* callee = nullptr;
	for (auto& candidate : shaderContext->list_subroutines)
	{
		if (candidate.cfAddr != targetAddr)
			continue;
		callee = &candidate;
		break;
	}
	if (callee == nullptr)
	{
		cemu_assert_debug(false);
		return;
	}
	if (shaderContext->isSubroutine)
	{
		cemu_assert_debug(false); // inlining with cascaded function calls not supported
		return;
	}

	// init CF stack variables
	src->addFmt("activeMaskStackSub{:04x}[0] = true;" _CRLF, callee->cfAddr);
	src->addFmt("activeMaskStackCSub{:04x}[0] = true;" _CRLF, callee->cfAddr);
	src->addFmt("activeMaskStackCSub{:04x}[1] = true;" _CRLF, callee->cfAddr);

	// emit the subroutine body inline; the context is flagged as "inside subroutine" for the duration
	shaderContext->isSubroutine = true;
	shaderContext->subroutineInfo = callee;
	for (auto& subroutineInstruction : callee->instructions)
		LatteDecompiler_emitClauseCodeMSL(shaderContext, &subroutineInstruction, true);
	shaderContext->isSubroutine = false;
	shaderContext->subroutineInfo = nullptr;
}
|
|
|
|
// Emits the MSL code for a single control flow (CF) instruction.
// Depending on the CF type this is an ALU/TEX clause, an export, an active-mask stack
// manipulation (ELSE/POP), a loop construct, a streamout/ring write, a vertex emit or a call.
// When analyzer.modifiesPixelActiveState is set, clause code is wrapped in an
// "if( <activeMaskC> == true ) { ... }" guard.
// The isSubroutine parameter is not read by this function.
void LatteDecompiler_emitClauseCodeMSL(LatteDecompilerShaderContext* shaderContext, LatteDecompilerCFInstruction* cfInstruction, bool isSubroutine)
{
	StringBuf* src = shaderContext->shaderSource;
	const auto cfType = cfInstruction->type;

	// shared by ELSE and ALU_ELSE_AFTER: invert the active mask of the current stack level
	// and recompute the combined mask of the next level
	auto emitElseMaskUpdate = [&]()
	{
		src->addFmt("{} = {} == false;" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth));
		src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth));
	};

	const bool isALUClause =
		cfType == GPU7_CF_INST_ALU ||
		cfType == GPU7_CF_INST_ALU_PUSH_BEFORE ||
		cfType == GPU7_CF_INST_ALU_POP_AFTER ||
		cfType == GPU7_CF_INST_ALU_POP2_AFTER ||
		cfType == GPU7_CF_INST_ALU_BREAK ||
		cfType == GPU7_CF_INST_ALU_ELSE_AFTER;

	if (isALUClause)
	{
		const bool isPushBefore = (cfType == GPU7_CF_INST_ALU_PUSH_BEFORE);

		// open the guard; PUSH_BEFORE tests the mask of the level below the one it is about to push
		if (shaderContext->analyzer.modifiesPixelActiveState)
		{
			if (isPushBefore)
				src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - 1));
			else
				src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
		}
		if (isPushBefore)
		{
			// push: copy the masks of the previous level to the new level
			src->addFmt("{} = {};" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth-1));
			src->addFmt("{} = {};" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth));
		}
		// emit ALU code
		_emitALUClauseCode(shaderContext, cfInstruction);
		if (shaderContext->analyzer.modifiesPixelActiveState)
			src->add("}" _CRLF);
		// any ALU clause type other than plain ALU requires active mask tracking
		cemu_assert_debug(!(shaderContext->analyzer.modifiesPixelActiveState == false && cfInstruction->type != GPU7_CF_INST_ALU));
		// handle ELSE case of PUSH_BEFORE
		if (isPushBefore)
		{
			src->add("else {" _CRLF);
			src->addFmt("{} = false;" _CRLF, _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth));
			src->addFmt("{} = false;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
			src->add("}" _CRLF);
		}
		// post clause handler
		if (cfType == GPU7_CF_INST_ALU_POP_AFTER)
		{
			src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - 1), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth - 1), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth - 1));
		}
		else if (cfType == GPU7_CF_INST_ALU_POP2_AFTER)
		{
			src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - 2), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth - 2), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth - 2));
		}
		else if (cfType == GPU7_CF_INST_ALU_ELSE_AFTER)
		{
			// no condition test
			// pop stack
			if (cfInstruction->popCount != 0)
				debugBreakpoint();
			// else operation
			emitElseMaskUpdate();
		}
		return;
	}

	if (cfType == GPU7_CF_INST_TEX)
	{
		// emit TEX code, guarded by the active mask if the shader tracks it
		if (shaderContext->analyzer.modifiesPixelActiveState)
		{
			src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth+1));
		}
		_emitTEXClauseCode(shaderContext, cfInstruction);
		if (shaderContext->analyzer.modifiesPixelActiveState)
		{
			src->add("}" _CRLF);
		}
		return;
	}

	if (cfType == GPU7_CF_INST_EXPORT || cfType == GPU7_CF_INST_EXPORT_DONE)
	{
		// emit export code
		_emitExportCode(shaderContext, cfInstruction);
		return;
	}

	if (cfType == GPU7_CF_INST_ELSE)
	{
		// todo: Condition test, popCount?
		emitElseMaskUpdate();
		return;
	}

	if (cfType == GPU7_CF_INST_POP)
	{
		src->addFmt("{} = {} == true && {} == true;" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1 - cfInstruction->popCount), _getActiveMaskVarName(shaderContext, cfInstruction->activeStackDepth - cfInstruction->popCount), _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth - cfInstruction->popCount));
		return;
	}

	if (cfType == GPU7_CF_INST_LOOP_START_DX10 || cfType == GPU7_CF_INST_LOOP_START_NO_AL)
	{
		// start of loop
		// if pixel is disabled, then skip loop
		if (ActiveSettings::ShaderPreventInfiniteLoopsEnabled())
		{
			// with iteration limit to prevent infinite loops
			src->addFmt("int loopCounter{} = 0;" _CRLF, (sint32)cfInstruction->cfAddr);
			src->addFmt("while( {} == true && loopCounter{} < 500 )" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1), (sint32)cfInstruction->cfAddr);
			src->add("{" _CRLF);
			src->addFmt("loopCounter{}++;" _CRLF, (sint32)cfInstruction->cfAddr);
		}
		else
		{
			src->addFmt("while( {} == true )" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
			src->add("{" _CRLF);
		}
		return;
	}

	if (cfType == GPU7_CF_INST_LOOP_END)
	{
		// this might not always work
		if (cfInstruction->popCount != 0)
			debugBreakpoint();
		src->add("}" _CRLF);
		return;
	}

	if (cfType == GPU7_CF_INST_LOOP_BREAK)
	{
		if (cfInstruction->popCount != 0)
			debugBreakpoint();
		if (shaderContext->analyzer.modifiesPixelActiveState)
		{
			src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
		}
		// note: active stack level is set to the same level as the loop begin. popCount is ignored
		src->add("break;" _CRLF);
		if (shaderContext->analyzer.modifiesPixelActiveState)
			src->add("}" _CRLF);
		return;
	}

	if (cfType == GPU7_CF_INST_MEM_STREAM0_WRITE || cfType == GPU7_CF_INST_MEM_STREAM1_WRITE)
	{
		_emitStreamWriteCode(shaderContext, cfInstruction);
		return;
	}

	if (cfType == GPU7_CF_INST_MEM_RING_WRITE)
	{
		_emitCFRingWriteCode(shaderContext, cfInstruction);
		return;
	}

	if (cfType == GPU7_CF_INST_EMIT_VERTEX)
	{
		if (shaderContext->analyzer.modifiesPixelActiveState)
			src->addFmt("if( {} == true ) {{" _CRLF, _getActiveMaskCVarName(shaderContext, cfInstruction->activeStackDepth + 1));
		// write point size from the support buffer if the shader does not write it itself
		if (shaderContext->analyzer.outputPointSize && shaderContext->analyzer.writesPointSize == false)
			src->add("out.pointSize = supportBuffer.pointSize;" _CRLF);
		src->add("mesh.set_vertex(vertexIndex, out);" _CRLF);
		src->add("vertexIndex++;" _CRLF);
		// increment transform feedback pointer (stride is given in bytes, sbBase counts 32bit words)
		for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
		{
			if (!shaderContext->output->streamoutBufferWriteMask[i])
				continue;
			cemu_assert_debug((shaderContext->output->streamoutBufferStride[i] & 3) == 0);
			src->addFmt("sbBase{} += {};" _CRLF, i, shaderContext->output->streamoutBufferStride[i] / 4);
		}

		if (shaderContext->analyzer.modifiesPixelActiveState)
			src->add("}" _CRLF);
		return;
	}

	if (cfType == GPU7_CF_INST_CALL)
	{
		_emitCFCall(shaderContext, cfInstruction);
		return;
	}

	if (cfType == GPU7_CF_INST_RETURN)
	{
		// todo (handle properly)
		return;
	}

	// unhandled CF instruction type
	cemu_assert_debug(false);
}
|
|
|
|
// Appends the MSL helper functions which the generated shader code may call:
//  - redcCUBE            (only if analyzer.hasRedcCUBE)
//  - redcCUBEReverse     (only if analyzer.hasCubeMapTexture)
//  - sampleCompareEmulate, textureCalculateLod, clampFI32 (always)
//  - mul_nonIEEE         (only if options->strictMul)
void LatteDecompiler_emitHelperFunctions(LatteDecompilerShaderContext* shaderContext, StringBuf* fCStr_shaderSource)
{
	StringBuf* const out = fCStr_shaderSource;

	// cube coordinate reduction: picks the major axis and derives the face id
	if (shaderContext->analyzer.hasRedcCUBE)
	{
		out->add(
			"void redcCUBE(float4 src0, float4 src1, thread float3& stm, thread int& faceId)\r\n"
			"{\r\n"
			"// stm -> x .. s, y .. t, z .. MajorAxis*2.0\r\n"

			"float3 inputCoord = normalize(float3(src1.y, src1.x, src0.x));\r\n"

			"float rx = inputCoord.x;\r\n"
			"float ry = inputCoord.y;\r\n"
			"float rz = inputCoord.z;\r\n"
			"if( abs(rx) > abs(ry) && abs(rx) > abs(rz) )\r\n"
			"{\r\n"
			"stm.z = rx*2.0;\r\n"
			"stm.xy = float2(ry,rz); \r\n"
			"if( rx >= 0.0 )\r\n"
			"{\r\n"
			"faceId = 0;\r\n"
			"}\r\n"
			"else\r\n"
			"{\r\n"
			"faceId = 1;\r\n"
			"}\r\n"
			"}\r\n"
			"else if( abs(ry) > abs(rx) && abs(ry) > abs(rz) )\r\n"
			"{\r\n"
			"stm.z = ry*2.0;\r\n"
			"stm.xy = float2(rx,rz); \r\n"
			"if( ry >= 0.0 )\r\n"
			"{\r\n"
			"faceId = 2;\r\n"
			"}\r\n"
			"else\r\n"
			"{\r\n"
			"faceId = 3;\r\n"
			"}\r\n"
			"}\r\n"
			"else //if( abs(rz) > abs(ry) && abs(rz) > abs(rx) )\r\n"
			"{\r\n"
			"stm.z = rz*2.0;\r\n"
			"stm.xy = float2(rx,ry); \r\n"
			"if( rz >= 0.0 )\r\n"
			"{\r\n"
			"faceId = 4;\r\n"
			"}\r\n"
			"else\r\n"
			"{\r\n"
			"faceId = 5;\r\n"
			"}\r\n"
			"}\r\n"
			"}\r\n");
	}

	// inverse of the cube reduction: rebuilds a direction vector from face coordinates and face id
	if (shaderContext->analyzer.hasCubeMapTexture)
	{
		out->add(
			"float3 redcCUBEReverse(float2 st, int faceId)\r\n"
			"{\r\n"
			"st.yx = st.xy;\r\n"
			"float3 v;\r\n"
			"float majorAxis = 1.0;\r\n"
			"if( faceId == 0 )\r\n"
			"{\r\n"
			"v.yz = (st-float2(1.5))*(majorAxis*2.0);\r\n"
			"v.x = 1.0;\r\n"
			"}\r\n"
			"else if( faceId == 1 )\r\n"
			"{\r\n"
			"v.yz = (st-float2(1.5))*(majorAxis*2.0);\r\n"
			"v.x = -1.0;\r\n"
			"}\r\n"
			"else if( faceId == 2 )\r\n"
			"{\r\n"
			"v.xz = (st-float2(1.5))*(majorAxis*2.0);\r\n"
			"v.y = 1.0;\r\n"
			"}\r\n"
			"else if( faceId == 3 )\r\n"
			"{\r\n"
			"v.xz = (st-float2(1.5))*(majorAxis*2.0);\r\n"
			"v.y = -1.0;\r\n"
			"}\r\n"
			"else if( faceId == 4 )\r\n"
			"{\r\n"
			"v.xy = (st-float2(1.5))*(majorAxis*2.0);\r\n"
			"v.z = 1.0;\r\n"
			"}\r\n"
			"else\r\n"
			"{\r\n"
			"v.xy = (st-float2(1.5))*(majorAxis*2.0);\r\n"
			"v.z = -1.0;\r\n"
			"}\r\n"

			"return v;\r\n"
			"}\r\n");
	}

	// Sample compare emulate
	// TODO: only add when needed
	// TODO: lod_options overload
	// TODO: when the sampler has linear min mag filter, use gather and filter manually
	// TODO: offset?
	out->add(
		"template<typename TextureT, typename CoordT>\r\n"
		"float sampleCompareEmulate(TextureT tex, sampler samplr, CoordT coord, float compareValue) {\r\n"
		"return compareValue < tex.sample(samplr, coord).x ? 1.0 : 0.0;\r\n"
		"}\r\n");

	// Texture calculate lod
	// TODO: only add when needed
	out->add(
		"template<typename TextureT, typename CoordT>\r\n"
		"float2 textureCalculateLod(TextureT tex, sampler samplr, CoordT coord) {\r\n"
		"float lod = tex.calculate_unclamped_lod(samplr, coord);\r\n"
		"return float2(floor(lod), fract(lod));\r\n"
		"}\r\n");

	// clamp (operates on the bit pattern of a float stored in an int)
	out->add(
		"int clampFI32(int v)\r\n"
		"{\r\n"
		"if( v == 0x7FFFFFFF )\r\n"
		" return as_type<int>(1.0);\r\n"
		"else if( v == 0xFFFFFFFF )\r\n"
		" return as_type<int>(0.0);\r\n"
		"return as_type<int>(clamp(as_type<float>(v), 0.0, 1.0));\r\n"
		"}\r\n");

	// mul non-ieee way (0*NaN/INF => 0.0)
	if (!shaderContext->options->strictMul)
		return;

	// things we tried:
	//fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return mix(a*b,0.0,a==0.0||b==0.0); }" STR_LINEBREAK);
	//fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return mix(vec2(a*b,0.0),vec2(0.0,0.0),(equal(vec2(a),vec2(0.0,0.0))||equal(vec2(b),vec2(0.0,0.0)))).x; }" STR_LINEBREAK);
	//fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" STR_LINEBREAK);
	//fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){float r = a*b;r = intBitsToFloat(floatBitsToInt(r)&(((floatBitsToInt(a) != 0) && (floatBitsToInt(b) != 0))?0xFFFFFFFF:0));return r;}" STR_LINEBREAK); works

	// for "min" it used to be: float mul_nonIEEE(float a, float b){ return min(a*b,min(abs(a)*3.40282347E+38F,abs(b)*3.40282347E+38F)); }

	const bool useMixVariant = (LatteGPUState.glVendor == GLVENDOR_NVIDIA && !ActiveSettings::DumpShadersEnabled());
	if (useMixVariant)
	{
		// compiles faster on Nvidia and also results in lower RAM usage (OpenGL)
		out->add("float mul_nonIEEE(float a, float b){return mix(0.0, a*b, (a != 0.0) && (b != 0.0));}" _CRLF);
	}
	else
	{
		out->add("float mul_nonIEEE(float a, float b){ if( a == 0.0 || b == 0.0 ) return 0.0; return a*b; }" _CRLF);
	}

	// DXKV-like: fCStr_shaderSource->add("float mul_nonIEEE(float a, float b){ return (b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b); }" _CRLF);
}
|
|
|
|
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerEmitMSLHeader.hpp"
|
|
|
|
// Emits the code which loads a fetch shader attribute into its vertex shader input register.
// The attribute is first decoded into "attrDecoder" (via LatteDecompiler_emitAttributeDecodeMSL) and
// then written to the register as int4/float4, with each destination component selected by attrib.ds[].
// Nothing is emitted for attributes whose semantic id is not present in the SQ_VTX_SEMANTIC table;
// attributes whose target register is unused only produce a comment.
static void LatteDecompiler_emitAttributeImport(LatteDecompilerShaderContext* shaderContext, LatteParsedFetchShaderAttribute_t& attrib)
{
	auto src = shaderContext->shaderSource;

	// source expression per destination select value: 0-3 = decoded channel, 4 = constant zero, 5 = constant one
	// (the "one" of the float table is the bit pattern of 1.0f)
	static const char* dsMappingTableFloat[6] = { "int(attrDecoder.x)", "int(attrDecoder.y)", "int(attrDecoder.z)", "int(attrDecoder.w)", /*"floatBitsToInt(0.0)"*/ "0", /*"floatBitsToInt(1.0)"*/ "0x3f800000" };
	static const char* dsMappingTableInt[6] = { "int(attrDecoder.x)", "int(attrDecoder.y)", "int(attrDecoder.z)", "int(attrDecoder.w)", "0", "1" };

	// get register index based on vtx semantic table
	const uint32 kNotMapped = 0xFFFFFFFF;
	uint32 attributeShaderLoc = kNotMapped;
	for (uint32 slot = 0; slot < 32; slot++)
	{
		if (shaderContext->contextRegisters[mmSQ_VTX_SEMANTIC_0 + slot] != attrib.semanticId)
			continue;
		attributeShaderLoc = slot;
		break;
	}
	if (attributeShaderLoc == kNotMapped)
		return; // attribute is not mapped to VS input

	const uint32 registerIndex = attributeShaderLoc + 1; // R0 is skipped
	// is register used?
	const bool isRegisterUsed = (shaderContext->analyzer.gprUseMask[registerIndex / 8] & (1 << (registerIndex % 8))) != 0;
	if (!isRegisterUsed)
	{
		src->addFmt("// skipped unused attribute for r{}" _CRLF, registerIndex);
		return;
	}

	LatteDecompiler_emitAttributeDecodeMSL(shaderContext->shader, src, &attrib);

	// "<register> = int4(" or "<register> = float4(" depending on the default register type
	if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
		src->addFmt("{} = int4(", _getRegisterVarName(shaderContext, registerIndex));
	else
		src->addFmt("{} = float4(", _getRegisterVarName(shaderContext, registerIndex));

	for (sint32 component = 0; component < 4; component++)
	{
		uint8 ds = attrib.ds[component];
		if (component > 0)
			src->add(", ");
		// the table entries are int expressions, convert to the default register type if necessary
		_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType);
		if (ds >= 6)
		{
			cemu_assert_unimplemented();
			ds = 4; // read as 0.0
		}
		// nfa == 1 selects the integer table, every other value the float table
		const char* const* dsMappingTable = (attrib.nfa != 1) ? dsMappingTableFloat : dsMappingTableInt;
		src->add(dsMappingTable[ds]);
		_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_SIGNED_INT, shaderContext->typeTracker.defaultDataType);
	}
	src->add(");" _CRLF);
}
|
|
|
|
/*
 * Emits the complete Metal Shading Language source for one decompiled shader.
 *
 * The generated text is built in this order:
 *   1. debug header, metal_stdlib include, input/output definitions and helper functions
 *   2. (vertex shaders with manual vertex fetch only) the VERTEX_BUFFER_DEFINITIONS / VERTEX_BUFFERS
 *      macros and the fetchVertex() function
 *   3. the main0() entry point: temporaries, register variables, input import, the translated
 *      control flow clauses and the stage specific epilogue
 *
 * shaderContext - decompiler state (analyzer results, context registers, fetch shader, output info).
 *                 Its shaderSource member is set to the buffer created here.
 * shader        - shader being generated. Its shaderType selects the stage specific code paths and
 *                 its strBuf_shaderSource member receives the finished source buffer.
 *
 * The StringBuf is allocated with new and is not freed by this function; after the call it is
 * referenced by both shaderContext->shaderSource and shader->strBuf_shaderSource.
 */
void LatteDecompiler_emitMSLShader(LatteDecompilerShaderContext* shaderContext, LatteDecompilerShader* shader)
{
	bool isRectVertexShader = UseRectEmulation(*shaderContext->contextRegistersNew);
	bool usesGeometryShader = UseGeometryShader(*shaderContext->contextRegistersNew, shaderContext->options->usesGeometryShader);
	// vertices are always fetched manually when a geometry shader is used, otherwise only if the fetch shader requests it
	bool fetchVertexManually = (usesGeometryShader || (shaderContext->fetchShader && shaderContext->fetchShader->mtlFetchVertexManually));

	StringBuf* src = new StringBuf(1024*1024*12); // reserve 12MB for generated source (we resize-to-fit at the end)
	shaderContext->shaderSource = src;

	// debug info
	src->addFmt("// shader {:016x}" _CRLF, shaderContext->shaderBaseHash);
#ifdef CEMU_DEBUG_ASSERT
	src->addFmt("// usesIntegerValues: {}" _CRLF, shaderContext->analyzer.usesIntegerValues ? "true" : "false");
	src->addFmt(_CRLF);
#endif
	// include metal standard library
	src->add("#include <metal_stdlib>" _CRLF);
	src->add("using namespace metal;" _CRLF);
	// header part (definitions for inputs and outputs)
	LatteDecompiler::emitHeader(shaderContext, isRectVertexShader, usesGeometryShader, fetchVertexManually);
	// helper functions
	LatteDecompiler_emitHelperFunctions(shaderContext, src);
	const char* functionType = "";
	const char* outputTypeName = "";
	switch (shader->shaderType)
	{
	case LatteConst::ShaderType::Vertex:
		if (fetchVertexManually)
		{
			// TODO: clean this up
			// fetchVertex will modify vid in case of an object shader and an indexed draw

			// Vertex buffers
			std::string vertexBufferDefinitions = "#define VERTEX_BUFFER_DEFINITIONS ";
			std::string vertexBuffers = "#define VERTEX_BUFFERS ";
			std::string inputFetchDefinition = "VertexIn fetchVertex(";
			if (usesGeometryShader)
				inputFetchDefinition += "thread uint&";
			else
				inputFetchDefinition += "uint";
			inputFetchDefinition += " vid, uint iid";
			if (usesGeometryShader)
				inputFetchDefinition += ", device uint* indexBuffer, uchar indexType";
			inputFetchDefinition += " VERTEX_BUFFER_DEFINITIONS) {\n";

			// Index buffer
			if (usesGeometryShader)
			{
				inputFetchDefinition += "if (indexType == 1) // UShort\n";
				inputFetchDefinition += "vid = ((device ushort*)indexBuffer)[vid];\n";
				inputFetchDefinition += "else if (indexType == 2) // UInt\n";
				inputFetchDefinition += "vid = ((device uint*)indexBuffer)[vid];\n";
			}

			inputFetchDefinition += "VertexIn in;\n";
			for (auto& bufferGroup : shaderContext->fetchShader->bufferGroups)
			{
				// fetch type of the first used attribute in this group, only used to assert that all attributes agree
				std::optional<LatteConst::VertexFetchType2> fetchType;

				uint32 bufferIndex = bufferGroup.attributeBufferIndex;
				uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
				uint32 bufferStride = (shaderContext->contextRegisters[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;

				for (sint32 j = 0; j < bufferGroup.attribCount; ++j)
				{
					auto& attr = bufferGroup.attrib[j];

					uint32 semanticId = shaderContext->output->resourceMappingMTL.attributeMapping[attr.semanticId];
					if (semanticId == (uint32)-1)
						continue; // attribute not used?

					// map the vertex format to the MSL type that is read from the buffer
					// NOTE(review): formats not listed here leave formatName empty and componentCount at 0 - confirm GetMtlVertexFormat cannot return others on this path
					std::string formatName;
					uint8 componentCount = 0;
					switch (GetMtlVertexFormat(attr.format))
					{
					case MTL::VertexFormatUChar:
						formatName = "uchar";
						componentCount = 1;
						break;
					case MTL::VertexFormatUChar2:
						formatName = "uchar2";
						componentCount = 2;
						break;
					case MTL::VertexFormatUChar3:
						formatName = "uchar3";
						componentCount = 3;
						break;
					case MTL::VertexFormatUChar4:
						formatName = "uchar4";
						componentCount = 4;
						break;
					case MTL::VertexFormatUShort:
						formatName = "ushort";
						componentCount = 1;
						break;
					case MTL::VertexFormatUShort2:
						formatName = "ushort2";
						componentCount = 2;
						break;
					case MTL::VertexFormatUShort3:
						formatName = "ushort3";
						componentCount = 3;
						break;
					case MTL::VertexFormatUShort4:
						formatName = "ushort4";
						componentCount = 4;
						break;
					case MTL::VertexFormatUInt:
						formatName = "uint";
						componentCount = 1;
						break;
					case MTL::VertexFormatUInt2:
						formatName = "uint2";
						componentCount = 2;
						break;
					case MTL::VertexFormatUInt3:
						formatName = "uint3";
						componentCount = 3;
						break;
					case MTL::VertexFormatUInt4:
						formatName = "uint4";
						componentCount = 4;
						break;
					}

					// Get the fetch type
					std::string fetchTypeStr;
					if (attr.fetchType == LatteConst::VertexFetchType2::VERTEX_DATA)
						fetchTypeStr = "vid";
					else if (attr.fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA)
						fetchTypeStr = "iid";
					else if (attr.fetchType == LatteConst::VertexFetchType2::NO_INDEX_OFFSET_DATA)
						fetchTypeStr = "0"; // TODO: correct?

					// Fetch the attribute
					// the value is read at (buffer + index * stride + offset) and widened to uint4, missing components are filled with 0
					inputFetchDefinition += fmt::format("in.ATTRIBUTE_NAME{} = uint4(uint", semanticId);
					if (componentCount != 1)
						inputFetchDefinition += fmt::format("{}", componentCount);
					inputFetchDefinition += fmt::format("(*(device {}*)", formatName);
					inputFetchDefinition += fmt::format("(vertexBuffer{}", attr.attributeBufferIndex);
					inputFetchDefinition += fmt::format(" + {} * {} + {}))", fetchTypeStr, bufferStride, attr.offset);
					for (uint8 i = 0; i < (4 - componentCount); i++)
						inputFetchDefinition += ", 0";
					inputFetchDefinition += ");\n";

					if (fetchType.has_value())
						cemu_assert_debug(fetchType == attr.fetchType);
					else
						fetchType = attr.fetchType;

					if (attr.fetchType == LatteConst::VertexFetchType2::INSTANCE_DATA)
					{
						cemu_assert_debug(attr.aluDivisor == 1); // other divisor not yet supported
					}
				}

				// TODO: fetch type

				vertexBufferDefinitions += fmt::format(", device uchar* vertexBuffer{} [[buffer({})]]", bufferIndex, GET_MTL_VERTEX_BUFFER_INDEX(bufferIndex));
				vertexBuffers += fmt::format(", vertexBuffer{}", bufferIndex);
			}

			inputFetchDefinition += "return in;\n";
			inputFetchDefinition += "}\n";

			src->add(vertexBufferDefinitions.c_str());
			src->add("\n");
			src->add(vertexBuffers.c_str());
			src->add("\n");
			src->add(inputFetchDefinition.c_str());
		}

		if (usesGeometryShader)
		{
			// with a geometry shader the vertex stage is emitted as an object function
			functionType = "[[object, max_total_threads_per_threadgroup(VERTICES_PER_VERTEX_PRIMITIVE), max_total_threadgroups_per_mesh_grid(1)]]";
			outputTypeName = "void";
		}
		else
		{
			functionType = "vertex";
			if (shaderContext->contextRegistersNew->IsRasterizationEnabled())
				outputTypeName = "VertexOut";
			else
				outputTypeName = "void";
		}
		break;
	case LatteConst::ShaderType::Geometry:
		// geometry shaders are emitted as mesh functions
		functionType = "[[mesh, max_total_threads_per_threadgroup(1)]]";
		outputTypeName = "void";
		break;
	case LatteConst::ShaderType::Pixel:
		functionType = "fragment";
		outputTypeName = "FragmentOut";
		break;
	}
	// start of main
	src->addFmt("{} {} main0(", functionType, outputTypeName);
	LatteDecompiler::emitInputs(shaderContext, isRectVertexShader, usesGeometryShader, fetchVertexManually);
	src->add(") {" _CRLF);
	if (fetchVertexManually && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
	{
		if (shader->shaderType == LatteConst::ShaderType::Vertex)
		{
			if (usesGeometryShader)
			{
				// Calculate the imaginary vertex id
				LattePrimitiveMode vsOutPrimType = shaderContext->contextRegistersNew->VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
				if (PrimitiveRequiresConnection(vsOutPrimType))
					src->add("uint vid = tig + tid;" _CRLF);
				else
					src->add("uint vid = tig * VERTICES_PER_VERTEX_PRIMITIVE + tid;" _CRLF);
				src->add("uint iid = vid / supportBuffer.verticesPerInstance;" _CRLF);
				src->add("vid %= supportBuffer.verticesPerInstance;" _CRLF);

				// Fetch the input
				src->add("VertexIn in = fetchVertex(vid, iid, indexBuffer, indexType VERTEX_BUFFERS);" _CRLF);

				// Output is defined as object payload
				src->add("object_data VertexOut& out = objectPayload.vertexOut[tid];" _CRLF);
			}
			else
			{
				// Fetch the input
				src->add("VertexIn in = fetchVertex(vid, iid VERTEX_BUFFERS);" _CRLF);
			}
		}
		else if (shader->shaderType == LatteConst::ShaderType::Geometry)
		{
			src->add("GeometryOut out;" _CRLF);
			// The index of the current vertex that is being emitted
			src->add("uint vertexIndex = 0;" _CRLF);
		}
	}

	// declare the output struct (must match the condition of the "return out;" emitted at the end of main)
	if (shader->shaderType == LatteConst::ShaderType::Pixel || (shaderContext->contextRegistersNew->IsRasterizationEnabled() && !usesGeometryShader))
	{
		src->addFmt("{} out;" _CRLF, outputTypeName);
	}

	// variable definition
	if (shaderContext->typeTracker.useArrayGPRs == false)
	{
		// each register is a separate variable
		for (sint32 i = 0; i < 128; i++)
		{
			if (shaderContext->analyzer.usesRelativeGPRRead || (shaderContext->analyzer.gprUseMask[i / 8] & (1 << (i & 7))) != 0)
			{
				if (shaderContext->typeTracker.genIntReg)
					src->addFmt("int4 R{}i = int4(0);" _CRLF, i);
				else if (shaderContext->typeTracker.genFloatReg)
					src->addFmt("float4 R{}f = float4(0.0);" _CRLF, i);
			}
		}
	}
	else
	{
		// registers are represented using a single large array
		if (shaderContext->typeTracker.genIntReg)
			src->addFmt("int4 Ri[128];" _CRLF);
		else if (shaderContext->typeTracker.genFloatReg)
			src->addFmt("float4 Rf[128];" _CRLF);
		for (sint32 i = 0; i < 128; i++)
		{
			if (shaderContext->typeTracker.genIntReg)
				src->addFmt("Ri[{}] = int4(0);" _CRLF, i);
			else if (shaderContext->typeTracker.genFloatReg)
				src->addFmt("Rf[{}] = float4(0.0);" _CRLF, i);
		}
	}

	if( shader->shaderType == LatteConst::ShaderType::Vertex )
		src->addFmt("uint4 attrDecoder;" _CRLF);
	if (shaderContext->typeTracker.genIntReg)
		src->addFmt("int backupReg0i, backupReg1i, backupReg2i, backupReg3i, backupReg4i;" _CRLF);
	if (shaderContext->typeTracker.genFloatReg)
		src->addFmt("float backupReg0f, backupReg1f, backupReg2f, backupReg3f, backupReg4f;" _CRLF);
	if (shaderContext->typeTracker.genIntReg)
	{
		src->addFmt("int PV0ix = 0, PV0iy = 0, PV0iz = 0, PV0iw = 0, PV1ix = 0, PV1iy = 0, PV1iz = 0, PV1iw = 0;" _CRLF);
		src->addFmt("int PS0i = 0, PS1i = 0;" _CRLF);
		src->addFmt("int4 tempi = int4(0);" _CRLF);
	}
	if (shaderContext->typeTracker.genFloatReg)
	{
		src->addFmt("float PV0fx = 0.0, PV0fy = 0.0, PV0fz = 0.0, PV0fw = 0.0, PV1fx = 0.0, PV1fy = 0.0, PV1fz = 0.0, PV1fw = 0.0;" _CRLF);
		src->addFmt("float PS0f = 0.0, PS1f = 0.0;" _CRLF);
		src->addFmt("float4 tempf = float4(0.0);" _CRLF);
	}
	if (shaderContext->analyzer.hasGradientLookup)
	{
		src->add("float4 gradH;" _CRLF);
		src->add("float4 gradV;" _CRLF);
	}
	src->add("float tempResultf;" _CRLF);
	src->add("int tempResulti;" _CRLF);
	src->add("int4 ARi = int4(0);" _CRLF);
	src->add("bool predResult = true;" _CRLF);
	if(shaderContext->analyzer.modifiesPixelActiveState )
	{
		src->addFmt("bool activeMaskStack[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+1);
		src->addFmt("bool activeMaskStackC[{}];" _CRLF, shaderContext->analyzer.activeStackMaxDepth+2);
		// NOTE(review): both init loops stop one element short of the declared array sizes, the last element of each array is not explicitly initialized - confirm this is intended
		for (sint32 i = 0; i < shaderContext->analyzer.activeStackMaxDepth; i++)
		{
			src->addFmt("activeMaskStack[{}] = false;" _CRLF, i);
		}
		for (sint32 i = 0; i < shaderContext->analyzer.activeStackMaxDepth+1; i++)
		{
			src->addFmt("activeMaskStackC[{}] = false;" _CRLF, i);
		}
		src->addFmt("activeMaskStack[0] = true;" _CRLF);
		src->addFmt("activeMaskStackC[0] = true;" _CRLF);
		src->addFmt("activeMaskStackC[1] = true;" _CRLF);
		// generate vars for each subroutine
		for (auto& subroutineInfo : shaderContext->list_subroutines)
		{
			sint32 subroutineMaxStackDepth = 0;
			src->addFmt("bool activeMaskStackSub{:04x}[{}];" _CRLF, subroutineInfo.cfAddr, subroutineMaxStackDepth + 1);
			src->addFmt("bool activeMaskStackCSub{:04x}[{}];" _CRLF, subroutineInfo.cfAddr, subroutineMaxStackDepth + 2);
		}
	}
	// helper variables for cube maps (todo: Only emit when used)
	if (shaderContext->analyzer.hasRedcCUBE)
	{
		src->add("float3 cubeMapSTM;" _CRLF);
		src->add("int cubeMapFaceId;" _CRLF);
	}
	for(sint32 i=0; i<LATTE_NUM_MAX_TEX_UNITS; i++)
	{
		if(!shaderContext->output->textureUnitMask[i])
			continue;
		if( shader->textureUnitDim[i] != Latte::E_DIM::DIM_CUBEMAP )
			continue;
		src->addFmt("float cubeMapArrayIndex{} = 0.0;" _CRLF, i);
	}
	// init base offset for streamout buffer writes
	if (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry)
	{
		for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
		{
			if(!shaderContext->output->streamoutBufferWriteMask[i])
				continue;

			// the stride is divided by 4 below, so it has to be a multiple of 4
			cemu_assert_debug((shaderContext->output->streamoutBufferStride[i]&3) == 0);

			if (shader->shaderType == LatteConst::ShaderType::Vertex) // vertex shader
				src->addFmt("int sbBase{} = supportBuffer.streamoutBufferBase{}/4 + (vid + supportBuffer.verticesPerInstance * iid)*{};" _CRLF, i, i, shaderContext->output->streamoutBufferStride[i] / 4);
			else // geometry shader
			{
				uint32 gsOutPrimType = shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE];
				uint32 bytesPerVertex = shaderContext->contextRegisters[mmSQ_GS_VERT_ITEMSIZE] * 4;
				uint32 maxVerticesInGS = ((shaderContext->contextRegisters[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF) * 4) / bytesPerVertex;

				cemu_assert_debug(gsOutPrimType == 0); // currently we only properly handle GS output primitive points

				// NOTE(review): gl_PrimitiveIDIn is a GLSL builtin name - verify that the MSL header provides a definition for it
				src->addFmt("int sbBase{} = supportBuffer.streamoutBufferBase{}/4 + (gl_PrimitiveIDIn * {})*{};" _CRLF, i, i, maxVerticesInGS, shaderContext->output->streamoutBufferStride[i] / 4);
			}
		}

	}
	// code to load inputs from previous stage
	if( shader->shaderType == LatteConst::ShaderType::Vertex )
	{
		// R0 receives the vertex id in x and the instance id in w (only emitted if R0 is used)
		if( (shaderContext->analyzer.gprUseMask[0/8]&(1<<(0%8))) != 0 )
		{
			if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
				src->addFmt("{} = int4(vid, 0, 0, iid);" _CRLF, _getRegisterVarName(shaderContext, 0));
			else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
				src->addFmt("{} = float4(vid, 0, 0, iid);" _CRLF, _getRegisterVarName(shaderContext, 0)); // TODO: as_type<int4>(float4(vid, 0, 0, iid))?
			else
				cemu_assert_unimplemented();
		}

		LatteFetchShader* parsedFetchShader = shaderContext->fetchShader;
		for(auto& bufferGroup : parsedFetchShader->bufferGroups)
		{
			for(sint32 i=0; i<bufferGroup.attribCount; i++)
				LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]);
		}
		for (auto& bufferGroup : parsedFetchShader->bufferGroupsInvalid)
		{
			// these attributes point to non-existent buffers
			// todo - figure out how the hardware actually handles this, currently we assume the input values are zero
			for (sint32 i = 0; i < bufferGroup.attribCount; i++)
				LatteDecompiler_emitAttributeImport(shaderContext, bufferGroup.attrib[i]);
		}
	}
	else if (shader->shaderType == LatteConst::ShaderType::Pixel)
	{
		LatteShaderPSInputTable* psInputTable = LatteSHRC_GetPSInputTable();

		uint32 psControl1 = shaderContext->contextRegisters[mmSPI_PS_IN_CONTROL_1];

		uint32 spiInterpControl = shaderContext->contextRegisters[mmSPI_INTERP_CONTROL_0];
		uint8 spriteEnable = (spiInterpControl >> 1) & 1;
		cemu_assert_debug(spriteEnable == 0);

		// front face settings are packed into SPI_PS_IN_CONTROL_1
		uint8 frontFace_enabled = (psControl1 >> 8) & 1;
		uint8 frontFace_chan = (psControl1 >> 9) & 3;
		uint8 frontFace_allBits = (psControl1 >> 11) & 1;
		uint8 frontFace_regIndex = (psControl1 >> 12) & 0x1F;

		// handle param_gen
		if (psInputTable->paramGen != 0)
		{
			cemu_assert_debug((psInputTable->paramGen) == 1); // handle the other bits (the same set of coordinates with different perspective/projection settings?)
			uint32 paramGenGPRIndex = psInputTable->paramGenGPR;
			if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
				src->addFmt("{} = pointCoord.xyxy;" _CRLF, _getRegisterVarName(shaderContext, paramGenGPRIndex));
			else
				src->addFmt("{} = as_type<int4>(pointCoord.xyxy);" _CRLF, _getRegisterVarName(shaderContext, paramGenGPRIndex));
		}

		for (sint32 i = 0; i < psInputTable->count; i++)
		{
			// input i is imported into register i, no param-gen or register offset is applied
			sint32 gprIndex = i;
			if ((shaderContext->analyzer.gprUseMask[gprIndex / 8] & (1 << (gprIndex % 8))) == 0 && shaderContext->analyzer.usesRelativeGPRRead == false)
				continue;
			uint32 psInputSemanticId = psInputTable->import[i].semanticId;
			if (psInputSemanticId == LATTE_ANALYZER_IMPORT_INDEX_SPIPOSITION)
			{
				if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
					src->addFmt("{} = GET_FRAGCOORD();" _CRLF, _getRegisterVarName(shaderContext, gprIndex));
				else
					src->addFmt("{} = as_type<int4>(GET_FRAGCOORD());" _CRLF, _getRegisterVarName(shaderContext, gprIndex));
				continue;
			}

			if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
				src->addFmt("{} = as_type<int4>(in.passParameterSem{});" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId);
			else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
				src->addFmt("{} = in.passParameterSem{};" _CRLF, _getRegisterVarName(shaderContext, gprIndex), psInputSemanticId);
			else
				cemu_assert_unimplemented();
		}
		// front facing attribute
		if (frontFace_enabled)
		{
			// NOTE(review): the use mask of R0 is tested here while the write targets frontFace_regIndex - confirm this is intended
			if ((shaderContext->analyzer.gprUseMask[0 / 8] & (1 << (0 % 8))) != 0)
			{
				if (frontFace_allBits)
					cemu_assert_debug(false);
				if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_SIGNED_INT)
					src->addFmt("{}.{} = as_type<int>(frontFacing ? 1.0 : 0.0);" _CRLF, _getRegisterVarName(shaderContext, frontFace_regIndex), _getElementStrByIndex(frontFace_chan));
				else if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
					src->addFmt("{}.{} = frontFacing ? 1.0 : 0.0;" _CRLF, _getRegisterVarName(shaderContext, frontFace_regIndex), _getElementStrByIndex(frontFace_chan));
				else
					cemu_assert_debug(false);
			}
		}
	}
	// translated shader body
	for(auto& cfInstruction : shaderContext->cfInstructions)
		LatteDecompiler_emitClauseCodeMSL(shaderContext, &cfInstruction, false);
	// vertex shader should write renderstate point size at the end if required but not modified by shader
	if (shaderContext->analyzer.outputPointSize && !shaderContext->analyzer.writesPointSize)
	{
		if (shader->shaderType == LatteConst::ShaderType::Vertex && !shaderContext->options->usesGeometryShader && shaderContext->contextRegistersNew->IsRasterizationEnabled())
			src->add("out.pointSize = supportBuffer.pointSize;" _CRLF);
	}

	if (usesGeometryShader && (shader->shaderType == LatteConst::ShaderType::Vertex || shader->shaderType == LatteConst::ShaderType::Geometry))
	{
		if (shader->shaderType == LatteConst::ShaderType::Vertex)
		{
			// only the first thread of the object function sets the mesh grid size
			src->add("if (tid == 0) {" _CRLF);
			src->add("meshGridProperties.set_threadgroups_per_grid(uint3(1, 1, 1));" _CRLF);
			src->add("}" _CRLF);
		}
		else if (shader->shaderType == LatteConst::ShaderType::Geometry)
		{
			src->add("mesh.set_primitive_count(GET_PRIMITIVE_COUNT(vertexIndex));" _CRLF);

			// Set indices
			if (shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE] == 1) // Line strip
			{
				// two indices per line: index i belongs to line (i / 2) and selects its vertex (i % 2)
				src->add("for (uint8_t i = 0; i < GET_PRIMITIVE_COUNT(vertexIndex) * 2; i++) {" _CRLF);
				src->add("mesh.set_index(i, (i / 2) + i % 2);" _CRLF);
				src->add("}" _CRLF);
			}
			else if (shaderContext->contextRegisters[mmVGT_GS_OUT_PRIM_TYPE] == 2) // Triangle strip
			{
				// three indices per triangle: index i belongs to triangle (i / 3) and selects its vertex (i % 3)
				src->add("for (uint8_t i = 0; i < GET_PRIMITIVE_COUNT(vertexIndex) * 3; i++) {" _CRLF);
				src->add("mesh.set_index(i, (i / 3) + i % 3);" _CRLF);
				src->add("}" _CRLF);
			}
			else
			{
				// every emitted vertex is referenced exactly once
				src->add("for (uint8_t i = 0; i < vertexIndex; i++) {" _CRLF);
				src->add("mesh.set_index(i, i);" _CRLF);
				src->add("}" _CRLF);
			}
		}
	}

	if (shader->shaderType == LatteConst::ShaderType::Pixel || (shaderContext->contextRegistersNew->IsRasterizationEnabled() && !usesGeometryShader))
	{
		// Return
		src->add("return out;" _CRLF);
	}

	// end of shader main
	src->add("}" _CRLF);
	src->shrink_to_fit();
	shader->strBuf_shaderSource = src;
}
|