Merge pull request #16 from SamoZ256/metal-accurate-barriers

Framebuffer fetch
This commit is contained in:
SamoZ256 2025-01-12 12:19:30 +01:00 committed by GitHub
commit 159a10ea6c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 522 additions and 373 deletions

View file

@ -504,45 +504,6 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
uint64 vsHash2 = 0;
_calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2);
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
if (g_renderer->GetType() == RendererAPI::Metal)
{
if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually)
{
for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g];
uint32 bufferIndex = group.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
vsHash += (uint64)bufferStride;
vsHash = std::rotl<uint64>(vsHash, 7);
}
}
if (!usesGeometryShader)
{
// Rasterization
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
// HACK
if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
rasterizationEnabled = true;
const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
uint32 cullBack = polygonControlReg.get_CULL_BACK();
if (cullFront && cullBack)
rasterizationEnabled = false;
if (rasterizationEnabled)
vsHash += 51ULL;
// Vertex fetch
if (_activeFetchShader->mtlFetchVertexManually)
vsHash += 349ULL;
}
}
uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
vsHash += tmp;
@ -563,6 +524,46 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
vsHash += 0x1537;
if (g_renderer->GetType() == RendererAPI::Metal)
{
if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually)
{
for (sint32 g = 0; g < _activeFetchShader->bufferGroups.size(); g++)
{
LatteParsedFetchShaderBufferGroup_t& group = _activeFetchShader->bufferGroups[g];
uint32 bufferIndex = group.attributeBufferIndex;
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
vsHash += (uint64)bufferStride;
vsHash = std::rotl<uint64>(vsHash, 7);
}
}
if (!usesGeometryShader)
{
// Rasterization
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
// HACK
if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
rasterizationEnabled = true;
const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
uint32 cullFront = polygonControlReg.get_CULL_FRONT();
uint32 cullBack = polygonControlReg.get_CULL_BACK();
if (cullFront && cullBack)
rasterizationEnabled = false;
if (rasterizationEnabled)
vsHash += 51ULL;
// Vertex fetch
if (_activeFetchShader->mtlFetchVertexManually)
vsHash += 349ULL;
}
}
_shaderBaseHash_vs = vsHash;
}
@ -589,19 +590,6 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b
// get vertex shader
uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL);
#if ENABLE_METAL
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
psHash += (uint64)dataType;
psHash = std::rotl<uint64>(psHash, 7);
}
}
#endif
_shaderBaseHash_ps = psHash;
}
@ -635,6 +623,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
auxHashTex += 0x333;
}
}
return auxHash + auxHashTex;
}
@ -668,6 +657,28 @@ uint64 LatteSHRC_CalcPSAuxHash(LatteDecompilerShader* pixelShader, uint32* conte
auxHash = (auxHash << 3) | (auxHash >> 61);
auxHash += (uint64)dim;
}
// Textures as render targets
for (uint32 i = 0; i < pixelShader->textureUnitListCount; i++)
{
uint8 t = pixelShader->textureUnitList[i];
auxHash = std::rotl<uint64>(auxHash, 11);
auxHash += (uint64)pixelShader->textureRenderTargetIndex[t];
}
#if ENABLE_METAL
if (g_renderer->GetType() == RendererAPI::Metal)
{
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
auxHash = std::rotl<uint64>(auxHash, 7);
auxHash += (uint64)dataType;
}
}
#endif
return auxHash;
}

View file

@ -183,6 +183,7 @@ struct LatteDecompilerShader
std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2;
uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined
bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{};
uint8 textureRenderTargetIndex[LATTE_NUM_MAX_TEX_UNITS];
// analyzer stage (pixel outputs)
uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments)

View file

@ -9,6 +9,9 @@
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
// Defined in LatteTextureLegacy.cpp
Latte::E_GX2SURFFMT LatteTexture_ReconstructGX2Format(const Latte::LATTE_SQ_TEX_RESOURCE_WORD1_N& texUnitWord1, const Latte::LATTE_SQ_TEX_RESOURCE_WORD4_N& texUnitWord4);
/*
* Return index of used color attachment based on shader pixel export index (0-7)
*/
@ -850,6 +853,59 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
shader->textureUnitList[shader->textureUnitListCount] = i;
shader->textureUnitListCount++;
}
shader->textureRenderTargetIndex[i] = 255;
}
// check if textures are used as render targets
if (shader->shaderType == LatteConst::ShaderType::Pixel)
{
uint8 colorBufferMask = LatteMRT::GetActiveColorBufferMask(shader, *shaderContext->contextRegistersNew);
for (sint32 i = 0; i < shader->textureUnitListCount; i++)
{
sint32 textureIndex = shader->textureUnitList[i];
const auto& texRegister = texRegs[textureIndex];
// get physical address of texture data
MPTR physAddr = (texRegister.word2.get_BASE_ADDRESS() << 8);
if (physAddr == MPTR_NULL)
continue; // invalid data
// Check for dimension
auto dim = shader->textureUnitDim[textureIndex];
// TODO: 2D arrays could technically be supported as well
if (dim != Latte::E_DIM::DIM_2D)
continue;
// Check for mip level
// TODO: uncomment?
/*
auto lastMip = texRegister.word5.get_LAST_LEVEL();
// TODO: multiple mip levels could technically be supported as well
if (lastMip != 0)
continue;
*/
Latte::E_GX2SURFFMT format = LatteTexture_ReconstructGX2Format(texRegister.word1, texRegister.word4);
// Check if the texture is used as render target
for (sint32 j = 0; j < LATTE_NUM_COLOR_TARGET; j++)
{
if (((colorBufferMask) & (1 << j)) == 0)
continue; // color buffer not enabled
uint32* colorBufferRegBase = shaderContext->contextRegisters + (mmCB_COLOR0_BASE + j);
uint32 regColorBufferBase = colorBufferRegBase[mmCB_COLOR0_BASE - mmCB_COLOR0_BASE] & 0xFFFFFF00; // the low 8 bits are ignored? How to Survive seems to rely on this
MPTR colorBufferPhysMem = regColorBufferBase;
Latte::E_GX2SURFFMT colorBufferFormat = LatteMRT::GetColorBufferFormat(j, *shaderContext->contextRegistersNew);
// TODO: check if mip matches as well?
if (physAddr == colorBufferPhysMem && format == colorBufferFormat)
{
shader->textureRenderTargetIndex[textureIndex] = j;
break;
}
}
}
}
// for geometry shaders check the copy shader for stream writes
if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false)

View file

@ -10,7 +10,7 @@
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
#include "Cafe/HW/Latte/Core/FetchShader.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "config/ActiveSettings.h"
#include "util/helpers/StringBuf.h"
@ -2260,264 +2260,275 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
return;
}
if (emulateCompare)
// Do a framebuffer fetch if possible
uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex];
if (static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && renderTargetIndex != 255)
{
cemu_assert_debug(!isGather);
src->add("sampleCompareEmulate(");
}
src->addFmt("tex{}", texInstruction->textureFetch.textureIndex);
if (!emulateCompare)
{
src->add(".");
if (isRead)
{
if (hasOffset)
cemu_assert_unimplemented();
src->add("read(");
unnormalizationHandled = true;
useTexelCoordinates = true;
}
else
{
if (isGather)
src->add("gather");
else
src->add("sample");
if (isCompare)
src->add("_compare");
src->addFmt("(samplr{}, ", texInstruction->textureFetch.textureIndex);
}
// TODO: support comparison samplers
src->addFmt("col{}", renderTargetIndex);
}
else
{
src->addFmt(", samplr{}, ", texInstruction->textureFetch.textureIndex);
}
if (emulateCompare)
{
cemu_assert_debug(!isGather);
// for textureGather() add shift (todo: depends on rounding mode set in sampler registers?)
if (texOpcode == GPU7_TEX_INST_FETCH4)
{
if (texDim == Latte::E_DIM::DIM_2D)
{
//src->addFmt2("(vec2(-0.1) / vec2(textureSize(tex{},0).xy)) + ", texInstruction->textureIndex);
src->add("sampleCompareEmulate(");
}
// vec2(-0.00001) is minimum to break Nvidia
// vec2(0.0001) is minimum to fix shadows on Intel, also fixes it on AMD (Windows and Linux)
src->addFmt("tex{}", texInstruction->textureFetch.textureIndex);
if (!emulateCompare)
{
src->add(".");
if (isRead)
{
if (hasOffset)
cemu_assert_unimplemented();
src->add("read(");
unnormalizationHandled = true;
useTexelCoordinates = true;
}
else
{
if (isGather)
src->add("gather");
else
src->add("sample");
if (isCompare)
src->add("_compare");
src->addFmt("(samplr{}, ", texInstruction->textureFetch.textureIndex);
}
}
else
{
src->addFmt(", samplr{}, ", texInstruction->textureFetch.textureIndex);
}
// todo - emulating coordinate rounding mode correctly is tricky
// GX2 supports two modes: Truncate or rounding according to DX9 rules
// Vulkan uses truncate mode when point sampling (min and mag is both nearest) otherwise it uses rounding
// adding a small fixed bias is enough to avoid vendor-specific cases where small inaccuracies cause the number to get rounded down due to truncation
src->addFmt("float2(0.0001) + ");
}
}
const sint32 texCoordDataType = (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT;
if(useTexelCoordinates)
{
// handle integer coordinates for texelFetch
if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA)
{
src->add("uint2(");
src->add("float2(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, texCoordDataType);
src->addFmt(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, texCoordDataType);
src->addFmt(")*supportBuffer.tex{}Scale", texInstruction->textureFetch.textureIndex); // close float2 and scale
src->add("), 0"); // close int2 and lod param
// todo - lod
}
else if (texDim == Latte::E_DIM::DIM_1D)
{
// VC DS games forget to initialize textures and use texel fetch on an uninitialized texture (a dim of 0 maps to 1D)
src->add("uint(");
src->add("float(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT);
src->addFmt(")*supportBuffer.tex{}Scale.x", texInstruction->textureFetch.textureIndex);
src->add("), 0");
// todo - lod
}
else
cemu_assert_debug(false);
}
else /* useTexelCoordinates == false */
{
// float coordinates
if ( (texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_L || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) )
{
// shadow sampler
if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
{
// 3 coords + compare value
src->add("float2(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add("), uint(rint(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add("))");
src->addFmt(", {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0));
}
else if (texDim == Latte::E_DIM::DIM_CUBEMAP)
{
// 2 coords + faceId
if (texInstruction->textureFetch.srcSel[0] >= 4 || texInstruction->textureFetch.srcSel[1] >= 4)
{
debugBreakpoint();
}
src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0));
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
src->addFmt(")");
src->addFmt(", uint(cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index
}
else if (texDim == Latte::E_DIM::DIM_1D)
{
// 1 coord + 1 unused coord (per spec) + compare value
if (texInstruction->textureFetch.srcSel[0] >= 4)
{
debugBreakpoint();
}
src->addFmt("{}, {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1));
}
else
{
// 2 coords + compare value (as float3)
if (texInstruction->textureFetch.srcSel[0] >= 4 && texInstruction->textureFetch.srcSel[1] >= 4)
{
debugBreakpoint();
}
src->addFmt("float2({}), {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1));
}
}
else if(texDim == Latte::E_DIM::DIM_2D_ARRAY)
{
// 3 coords
src->add("float2(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add("), uint(rint(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add("))");
}
else if(texDim == Latte::E_DIM::DIM_3D)
{
// 3 coords
src->add("float3(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(")");
}
else if( texDim == Latte::E_DIM::DIM_CUBEMAP )
{
// 2 coords + faceId
cemu_assert_debug(texInstruction->textureFetch.srcSel[0] < 4);
cemu_assert_debug(texInstruction->textureFetch.srcSel[1] < 4);
src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0));
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
src->add(")");
src->addFmt(", uint(cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index
}
else if( texDim == Latte::E_DIM::DIM_1D )
{
// 1 coord
src->add(_getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0));
}
else
{
// 2 coords
src->add("float2(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(",");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(")");
// avoid truncate to effectively round downwards on texel edges
if (ActiveSettings::ForceSamplerRoundToPrecision())
src->addFmt("+ float2(1.0)/float2(tex{}.get_width(), tex{}.get_height())/512.0", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
}
// lod or lod bias parameter
// 1D textures don't support lod
if (texDim != Latte::E_DIM::DIM_1D && texDim != Latte::E_DIM::DIM_1D_ARRAY)
{
if (texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L)
// for textureGather() add shift (todo: depends on rounding mode set in sampler registers?)
if (texOpcode == GPU7_TEX_INST_FETCH4)
{
if (texDim == Latte::E_DIM::DIM_2D)
{
src->add(", ");
if (texOpcode == GPU7_TEX_INST_SAMPLE_LB)
//src->addFmt2("(vec2(-0.1) / vec2(textureSize(tex{},0).xy)) + ", texInstruction->textureIndex);
// vec2(-0.00001) is minimum to break Nvidia
// vec2(0.0001) is minimum to fix shadows on Intel, also fixes it on AMD (Windows and Linux)
// todo - emulating coordinate rounding mode correctly is tricky
// GX2 supports two modes: Truncate or rounding according to DX9 rules
// Vulkan uses truncate mode when point sampling (min and mag is both nearest) otherwise it uses rounding
// adding a small fixed bias is enough to avoid vendor-specific cases where small inaccuracies cause the number to get rounded down due to truncation
src->addFmt("float2(0.0001) + ");
}
}
const sint32 texCoordDataType = (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT;
if(useTexelCoordinates)
{
// handle integer coordinates for texelFetch
if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA)
{
src->add("uint2(");
src->add("float2(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, texCoordDataType);
src->addFmt(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, texCoordDataType);
src->addFmt(")*supportBuffer.tex{}Scale", texInstruction->textureFetch.textureIndex); // close float2 and scale
src->add("), 0"); // close int2 and lod param
// todo - lod
}
else if (texDim == Latte::E_DIM::DIM_1D)
{
// VC DS games forget to initialize textures and use texel fetch on an uninitialized texture (a dim of 0 maps to 1D)
src->add("uint(");
src->add("float(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, (texOpcode == GPU7_TEX_INST_LD) ? LATTE_DECOMPILER_DTYPE_SIGNED_INT : LATTE_DECOMPILER_DTYPE_FLOAT);
src->addFmt(")*supportBuffer.tex{}Scale.x", texInstruction->textureFetch.textureIndex);
src->add("), 0");
// todo - lod
}
else
cemu_assert_debug(false);
}
else /* useTexelCoordinates == false */
{
// float coordinates
if ( (texOpcode == GPU7_TEX_INST_SAMPLE_C || texOpcode == GPU7_TEX_INST_SAMPLE_C_L || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ) )
{
// shadow sampler
if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
{
src->addFmt("bias({})", _FormatFloatAsConstant((float)texInstruction->textureFetch.lodBias / 16.0f));
// 3 coords + compare value
src->add("float2(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add("), uint(rint(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add("))");
src->addFmt(", {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer0));
}
else if (texDim == Latte::E_DIM::DIM_CUBEMAP)
{
// 2 coords + faceId
if (texInstruction->textureFetch.srcSel[0] >= 4 || texInstruction->textureFetch.srcSel[1] >= 4)
{
debugBreakpoint();
}
src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0));
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
src->addFmt(")");
src->addFmt(", uint(cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index
}
else if (texDim == Latte::E_DIM::DIM_1D)
{
// 1 coord + 1 unused coord (per spec) + compare value
if (texInstruction->textureFetch.srcSel[0] >= 4)
{
debugBreakpoint();
}
src->addFmt("{}, {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1));
}
else
{
// TODO: is this correct?
src->add("level(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 3, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(")");
// 2 coords + compare value (as float3)
if (texInstruction->textureFetch.srcSel[0] >= 4 && texInstruction->textureFetch.srcSel[1] >= 4)
{
debugBreakpoint();
}
src->addFmt("float2({}), {}", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0), _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[3], -1, -1, -1, tempBuffer1));
}
}
else if (texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ)
else if(texDim == Latte::E_DIM::DIM_2D_ARRAY)
{
src->add(", level(0.0)");
// 3 coords
src->add("float2(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add("), uint(rint(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add("))");
}
}
}
// gradient parameters
if (texOpcode == GPU7_TEX_INST_SAMPLE_G)
{
if (texDim == Latte::E_DIM::DIM_2D ||
texDim == Latte::E_DIM::DIM_1D)
{
src->add(", gradient2d(gradH.xy, gradV.xy)");
}
else
{
cemu_assert_unimplemented();
}
}
else if(texDim == Latte::E_DIM::DIM_3D)
{
// 3 coords
src->add("float3(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(", ");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(")");
}
else if( texDim == Latte::E_DIM::DIM_CUBEMAP )
{
// 2 coords + faceId
cemu_assert_debug(texInstruction->textureFetch.srcSel[0] < 4);
cemu_assert_debug(texInstruction->textureFetch.srcSel[1] < 4);
src->addFmt("redcCUBEReverse({},", _getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], texInstruction->textureFetch.srcSel[1], -1, -1, tempBuffer0));
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 2, LATTE_DECOMPILER_DTYPE_SIGNED_INT);
src->add(")");
src->addFmt(", uint(cubeMapArrayIndex{})", texInstruction->textureFetch.textureIndex); // cubemap index
}
else if( texDim == Latte::E_DIM::DIM_1D )
{
// 1 coord
src->add(_getTexGPRAccess(shaderContext, texInstruction->srcGpr, LATTE_DECOMPILER_DTYPE_FLOAT, texInstruction->textureFetch.srcSel[0], -1, -1, -1, tempBuffer0));
}
else
{
// 2 coords
src->add("float2(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 0, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(",");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 1, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(")");
// avoid truncate to effectively round downwards on texel edges
if (ActiveSettings::ForceSamplerRoundToPrecision())
src->addFmt("+ float2(1.0)/float2(tex{}.get_width(), tex{}.get_height())/512.0", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
}
// lod or lod bias parameter
// 1D textures don't support lod
if (texDim != Latte::E_DIM::DIM_1D && texDim != Latte::E_DIM::DIM_1D_ARRAY)
{
if (texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LB || texOpcode == GPU7_TEX_INST_SAMPLE_C_L)
{
src->add(", ");
if (texOpcode == GPU7_TEX_INST_SAMPLE_LB)
{
src->addFmt("bias({})", _FormatFloatAsConstant((float)texInstruction->textureFetch.lodBias / 16.0f));
}
else
{
// TODO: is this correct?
src->add("level(");
_emitTEXSampleCoordInputComponent(shaderContext, texInstruction, 3, LATTE_DECOMPILER_DTYPE_FLOAT);
src->add(")");
}
}
else if (texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ)
{
src->add(", level(0.0)");
}
}
}
// gradient parameters
if (texOpcode == GPU7_TEX_INST_SAMPLE_G)
{
if (texDim == Latte::E_DIM::DIM_2D ||
texDim == Latte::E_DIM::DIM_1D)
{
src->add(", gradient2d(gradH.xy, gradV.xy)");
}
else
{
cemu_assert_unimplemented();
}
}
// offset
if( texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ || texOpcode == GPU7_TEX_INST_SAMPLE || texOpcode == GPU7_TEX_INST_SAMPLE_C )
{
if( hasOffset )
{
uint8 offsetComponentCount = 0;
if( texDim == Latte::E_DIM::DIM_1D )
offsetComponentCount = 1;
else if( texDim == Latte::E_DIM::DIM_2D )
offsetComponentCount = 2;
else if( texDim == Latte::E_DIM::DIM_3D )
offsetComponentCount = 3;
else if( texDim == Latte::E_DIM::DIM_2D_ARRAY )
offsetComponentCount = 2;
else
cemu_assert_unimplemented();
// offset
if( texOpcode == GPU7_TEX_INST_SAMPLE_L || texOpcode == GPU7_TEX_INST_SAMPLE_LZ || texOpcode == GPU7_TEX_INST_SAMPLE_C_LZ || texOpcode == GPU7_TEX_INST_SAMPLE || texOpcode == GPU7_TEX_INST_SAMPLE_C )
{
if( hasOffset )
{
uint8 offsetComponentCount = 0;
if( texDim == Latte::E_DIM::DIM_1D )
offsetComponentCount = 1;
else if( texDim == Latte::E_DIM::DIM_2D )
offsetComponentCount = 2;
else if( texDim == Latte::E_DIM::DIM_3D )
offsetComponentCount = 3;
else if( texDim == Latte::E_DIM::DIM_2D_ARRAY )
offsetComponentCount = 2;
else
cemu_assert_unimplemented();
if( (texInstruction->textureFetch.offsetX&1) )
cemu_assert_unimplemented();
if( (texInstruction->textureFetch.offsetY&1) )
cemu_assert_unimplemented();
if ((texInstruction->textureFetch.offsetZ & 1))
cemu_assert_unimplemented();
if( (texInstruction->textureFetch.offsetX&1) )
cemu_assert_unimplemented();
if( (texInstruction->textureFetch.offsetY&1) )
cemu_assert_unimplemented();
if ((texInstruction->textureFetch.offsetZ & 1))
cemu_assert_unimplemented();
if( offsetComponentCount == 1 )
src->addFmt(",{}", texInstruction->textureFetch.offsetX/2);
else if( offsetComponentCount == 2 )
src->addFmt(",int2({},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2);
else if( offsetComponentCount == 3 )
src->addFmt(",int3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2);
}
}
if( offsetComponentCount == 1 )
src->addFmt(",{}", texInstruction->textureFetch.offsetX/2);
else if( offsetComponentCount == 2 )
src->addFmt(",int2({},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2);
else if( offsetComponentCount == 3 )
src->addFmt(",int3({},{},{})", texInstruction->textureFetch.offsetX/2, texInstruction->textureFetch.offsetY/2, texInstruction->textureFetch.offsetZ/2);
}
}
// lod bias (TODO: wht?)
// lod bias (TODO: wht?)
src->add(")");
}
src->add(")");
// sample_compare doesn't return a float
if (!isCompare)
{
@ -2630,20 +2641,28 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo
// todo - mip index parameter?
auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex];
if (texDim == Latte::E_DIM::DIM_1D)
src->addFmt(" = int4(tex{}.get_width(), 1, 1, 1).", texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_1D_ARRAY)
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_array_size(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA)
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), tex{}.get_array_size(), 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
if (static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255)
{
// TODO: use the render target size
src->addFmt(" = int4(1920, 1080, 1, 1).");
}
else
{
cemu_assert_debug(false);
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex];
if (texDim == Latte::E_DIM::DIM_1D)
src->addFmt(" = int4(tex{}.get_width(), 1, 1, 1).", texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_1D_ARRAY)
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_array_size(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_2D || texDim == Latte::E_DIM::DIM_2D_MSAA)
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else if (texDim == Latte::E_DIM::DIM_2D_ARRAY)
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), tex{}.get_array_size(), 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
else
{
cemu_assert_debug(false);
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
}
}
for(sint32 f=0; f<4; f++)
@ -2693,24 +2712,31 @@ static void _emitTEXGetCompTexLodCode(LatteDecompilerShaderContext* shaderContex
src->add(" = ");
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType);
if( shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex] == Latte::E_DIM::DIM_CUBEMAP )
if (static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255)
{
// 3 coordinates
if(shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]);
else
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, bitCast<float>({}.{}{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]);
// We assume that textures accessed as framebuffer fetch are always sampled at pixel coordinates, therefore the lod would always be 0.0
src->add("float4(0.0, 0.0, 0.0, 0.0)");
}
else
{
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]);
else
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, bitCast<float>({}.{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]);
debugBreakpoint();
if (shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex] == Latte::E_DIM::DIM_CUBEMAP)
{
// 3 coordinates
if(shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}{}), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]);
else
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, bitCast<float>({}.{}{}{})), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]);
}
else
{
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]);
else
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, bitCast<float>({}.{}{})), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]);
debugBreakpoint();
}
}
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType);
src->add(".");

View file

@ -1,7 +1,7 @@
#pragma once
#include "Common/precompiled.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "HW/Latte/Core/LatteShader.h"
namespace LatteDecompiler
@ -448,6 +448,8 @@ namespace LatteDecompiler
static void _emitTextureDefinitions(LatteDecompilerShaderContext* shaderContext)
{
bool renderTargetIndexUsed[LATTE_NUM_COLOR_TARGET] = {false};
auto src = shaderContext->shaderSource;
// texture sampler definition
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
@ -455,44 +457,56 @@ namespace LatteDecompiler
if (!shaderContext->output->textureUnitMask[i])
continue;
src->add(", ");
// Only 2D and 2D array textures can be used with comparison samplers
if (shaderContext->shader->textureUsesDepthCompare[i] && IsValidDepthTextureType(shaderContext->shader->textureUnitDim[i]))
src->add("depth");
else
src->add("texture");
if (shaderContext->shader->textureIsIntegerFormat[i])
uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[i];
if (static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && renderTargetIndex != 255)
{
// integer samplers
if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D)
src->add("1d<uint>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA)
src->add("2d<uint>");
else
cemu_assert_unimplemented();
if (!renderTargetIndexUsed[renderTargetIndex])
{
src->addFmt(", {} col{} [[color({})]]", GetDataTypeStr(GetColorBufferDataType(renderTargetIndex, *shaderContext->contextRegistersNew)), renderTargetIndex, renderTargetIndex);
renderTargetIndexUsed[renderTargetIndex] = true;
}
}
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA)
src->add("2d<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D)
src->add("1d<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_ARRAY)
src->add("2d_array<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_CUBEMAP)
src->add("cube_array<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_3D)
src->add("3d<float>");
else
{
cemu_assert_unimplemented();
}
src->add(", ");
uint32 binding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i];
//uint32 textureBinding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] % 31;
//uint32 samplerBinding = textureBinding % 16;
src->addFmt(" tex{} [[texture({})]]", i, binding);
src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding);
// Only certain texture dimensions can be used with comparison samplers
if (shaderContext->shader->textureUsesDepthCompare[i] && IsValidDepthTextureType(shaderContext->shader->textureUnitDim[i]))
src->add("depth");
else
src->add("texture");
if (shaderContext->shader->textureIsIntegerFormat[i])
{
// integer samplers
if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D)
src->add("1d<uint>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA)
src->add("2d<uint>");
else
cemu_assert_unimplemented();
}
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D || shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_MSAA)
src->add("2d<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_1D)
src->add("1d<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_2D_ARRAY)
src->add("2d_array<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_CUBEMAP)
src->add("cube_array<float>");
else if (shaderContext->shader->textureUnitDim[i] == Latte::E_DIM::DIM_3D)
src->add("3d<float>");
else
{
cemu_assert_unimplemented();
}
uint32 binding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i];
//uint32 textureBinding = shaderContext->output->resourceMappingMTL.textureUnitToBindingPoint[i] % 31;
//uint32 samplerBinding = textureBinding % 16;
src->addFmt(" tex{} [[texture({})]]", i, binding);
src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding);
}
}
}

View file

@ -55,7 +55,7 @@ inline const char* GetDataTypeStr(MetalDataType dataType)
return "float4";
default:
cemu_assert_suspicious();
return "";
return "INVALID";
}
}

View file

@ -87,6 +87,7 @@ MetalRenderer::MetalRenderer()
// Feature support
m_isAppleGPU = m_device->supportsFamily(MTL::GPUFamilyApple1);
m_supportsFramebufferFetch = GetConfig().framebuffer_fetch.GetValue() ? m_device->supportsFamily(MTL::GPUFamilyApple2) : false;
m_hasUnifiedMemory = m_device->hasUnifiedMemory();
m_supportsMetal3 = m_device->supportsFamily(MTL::GPUFamilyMetal3);
m_recommendedMaxVRAMUsage = m_device->recommendedMaxWorkingSetSize();
@ -584,21 +585,22 @@ void MetalRenderer::DeleteFontTextures()
void MetalRenderer::AppendOverlayDebugInfo()
{
ImGui::Text("--- GPU info ---");
ImGui::Text("GPU %s", m_device->name()->utf8String());
ImGui::Text("Is Apple GPU %s", (m_isAppleGPU ? "yes" : "no"));
ImGui::Text("Has unified memory %s", (m_hasUnifiedMemory ? "yes" : "no"));
ImGui::Text("Supports Metal3 %s", (m_supportsMetal3 ? "yes" : "no"));
ImGui::Text("GPU %s", m_device->name()->utf8String());
ImGui::Text("Is Apple GPU %s", (m_isAppleGPU ? "yes" : "no"));
ImGui::Text("Supports framebuffer fetch %s", (m_supportsFramebufferFetch ? "yes" : "no"));
ImGui::Text("Has unified memory %s", (m_hasUnifiedMemory ? "yes" : "no"));
ImGui::Text("Supports Metal3 %s", (m_supportsMetal3 ? "yes" : "no"));
ImGui::Text("--- Metal info ---");
ImGui::Text("Render pipeline states %zu", m_pipelineCache->GetPipelineCacheSize());
ImGui::Text("Buffer allocator memory %zuMB", m_performanceMonitor.m_bufferAllocatorMemory / 1024 / 1024);
ImGui::Text("Render pipeline states %zu", m_pipelineCache->GetPipelineCacheSize());
ImGui::Text("Buffer allocator memory %zuMB", m_performanceMonitor.m_bufferAllocatorMemory / 1024 / 1024);
ImGui::Text("--- Metal info (per frame) ---");
ImGui::Text("Command buffers %u", m_performanceMonitor.m_commandBuffers);
ImGui::Text("Render passes %u", m_performanceMonitor.m_renderPasses);
ImGui::Text("Clears %u", m_performanceMonitor.m_clears);
ImGui::Text("Manual vertex fetch draws %u (mesh draws: %u)", m_performanceMonitor.m_manualVertexFetchDraws, m_performanceMonitor.m_meshDraws);
ImGui::Text("Triangle fans %u", m_performanceMonitor.m_triangleFans);
ImGui::Text("Command buffers %u", m_performanceMonitor.m_commandBuffers);
ImGui::Text("Render passes %u", m_performanceMonitor.m_renderPasses);
ImGui::Text("Clears %u", m_performanceMonitor.m_clears);
ImGui::Text("Manual vertex fetch draws %u (mesh draws: %u)", m_performanceMonitor.m_manualVertexFetchDraws, m_performanceMonitor.m_meshDraws);
ImGui::Text("Triangle fans %u", m_performanceMonitor.m_triangleFans);
}
void MetalRenderer::renderTarget_setViewport(float x, float y, float width, float height, float nearZ, float farZ, bool halfZ)
@ -1008,6 +1010,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
/*
bool neverSkipAccurateBarrier = false;
// "Accurate barriers" is usually enabled globally but since the CPU cost is substantial we allow users to disable it (debug -> 'Accurate barriers' option)
@ -1031,8 +1034,13 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
endRenderPass = CheckIfRenderPassNeedsFlush(geometryShader);
if (endRenderPass)
{
EndEncoding();
// TODO: only log in debug?
cemuLog_logOnce(LogType::Force, "Ending render pass due to render target self-dependency\n");
}
}
*/
// Primitive type
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
@ -1863,6 +1871,7 @@ bool MetalRenderer::AcquireDrawable(bool mainWindow)
return layer.AcquireDrawable();
}
/*
bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader)
{
sint32 textureCount = shader->resourceMapping.getTextureCount();
@ -1871,6 +1880,11 @@ bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader)
const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i);
auto hostTextureUnit = relative_textureUnit;
auto textureDim = shader->textureUnitDim[relative_textureUnit];
// Texture is accessed as a framebuffer fetch, therefore there is no need to flush it
if (shader->textureRenderTargetIndex[relative_textureUnit] != 255)
continue;
auto texUnitRegIndex = hostTextureUnit * 7;
switch (shader->shaderType)
{
@ -1895,20 +1909,19 @@ bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader)
continue;
LatteTexture* baseTexture = textureView->baseTexture;
if (!m_state.m_isFirstDrawInRenderPass)
// If the texture is also used in the current render pass, we need to end the render pass to "flush" the texture
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
// If the texture is also used in the current render pass, we need to end the render pass to "flush" the texture
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
{
auto colorTarget = m_state.m_activeFBO.m_fbo->colorBuffer[i].texture;
if (colorTarget && colorTarget->baseTexture == baseTexture)
return true;
}
auto colorTarget = m_state.m_activeFBO.m_fbo->colorBuffer[i].texture;
if (colorTarget && colorTarget->baseTexture == baseTexture)
return true;
}
}
return false;
}
*/
void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader)
{
@ -1919,6 +1932,11 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
{
const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i);
auto hostTextureUnit = relative_textureUnit;
// Don't bind textures that are accessed with a framebuffer fetch
if (m_supportsFramebufferFetch && shader->textureRenderTargetIndex[relative_textureUnit] != 255)
continue;
auto textureDim = shader->textureUnitDim[relative_textureUnit];
auto texUnitRegIndex = hostTextureUnit * 7;
switch (shader->shaderType)

View file

@ -125,8 +125,7 @@ struct MetalState
MetalActiveFBOState m_lastUsedFBO;
size_t m_vertexBufferOffsets[MAX_MTL_VERTEX_BUFFERS];
// TODO: find out what is the max number of bound textures on the Wii U
class LatteTextureViewMtl* m_textures[64] = {nullptr};
class LatteTextureViewMtl* m_textures[LATTE_NUM_MAX_TEX_UNITS] = {nullptr};
size_t m_uniformBufferOffsets[METAL_GENERAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
MTL::Viewport m_viewport;
@ -363,7 +362,7 @@ public:
bool AcquireDrawable(bool mainWindow);
bool CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader);
//bool CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader);
void BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader);
void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a);
@ -376,6 +375,11 @@ public:
return m_isAppleGPU;
}
bool SupportsFramebufferFetch() const
{
return m_supportsFramebufferFetch;
}
bool HasUnifiedMemory() const
{
return m_hasUnifiedMemory;
@ -478,6 +482,7 @@ private:
// Feature support
bool m_isAppleGPU;
bool m_supportsFramebufferFetch;
bool m_hasUnifiedMemory;
bool m_supportsMetal3;
uint32 m_recommendedMaxVRAMUsage;

View file

@ -338,6 +338,7 @@ void CemuConfig::Load(XMLConfigParser& parser)
#endif
gdb_port = debug.get("GDBPort", 1337);
gpu_capture_dir = debug.get("GPUCaptureDir", "");
framebuffer_fetch = debug.get("FramebufferFetch", true);
// input
auto input = parser.get("Input");
@ -540,7 +541,8 @@ void CemuConfig::Save(XMLConfigParser& parser)
debug.set("CrashDumpUnix", crash_dump.GetValue());
#endif
debug.set("GDBPort", gdb_port);
debug.set("GPUCaptureDir", gpu_capture_dir.GetValue());
debug.set("GPUCaptureDir", gpu_capture_dir);
debug.set("FramebufferFetch", framebuffer_fetch);
// input
auto input = config.set("Input");

View file

@ -527,7 +527,8 @@ struct CemuConfig
// debug
ConfigValueBounds<CrashDump> crash_dump{ CrashDump::Disabled };
ConfigValue<uint16> gdb_port{ 1337 };
ConfigValue<std::string> gpu_capture_dir{};
ConfigValue<std::string> gpu_capture_dir{ "" };
ConfigValue<bool> framebuffer_fetch{ true };
void Load(XMLConfigParser& parser);
void Save(XMLConfigParser& parser);

View file

@ -910,6 +910,18 @@ wxPanel* GeneralSettings2::AddDebugPage(wxNotebook* notebook)
debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5);
}
{
auto* debug_row = new wxFlexGridSizer(0, 2, 0, 0);
debug_row->SetFlexibleDirection(wxBOTH);
debug_row->SetNonFlexibleGrowMode(wxFLEX_GROWMODE_SPECIFIED);
m_framebuffer_fetch = new wxCheckBox(panel, wxID_ANY, _("Framebuffer fetch"));
m_framebuffer_fetch->SetToolTip(_("Enable framebuffer fetch for eligible textures on supported devices."));
debug_row->Add(m_framebuffer_fetch, 0, wxALL | wxEXPAND, 5);
debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5);
}
panel->SetSizerAndFit(debug_panel_sizer);
return panel;
@ -1121,6 +1133,7 @@ void GeneralSettings2::StoreConfig()
config.crash_dump = (CrashDump)m_crash_dump->GetSelection();
config.gdb_port = m_gdb_port->GetValue();
config.gpu_capture_dir = m_gpu_capture_dir->GetValue().utf8_string();
config.framebuffer_fetch = m_framebuffer_fetch->IsChecked();
g_config.Save();
}
@ -1816,6 +1829,7 @@ void GeneralSettings2::ApplyConfig()
m_crash_dump->SetSelection((int)config.crash_dump.GetValue());
m_gdb_port->SetValue(config.gdb_port.GetValue());
m_gpu_capture_dir->SetValue(wxHelper::FromUtf8(config.gpu_capture_dir.GetValue()));
m_framebuffer_fetch->SetValue(config.framebuffer_fetch);
}
void GeneralSettings2::OnAudioAPISelected(wxCommandEvent& event)

View file

@ -80,6 +80,7 @@ private:
wxChoice* m_crash_dump;
wxSpinCtrl* m_gdb_port;
wxTextCtrl* m_gpu_capture_dir;
wxCheckBox* m_framebuffer_fetch;
void OnAccountCreate(wxCommandEvent& event);
void OnAccountDelete(wxCommandEvent& event);