mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-05 22:41:18 +12:00
Merge pull request #16 from SamoZ256/metal-accurate-barriers
Framebuffer fetch
This commit is contained in:
commit
159a10ea6c
12 changed files with 522 additions and 373 deletions
|
@ -504,6 +504,26 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
|
||||||
uint64 vsHash2 = 0;
|
uint64 vsHash2 = 0;
|
||||||
_calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2);
|
_calculateShaderProgramHash(vsProgramCode, vertexShaderSize, &hashCacheVS, &vsHash1, &vsHash2);
|
||||||
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
|
uint64 vsHash = vsHash1 + vsHash2 + _activeFetchShader->key + _activePSImportTable.key + (usesGeometryShader ? 0x1111ULL : 0ULL);
|
||||||
|
|
||||||
|
uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
|
||||||
|
vsHash += tmp;
|
||||||
|
|
||||||
|
auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
|
||||||
|
// TODO: include always in the hash in case of geometry shader or rect shader
|
||||||
|
if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS)
|
||||||
|
{
|
||||||
|
vsHash += 13ULL;
|
||||||
|
}
|
||||||
|
else if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS)
|
||||||
|
{
|
||||||
|
// required for Vulkan since we have to write the pointsize in the shader
|
||||||
|
vsHash += 71ULL;
|
||||||
|
}
|
||||||
|
vsHash += (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] ? 21 : 0);
|
||||||
|
// halfZ
|
||||||
|
if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
|
||||||
|
vsHash += 0x1537;
|
||||||
|
|
||||||
if (g_renderer->GetType() == RendererAPI::Metal)
|
if (g_renderer->GetType() == RendererAPI::Metal)
|
||||||
{
|
{
|
||||||
if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually)
|
if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually)
|
||||||
|
@ -544,25 +564,6 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32 tmp = LatteGPUState.contextNew.PA_CL_VTE_CNTL.getRawValue() ^ 0x43F;
|
|
||||||
vsHash += tmp;
|
|
||||||
|
|
||||||
auto primitiveType = LatteGPUState.contextNew.VGT_PRIMITIVE_TYPE.get_PRIMITIVE_MODE();
|
|
||||||
// TODO: include always in the hash in case of geometry shader or rect shader
|
|
||||||
if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::RECTS)
|
|
||||||
{
|
|
||||||
vsHash += 13ULL;
|
|
||||||
}
|
|
||||||
else if (primitiveType == Latte::LATTE_VGT_PRIMITIVE_TYPE::E_PRIMITIVE_TYPE::POINTS)
|
|
||||||
{
|
|
||||||
// required for Vulkan since we have to write the pointsize in the shader
|
|
||||||
vsHash += 71ULL;
|
|
||||||
}
|
|
||||||
vsHash += (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] ? 21 : 0);
|
|
||||||
// halfZ
|
|
||||||
if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
|
|
||||||
vsHash += 0x1537;
|
|
||||||
|
|
||||||
_shaderBaseHash_vs = vsHash;
|
_shaderBaseHash_vs = vsHash;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -589,19 +590,6 @@ void LatteSHRC_UpdatePSBaseHash(uint8* pixelShaderPtr, uint32 pixelShaderSize, b
|
||||||
// get vertex shader
|
// get vertex shader
|
||||||
uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL);
|
uint64 psHash = psHash1 + psHash2 + _activePSImportTable.key + (usesGeometryShader ? hashCacheGS.prevHash1 : 0ULL);
|
||||||
|
|
||||||
#if ENABLE_METAL
|
|
||||||
if (g_renderer->GetType() == RendererAPI::Metal)
|
|
||||||
{
|
|
||||||
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
|
||||||
{
|
|
||||||
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
|
|
||||||
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
|
|
||||||
psHash += (uint64)dataType;
|
|
||||||
psHash = std::rotl<uint64>(psHash, 7);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
_shaderBaseHash_ps = psHash;
|
_shaderBaseHash_ps = psHash;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -635,6 +623,7 @@ uint64 LatteSHRC_CalcVSAuxHash(LatteDecompilerShader* vertexShader, uint32* cont
|
||||||
auxHashTex += 0x333;
|
auxHashTex += 0x333;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return auxHash + auxHashTex;
|
return auxHash + auxHashTex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -668,6 +657,28 @@ uint64 LatteSHRC_CalcPSAuxHash(LatteDecompilerShader* pixelShader, uint32* conte
|
||||||
auxHash = (auxHash << 3) | (auxHash >> 61);
|
auxHash = (auxHash << 3) | (auxHash >> 61);
|
||||||
auxHash += (uint64)dim;
|
auxHash += (uint64)dim;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Textures as render targets
|
||||||
|
for (uint32 i = 0; i < pixelShader->textureUnitListCount; i++)
|
||||||
|
{
|
||||||
|
uint8 t = pixelShader->textureUnitList[i];
|
||||||
|
auxHash = std::rotl<uint64>(auxHash, 11);
|
||||||
|
auxHash += (uint64)pixelShader->textureRenderTargetIndex[t];
|
||||||
|
}
|
||||||
|
|
||||||
|
#if ENABLE_METAL
|
||||||
|
if (g_renderer->GetType() == RendererAPI::Metal)
|
||||||
|
{
|
||||||
|
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
||||||
|
{
|
||||||
|
auto format = LatteMRT::GetColorBufferFormat(i, LatteGPUState.contextNew);
|
||||||
|
uint8 dataType = (uint8)GetMtlPixelFormatInfo(format, false).dataType;
|
||||||
|
auxHash = std::rotl<uint64>(auxHash, 7);
|
||||||
|
auxHash += (uint64)dataType;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return auxHash;
|
return auxHash;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -183,6 +183,7 @@ struct LatteDecompilerShader
|
||||||
std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2;
|
std::bitset<LATTE_NUM_MAX_TEX_UNITS> textureUnitMask2;
|
||||||
uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined
|
uint16 textureUnitSamplerAssignment[LATTE_NUM_MAX_TEX_UNITS]{ 0 }; // LATTE_DECOMPILER_SAMPLER_NONE means undefined
|
||||||
bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{};
|
bool textureUsesDepthCompare[LATTE_NUM_MAX_TEX_UNITS]{};
|
||||||
|
uint8 textureRenderTargetIndex[LATTE_NUM_MAX_TEX_UNITS];
|
||||||
|
|
||||||
// analyzer stage (pixel outputs)
|
// analyzer stage (pixel outputs)
|
||||||
uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments)
|
uint32 pixelColorOutputMask{ 0 }; // from LSB to MSB, 1 bit per written output. 1 if written (indices of color attachments)
|
||||||
|
|
|
@ -9,6 +9,9 @@
|
||||||
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||||
|
|
||||||
|
// Defined in LatteTextureLegacy.cpp
|
||||||
|
Latte::E_GX2SURFFMT LatteTexture_ReconstructGX2Format(const Latte::LATTE_SQ_TEX_RESOURCE_WORD1_N& texUnitWord1, const Latte::LATTE_SQ_TEX_RESOURCE_WORD4_N& texUnitWord4);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return index of used color attachment based on shader pixel export index (0-7)
|
* Return index of used color attachment based on shader pixel export index (0-7)
|
||||||
*/
|
*/
|
||||||
|
@ -850,6 +853,59 @@ void LatteDecompiler_analyze(LatteDecompilerShaderContext* shaderContext, LatteD
|
||||||
shader->textureUnitList[shader->textureUnitListCount] = i;
|
shader->textureUnitList[shader->textureUnitListCount] = i;
|
||||||
shader->textureUnitListCount++;
|
shader->textureUnitListCount++;
|
||||||
}
|
}
|
||||||
|
shader->textureRenderTargetIndex[i] = 255;
|
||||||
|
}
|
||||||
|
// check if textures are used as render targets
|
||||||
|
if (shader->shaderType == LatteConst::ShaderType::Pixel)
|
||||||
|
{
|
||||||
|
uint8 colorBufferMask = LatteMRT::GetActiveColorBufferMask(shader, *shaderContext->contextRegistersNew);
|
||||||
|
for (sint32 i = 0; i < shader->textureUnitListCount; i++)
|
||||||
|
{
|
||||||
|
sint32 textureIndex = shader->textureUnitList[i];
|
||||||
|
const auto& texRegister = texRegs[textureIndex];
|
||||||
|
|
||||||
|
// get physical address of texture data
|
||||||
|
MPTR physAddr = (texRegister.word2.get_BASE_ADDRESS() << 8);
|
||||||
|
if (physAddr == MPTR_NULL)
|
||||||
|
continue; // invalid data
|
||||||
|
|
||||||
|
// Check for dimension
|
||||||
|
auto dim = shader->textureUnitDim[textureIndex];
|
||||||
|
// TODO: 2D arrays could technically be supported as well
|
||||||
|
if (dim != Latte::E_DIM::DIM_2D)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Check for mip level
|
||||||
|
// TODO: uncomment?
|
||||||
|
/*
|
||||||
|
auto lastMip = texRegister.word5.get_LAST_LEVEL();
|
||||||
|
// TODO: multiple mip levels could technically be supported as well
|
||||||
|
if (lastMip != 0)
|
||||||
|
continue;
|
||||||
|
*/
|
||||||
|
|
||||||
|
Latte::E_GX2SURFFMT format = LatteTexture_ReconstructGX2Format(texRegister.word1, texRegister.word4);
|
||||||
|
|
||||||
|
// Check if the texture is used as render target
|
||||||
|
for (sint32 j = 0; j < LATTE_NUM_COLOR_TARGET; j++)
|
||||||
|
{
|
||||||
|
if (((colorBufferMask) & (1 << j)) == 0)
|
||||||
|
continue; // color buffer not enabled
|
||||||
|
|
||||||
|
uint32* colorBufferRegBase = shaderContext->contextRegisters + (mmCB_COLOR0_BASE + j);
|
||||||
|
uint32 regColorBufferBase = colorBufferRegBase[mmCB_COLOR0_BASE - mmCB_COLOR0_BASE] & 0xFFFFFF00; // the low 8 bits are ignored? How to Survive seems to rely on this
|
||||||
|
|
||||||
|
MPTR colorBufferPhysMem = regColorBufferBase;
|
||||||
|
Latte::E_GX2SURFFMT colorBufferFormat = LatteMRT::GetColorBufferFormat(j, *shaderContext->contextRegistersNew);
|
||||||
|
|
||||||
|
// TODO: check if mip matches as well?
|
||||||
|
if (physAddr == colorBufferPhysMem && format == colorBufferFormat)
|
||||||
|
{
|
||||||
|
shader->textureRenderTargetIndex[textureIndex] = j;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// for geometry shaders check the copy shader for stream writes
|
// for geometry shaders check the copy shader for stream writes
|
||||||
if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false)
|
if (shader->shaderType == LatteConst::ShaderType::Geometry && shaderContext->parsedGSCopyShader->list_streamWrites.empty() == false)
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
|
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompilerInstructions.h"
|
||||||
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
|
||||||
#include "config/ActiveSettings.h"
|
#include "config/ActiveSettings.h"
|
||||||
#include "util/helpers/StringBuf.h"
|
#include "util/helpers/StringBuf.h"
|
||||||
|
@ -2260,6 +2260,15 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Do a framebuffer fetch if possible
|
||||||
|
uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex];
|
||||||
|
if (static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && renderTargetIndex != 255)
|
||||||
|
{
|
||||||
|
// TODO: support comparison samplers
|
||||||
|
src->addFmt("col{}", renderTargetIndex);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
if (emulateCompare)
|
if (emulateCompare)
|
||||||
{
|
{
|
||||||
cemu_assert_debug(!isGather);
|
cemu_assert_debug(!isGather);
|
||||||
|
@ -2518,6 +2527,8 @@ static void _emitTEXSampleTextureCode(LatteDecompilerShaderContext* shaderContex
|
||||||
// lod bias (TODO: wht?)
|
// lod bias (TODO: wht?)
|
||||||
|
|
||||||
src->add(")");
|
src->add(")");
|
||||||
|
}
|
||||||
|
|
||||||
// sample_compare doesn't return a float
|
// sample_compare doesn't return a float
|
||||||
if (!isCompare)
|
if (!isCompare)
|
||||||
{
|
{
|
||||||
|
@ -2630,6 +2641,13 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo
|
||||||
|
|
||||||
// todo - mip index parameter?
|
// todo - mip index parameter?
|
||||||
|
|
||||||
|
if (static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255)
|
||||||
|
{
|
||||||
|
// TODO: use the render target size
|
||||||
|
src->addFmt(" = int4(1920, 1080, 1, 1).");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex];
|
auto texDim = shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex];
|
||||||
|
|
||||||
if (texDim == Latte::E_DIM::DIM_1D)
|
if (texDim == Latte::E_DIM::DIM_1D)
|
||||||
|
@ -2645,6 +2663,7 @@ static void _emitTEXGetTextureResInfoCode(LatteDecompilerShaderContext* shaderCo
|
||||||
cemu_assert_debug(false);
|
cemu_assert_debug(false);
|
||||||
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
|
src->addFmt(" = int4(tex{}.get_width(), tex{}.get_height(), 1, 1).", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for(sint32 f=0; f<4; f++)
|
for(sint32 f=0; f<4; f++)
|
||||||
{
|
{
|
||||||
|
@ -2693,23 +2712,30 @@ static void _emitTEXGetCompTexLodCode(LatteDecompilerShaderContext* shaderContex
|
||||||
src->add(" = ");
|
src->add(" = ");
|
||||||
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType);
|
_emitTypeConversionPrefixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType);
|
||||||
|
|
||||||
if( shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex] == Latte::E_DIM::DIM_CUBEMAP )
|
if (static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && shaderContext->shader->textureRenderTargetIndex[texInstruction->textureFetch.textureIndex] != 255)
|
||||||
|
{
|
||||||
|
// We assume that textures accessed as framebuffer fetch are always sampled at pixel coordinates, therefore the lod would always be 0.0
|
||||||
|
src->add("float4(0.0, 0.0, 0.0, 0.0)");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (shaderContext->shader->textureUnitDim[texInstruction->textureFetch.textureIndex] == Latte::E_DIM::DIM_CUBEMAP)
|
||||||
{
|
{
|
||||||
// 3 coordinates
|
// 3 coordinates
|
||||||
if(shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
|
if(shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
|
||||||
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]);
|
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}{}), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]);
|
||||||
else
|
else
|
||||||
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, bitCast<float>({}.{}{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]);
|
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, bitCast<float>({}.{}{}{})), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]], resultElemTable[texInstruction->textureFetch.srcSel[2]]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
|
if (shaderContext->typeTracker.defaultDataType == LATTE_DECOMPILER_DTYPE_FLOAT)
|
||||||
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}),0.0,0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]);
|
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, {}.{}{}), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]);
|
||||||
else
|
else
|
||||||
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, bitCast<float>({}.{}{})),0.0,0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]);
|
src->addFmt("float4(textureCalculateLod(tex{}, samplr{}, bitCast<float>({}.{}{})), 0.0, 0.0)", texInstruction->textureFetch.textureIndex, texInstruction->textureFetch.textureIndex, _getRegisterVarName(shaderContext, texInstruction->srcGpr), resultElemTable[texInstruction->textureFetch.srcSel[0]], resultElemTable[texInstruction->textureFetch.srcSel[1]]);
|
||||||
debugBreakpoint();
|
debugBreakpoint();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType);
|
_emitTypeConversionSuffixMSL(shaderContext, LATTE_DECOMPILER_DTYPE_FLOAT, shaderContext->typeTracker.defaultDataType);
|
||||||
src->add(".");
|
src->add(".");
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "Common/precompiled.h"
|
#include "Common/precompiled.h"
|
||||||
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
|
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
|
||||||
#include "HW/Latte/Core/LatteShader.h"
|
#include "HW/Latte/Core/LatteShader.h"
|
||||||
|
|
||||||
namespace LatteDecompiler
|
namespace LatteDecompiler
|
||||||
|
@ -448,6 +448,8 @@ namespace LatteDecompiler
|
||||||
|
|
||||||
static void _emitTextureDefinitions(LatteDecompilerShaderContext* shaderContext)
|
static void _emitTextureDefinitions(LatteDecompilerShaderContext* shaderContext)
|
||||||
{
|
{
|
||||||
|
bool renderTargetIndexUsed[LATTE_NUM_COLOR_TARGET] = {false};
|
||||||
|
|
||||||
auto src = shaderContext->shaderSource;
|
auto src = shaderContext->shaderSource;
|
||||||
// texture sampler definition
|
// texture sampler definition
|
||||||
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
|
for (sint32 i = 0; i < LATTE_NUM_MAX_TEX_UNITS; i++)
|
||||||
|
@ -455,9 +457,20 @@ namespace LatteDecompiler
|
||||||
if (!shaderContext->output->textureUnitMask[i])
|
if (!shaderContext->output->textureUnitMask[i])
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
uint8 renderTargetIndex = shaderContext->shader->textureRenderTargetIndex[i];
|
||||||
|
if (static_cast<MetalRenderer*>(g_renderer.get())->SupportsFramebufferFetch() && renderTargetIndex != 255)
|
||||||
|
{
|
||||||
|
if (!renderTargetIndexUsed[renderTargetIndex])
|
||||||
|
{
|
||||||
|
src->addFmt(", {} col{} [[color({})]]", GetDataTypeStr(GetColorBufferDataType(renderTargetIndex, *shaderContext->contextRegistersNew)), renderTargetIndex, renderTargetIndex);
|
||||||
|
renderTargetIndexUsed[renderTargetIndex] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
src->add(", ");
|
src->add(", ");
|
||||||
|
|
||||||
// Only 2D and 2D array textures can be used with comparison samplers
|
// Only certain texture dimensions can be used with comparison samplers
|
||||||
if (shaderContext->shader->textureUsesDepthCompare[i] && IsValidDepthTextureType(shaderContext->shader->textureUnitDim[i]))
|
if (shaderContext->shader->textureUsesDepthCompare[i] && IsValidDepthTextureType(shaderContext->shader->textureUnitDim[i]))
|
||||||
src->add("depth");
|
src->add("depth");
|
||||||
else
|
else
|
||||||
|
@ -495,6 +508,7 @@ namespace LatteDecompiler
|
||||||
src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding);
|
src->addFmt(", sampler samplr{} [[sampler({})]]", i, binding);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually)
|
static void emitInputs(LatteDecompilerShaderContext* decompilerContext, bool isRectVertexShader, bool fetchVertexManually)
|
||||||
{
|
{
|
||||||
|
|
|
@ -55,7 +55,7 @@ inline const char* GetDataTypeStr(MetalDataType dataType)
|
||||||
return "float4";
|
return "float4";
|
||||||
default:
|
default:
|
||||||
cemu_assert_suspicious();
|
cemu_assert_suspicious();
|
||||||
return "";
|
return "INVALID";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -87,6 +87,7 @@ MetalRenderer::MetalRenderer()
|
||||||
|
|
||||||
// Feature support
|
// Feature support
|
||||||
m_isAppleGPU = m_device->supportsFamily(MTL::GPUFamilyApple1);
|
m_isAppleGPU = m_device->supportsFamily(MTL::GPUFamilyApple1);
|
||||||
|
m_supportsFramebufferFetch = GetConfig().framebuffer_fetch.GetValue() ? m_device->supportsFamily(MTL::GPUFamilyApple2) : false;
|
||||||
m_hasUnifiedMemory = m_device->hasUnifiedMemory();
|
m_hasUnifiedMemory = m_device->hasUnifiedMemory();
|
||||||
m_supportsMetal3 = m_device->supportsFamily(MTL::GPUFamilyMetal3);
|
m_supportsMetal3 = m_device->supportsFamily(MTL::GPUFamilyMetal3);
|
||||||
m_recommendedMaxVRAMUsage = m_device->recommendedMaxWorkingSetSize();
|
m_recommendedMaxVRAMUsage = m_device->recommendedMaxWorkingSetSize();
|
||||||
|
@ -586,6 +587,7 @@ void MetalRenderer::AppendOverlayDebugInfo()
|
||||||
ImGui::Text("--- GPU info ---");
|
ImGui::Text("--- GPU info ---");
|
||||||
ImGui::Text("GPU %s", m_device->name()->utf8String());
|
ImGui::Text("GPU %s", m_device->name()->utf8String());
|
||||||
ImGui::Text("Is Apple GPU %s", (m_isAppleGPU ? "yes" : "no"));
|
ImGui::Text("Is Apple GPU %s", (m_isAppleGPU ? "yes" : "no"));
|
||||||
|
ImGui::Text("Supports framebuffer fetch %s", (m_supportsFramebufferFetch ? "yes" : "no"));
|
||||||
ImGui::Text("Has unified memory %s", (m_hasUnifiedMemory ? "yes" : "no"));
|
ImGui::Text("Has unified memory %s", (m_hasUnifiedMemory ? "yes" : "no"));
|
||||||
ImGui::Text("Supports Metal3 %s", (m_supportsMetal3 ? "yes" : "no"));
|
ImGui::Text("Supports Metal3 %s", (m_supportsMetal3 ? "yes" : "no"));
|
||||||
|
|
||||||
|
@ -1008,6 +1010,7 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||||
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
|
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
|
||||||
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
|
const auto fetchShader = LatteSHRC_GetActiveFetchShader();
|
||||||
|
|
||||||
|
/*
|
||||||
bool neverSkipAccurateBarrier = false;
|
bool neverSkipAccurateBarrier = false;
|
||||||
|
|
||||||
// "Accurate barriers" is usually enabled globally but since the CPU cost is substantial we allow users to disable it (debug -> 'Accurate barriers' option)
|
// "Accurate barriers" is usually enabled globally but since the CPU cost is substantial we allow users to disable it (debug -> 'Accurate barriers' option)
|
||||||
|
@ -1031,8 +1034,13 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
|
||||||
endRenderPass = CheckIfRenderPassNeedsFlush(geometryShader);
|
endRenderPass = CheckIfRenderPassNeedsFlush(geometryShader);
|
||||||
|
|
||||||
if (endRenderPass)
|
if (endRenderPass)
|
||||||
|
{
|
||||||
EndEncoding();
|
EndEncoding();
|
||||||
|
// TODO: only log in debug?
|
||||||
|
cemuLog_logOnce(LogType::Force, "Ending render pass due to render target self-dependency\n");
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
// Primitive type
|
// Primitive type
|
||||||
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
|
const LattePrimitiveMode primitiveMode = static_cast<LattePrimitiveMode>(LatteGPUState.contextRegister[mmVGT_PRIMITIVE_TYPE]);
|
||||||
|
@ -1863,6 +1871,7 @@ bool MetalRenderer::AcquireDrawable(bool mainWindow)
|
||||||
return layer.AcquireDrawable();
|
return layer.AcquireDrawable();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader)
|
bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader)
|
||||||
{
|
{
|
||||||
sint32 textureCount = shader->resourceMapping.getTextureCount();
|
sint32 textureCount = shader->resourceMapping.getTextureCount();
|
||||||
|
@ -1871,6 +1880,11 @@ bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader)
|
||||||
const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i);
|
const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i);
|
||||||
auto hostTextureUnit = relative_textureUnit;
|
auto hostTextureUnit = relative_textureUnit;
|
||||||
auto textureDim = shader->textureUnitDim[relative_textureUnit];
|
auto textureDim = shader->textureUnitDim[relative_textureUnit];
|
||||||
|
|
||||||
|
// Texture is accessed as a framebuffer fetch, therefore there is no need to flush it
|
||||||
|
if (shader->textureRenderTargetIndex[relative_textureUnit] != 255)
|
||||||
|
continue;
|
||||||
|
|
||||||
auto texUnitRegIndex = hostTextureUnit * 7;
|
auto texUnitRegIndex = hostTextureUnit * 7;
|
||||||
switch (shader->shaderType)
|
switch (shader->shaderType)
|
||||||
{
|
{
|
||||||
|
@ -1895,8 +1909,7 @@ bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
LatteTexture* baseTexture = textureView->baseTexture;
|
LatteTexture* baseTexture = textureView->baseTexture;
|
||||||
if (!m_state.m_isFirstDrawInRenderPass)
|
|
||||||
{
|
|
||||||
// If the texture is also used in the current render pass, we need to end the render pass to "flush" the texture
|
// If the texture is also used in the current render pass, we need to end the render pass to "flush" the texture
|
||||||
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
for (uint8 i = 0; i < LATTE_NUM_COLOR_TARGET; i++)
|
||||||
{
|
{
|
||||||
|
@ -1905,10 +1918,10 @@ bool MetalRenderer::CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader)
|
void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader)
|
||||||
{
|
{
|
||||||
|
@ -1919,6 +1932,11 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
|
||||||
{
|
{
|
||||||
const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i);
|
const auto relative_textureUnit = shader->resourceMapping.getTextureUnitFromBindingPoint(i);
|
||||||
auto hostTextureUnit = relative_textureUnit;
|
auto hostTextureUnit = relative_textureUnit;
|
||||||
|
|
||||||
|
// Don't bind textures that are accessed with a framebuffer fetch
|
||||||
|
if (m_supportsFramebufferFetch && shader->textureRenderTargetIndex[relative_textureUnit] != 255)
|
||||||
|
continue;
|
||||||
|
|
||||||
auto textureDim = shader->textureUnitDim[relative_textureUnit];
|
auto textureDim = shader->textureUnitDim[relative_textureUnit];
|
||||||
auto texUnitRegIndex = hostTextureUnit * 7;
|
auto texUnitRegIndex = hostTextureUnit * 7;
|
||||||
switch (shader->shaderType)
|
switch (shader->shaderType)
|
||||||
|
|
|
@ -125,8 +125,7 @@ struct MetalState
|
||||||
MetalActiveFBOState m_lastUsedFBO;
|
MetalActiveFBOState m_lastUsedFBO;
|
||||||
|
|
||||||
size_t m_vertexBufferOffsets[MAX_MTL_VERTEX_BUFFERS];
|
size_t m_vertexBufferOffsets[MAX_MTL_VERTEX_BUFFERS];
|
||||||
// TODO: find out what is the max number of bound textures on the Wii U
|
class LatteTextureViewMtl* m_textures[LATTE_NUM_MAX_TEX_UNITS] = {nullptr};
|
||||||
class LatteTextureViewMtl* m_textures[64] = {nullptr};
|
|
||||||
size_t m_uniformBufferOffsets[METAL_GENERAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
|
size_t m_uniformBufferOffsets[METAL_GENERAL_SHADER_TYPE_TOTAL][MAX_MTL_BUFFERS];
|
||||||
|
|
||||||
MTL::Viewport m_viewport;
|
MTL::Viewport m_viewport;
|
||||||
|
@ -363,7 +362,7 @@ public:
|
||||||
|
|
||||||
bool AcquireDrawable(bool mainWindow);
|
bool AcquireDrawable(bool mainWindow);
|
||||||
|
|
||||||
bool CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader);
|
//bool CheckIfRenderPassNeedsFlush(LatteDecompilerShader* shader);
|
||||||
void BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader);
|
void BindStageResources(MTL::RenderCommandEncoder* renderCommandEncoder, LatteDecompilerShader* shader, bool usesGeometryShader);
|
||||||
|
|
||||||
void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a);
|
void ClearColorTextureInternal(MTL::Texture* mtlTexture, sint32 sliceIndex, sint32 mipIndex, float r, float g, float b, float a);
|
||||||
|
@ -376,6 +375,11 @@ public:
|
||||||
return m_isAppleGPU;
|
return m_isAppleGPU;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool SupportsFramebufferFetch() const
|
||||||
|
{
|
||||||
|
return m_supportsFramebufferFetch;
|
||||||
|
}
|
||||||
|
|
||||||
bool HasUnifiedMemory() const
|
bool HasUnifiedMemory() const
|
||||||
{
|
{
|
||||||
return m_hasUnifiedMemory;
|
return m_hasUnifiedMemory;
|
||||||
|
@ -478,6 +482,7 @@ private:
|
||||||
|
|
||||||
// Feature support
|
// Feature support
|
||||||
bool m_isAppleGPU;
|
bool m_isAppleGPU;
|
||||||
|
bool m_supportsFramebufferFetch;
|
||||||
bool m_hasUnifiedMemory;
|
bool m_hasUnifiedMemory;
|
||||||
bool m_supportsMetal3;
|
bool m_supportsMetal3;
|
||||||
uint32 m_recommendedMaxVRAMUsage;
|
uint32 m_recommendedMaxVRAMUsage;
|
||||||
|
|
|
@ -338,6 +338,7 @@ void CemuConfig::Load(XMLConfigParser& parser)
|
||||||
#endif
|
#endif
|
||||||
gdb_port = debug.get("GDBPort", 1337);
|
gdb_port = debug.get("GDBPort", 1337);
|
||||||
gpu_capture_dir = debug.get("GPUCaptureDir", "");
|
gpu_capture_dir = debug.get("GPUCaptureDir", "");
|
||||||
|
framebuffer_fetch = debug.get("FramebufferFetch", true);
|
||||||
|
|
||||||
// input
|
// input
|
||||||
auto input = parser.get("Input");
|
auto input = parser.get("Input");
|
||||||
|
@ -540,7 +541,8 @@ void CemuConfig::Save(XMLConfigParser& parser)
|
||||||
debug.set("CrashDumpUnix", crash_dump.GetValue());
|
debug.set("CrashDumpUnix", crash_dump.GetValue());
|
||||||
#endif
|
#endif
|
||||||
debug.set("GDBPort", gdb_port);
|
debug.set("GDBPort", gdb_port);
|
||||||
debug.set("GPUCaptureDir", gpu_capture_dir.GetValue());
|
debug.set("GPUCaptureDir", gpu_capture_dir);
|
||||||
|
debug.set("FramebufferFetch", framebuffer_fetch);
|
||||||
|
|
||||||
// input
|
// input
|
||||||
auto input = config.set("Input");
|
auto input = config.set("Input");
|
||||||
|
|
|
@ -527,7 +527,8 @@ struct CemuConfig
|
||||||
// debug
|
// debug
|
||||||
ConfigValueBounds<CrashDump> crash_dump{ CrashDump::Disabled };
|
ConfigValueBounds<CrashDump> crash_dump{ CrashDump::Disabled };
|
||||||
ConfigValue<uint16> gdb_port{ 1337 };
|
ConfigValue<uint16> gdb_port{ 1337 };
|
||||||
ConfigValue<std::string> gpu_capture_dir{};
|
ConfigValue<std::string> gpu_capture_dir{ "" };
|
||||||
|
ConfigValue<bool> framebuffer_fetch{ true };
|
||||||
|
|
||||||
void Load(XMLConfigParser& parser);
|
void Load(XMLConfigParser& parser);
|
||||||
void Save(XMLConfigParser& parser);
|
void Save(XMLConfigParser& parser);
|
||||||
|
|
|
@ -910,6 +910,18 @@ wxPanel* GeneralSettings2::AddDebugPage(wxNotebook* notebook)
|
||||||
debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5);
|
debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto* debug_row = new wxFlexGridSizer(0, 2, 0, 0);
|
||||||
|
debug_row->SetFlexibleDirection(wxBOTH);
|
||||||
|
debug_row->SetNonFlexibleGrowMode(wxFLEX_GROWMODE_SPECIFIED);
|
||||||
|
|
||||||
|
m_framebuffer_fetch = new wxCheckBox(panel, wxID_ANY, _("Framebuffer fetch"));
|
||||||
|
m_framebuffer_fetch->SetToolTip(_("Enable framebuffer fetch for eligible textures on supported devices."));
|
||||||
|
|
||||||
|
debug_row->Add(m_framebuffer_fetch, 0, wxALL | wxEXPAND, 5);
|
||||||
|
debug_panel_sizer->Add(debug_row, 0, wxALL | wxEXPAND, 5);
|
||||||
|
}
|
||||||
|
|
||||||
panel->SetSizerAndFit(debug_panel_sizer);
|
panel->SetSizerAndFit(debug_panel_sizer);
|
||||||
|
|
||||||
return panel;
|
return panel;
|
||||||
|
@ -1121,6 +1133,7 @@ void GeneralSettings2::StoreConfig()
|
||||||
config.crash_dump = (CrashDump)m_crash_dump->GetSelection();
|
config.crash_dump = (CrashDump)m_crash_dump->GetSelection();
|
||||||
config.gdb_port = m_gdb_port->GetValue();
|
config.gdb_port = m_gdb_port->GetValue();
|
||||||
config.gpu_capture_dir = m_gpu_capture_dir->GetValue().utf8_string();
|
config.gpu_capture_dir = m_gpu_capture_dir->GetValue().utf8_string();
|
||||||
|
config.framebuffer_fetch = m_framebuffer_fetch->IsChecked();
|
||||||
|
|
||||||
g_config.Save();
|
g_config.Save();
|
||||||
}
|
}
|
||||||
|
@ -1816,6 +1829,7 @@ void GeneralSettings2::ApplyConfig()
|
||||||
m_crash_dump->SetSelection((int)config.crash_dump.GetValue());
|
m_crash_dump->SetSelection((int)config.crash_dump.GetValue());
|
||||||
m_gdb_port->SetValue(config.gdb_port.GetValue());
|
m_gdb_port->SetValue(config.gdb_port.GetValue());
|
||||||
m_gpu_capture_dir->SetValue(wxHelper::FromUtf8(config.gpu_capture_dir.GetValue()));
|
m_gpu_capture_dir->SetValue(wxHelper::FromUtf8(config.gpu_capture_dir.GetValue()));
|
||||||
|
m_framebuffer_fetch->SetValue(config.framebuffer_fetch);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GeneralSettings2::OnAudioAPISelected(wxCommandEvent& event)
|
void GeneralSettings2::OnAudioAPISelected(wxCommandEvent& event)
|
||||||
|
|
|
@ -80,6 +80,7 @@ private:
|
||||||
wxChoice* m_crash_dump;
|
wxChoice* m_crash_dump;
|
||||||
wxSpinCtrl* m_gdb_port;
|
wxSpinCtrl* m_gdb_port;
|
||||||
wxTextCtrl* m_gpu_capture_dir;
|
wxTextCtrl* m_gpu_capture_dir;
|
||||||
|
wxCheckBox* m_framebuffer_fetch;
|
||||||
|
|
||||||
void OnAccountCreate(wxCommandEvent& event);
|
void OnAccountCreate(wxCommandEvent& event);
|
||||||
void OnAccountDelete(wxCommandEvent& event);
|
void OnAccountDelete(wxCommandEvent& event);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue