mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-05 14:31:17 +12:00
228 lines
7.9 KiB
C++
228 lines
7.9 KiB
C++
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
|
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
|
#include "Cafe/HW/Latte/Core/Latte.h"
|
|
#include "Cafe/HW/Latte/Core/LatteDraw.h"
|
|
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
|
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
|
|
#include "Cafe/HW/Latte/Core/FetchShader.h"
|
|
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
|
|
#include "Cafe/GameProfile/GameProfile.h"
|
|
|
|
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
|
|
#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h"
|
|
|
|
template<int vectorLen>
|
|
void rectGenerate4thVertex(uint32be* output, uint32be* input0, uint32be* input1, uint32be* input2)
|
|
{
|
|
float* v = (float*)output;
|
|
|
|
for (sint32 i = 0; i < vectorLen; i++)
|
|
output[vectorLen * 0 + i] = _swapEndianU32(input0[i]);
|
|
for (sint32 i = 0; i < vectorLen; i++)
|
|
output[vectorLen * 1 + i] = _swapEndianU32(input1[i]);
|
|
for (sint32 i = 0; i < vectorLen; i++)
|
|
output[vectorLen * 2 + i] = _swapEndianU32(input2[i]);
|
|
|
|
float minX = std::min(v[vectorLen * 0 + 0], std::min(v[vectorLen * 1 + 0], v[vectorLen * 2 + 0]));
|
|
float maxX = std::max(v[vectorLen * 0 + 0], std::max(v[vectorLen * 1 + 0], v[vectorLen * 2 + 0]));
|
|
float minY = std::min(v[vectorLen * 0 + 1], std::min(v[vectorLen * 1 + 1], v[vectorLen * 2 + 1]));;
|
|
float maxY = std::max(v[vectorLen * 0 + 1], std::max(v[vectorLen * 1 + 1], v[vectorLen * 2 + 1]));;
|
|
|
|
float totalX = minX;
|
|
totalX += maxY;
|
|
float halfX = totalX / 2.0f;
|
|
|
|
float totalY = minY;
|
|
totalY += maxY;
|
|
float halfY = totalY / 2.0f;
|
|
|
|
sint32 countX =
|
|
((v[vectorLen * 0 + 0] < halfX) ? 1 : 0) +
|
|
((v[vectorLen * 1 + 0] < halfX) ? 1 : 0) +
|
|
((v[vectorLen * 2 + 0] < halfX) ? 1 : 0);
|
|
|
|
sint32 countY =
|
|
((v[vectorLen * 0 + 1] < halfY) ? 1 : 0) +
|
|
((v[vectorLen * 1 + 1] < halfY) ? 1 : 0) +
|
|
((v[vectorLen * 2 + 1] < halfY) ? 1 : 0);
|
|
|
|
if (countX < 2)
|
|
v[vectorLen * 3 + 0] = minX;
|
|
else
|
|
v[vectorLen * 3 + 0] = maxX;
|
|
if (countY < 2)
|
|
v[vectorLen * 3 + 1] = minY;
|
|
else
|
|
v[vectorLen * 3 + 1] = maxY;
|
|
|
|
if (vectorLen >= 3)
|
|
v[vectorLen * 3 + 2] = v[vectorLen * 0 + 2]; // z from v0
|
|
if (vectorLen >= 4)
|
|
v[vectorLen * 3 + 3] = v[vectorLen * 0 + 3]; // w from v0
|
|
|
|
// order of rectangle vertices is
|
|
// v0 v1
|
|
// v2 v3
|
|
|
|
for (sint32 f = 0; f < vectorLen*4; f++)
|
|
output[f] = _swapEndianU32(output[f]);
|
|
}
|
|
|
|
#define ATTRIBUTE_CACHE_RING_SIZE (128) // up to 128 entries can be cached
|
|
|
|
void LatteBufferCache_LoadRemappedUniforms(LatteDecompilerShader* shader, float* uniformData)
|
|
{
|
|
uint32 shaderAluConst;
|
|
uint32 shaderUniformRegisterOffset;
|
|
|
|
switch (shader->shaderType)
|
|
{
|
|
case LatteConst::ShaderType::Vertex:
|
|
shaderAluConst = 0x400;
|
|
shaderUniformRegisterOffset = mmSQ_VTX_UNIFORM_BLOCK_START;
|
|
break;
|
|
case LatteConst::ShaderType::Pixel:
|
|
shaderAluConst = 0;
|
|
shaderUniformRegisterOffset = mmSQ_PS_UNIFORM_BLOCK_START;
|
|
break;
|
|
case LatteConst::ShaderType::Geometry:
|
|
shaderAluConst = 0; // geometry shader has no ALU const
|
|
shaderUniformRegisterOffset = mmSQ_GS_UNIFORM_BLOCK_START;
|
|
break;
|
|
default:
|
|
cemu_assert_debug(false);
|
|
}
|
|
|
|
// sourced from uniform registers
|
|
uint32* aluConstBase = LatteGPUState.contextRegister + mmSQ_ALU_CONSTANT0_0 + shaderAluConst;
|
|
for (auto it : shader->list_remappedUniformEntries_register)
|
|
{
|
|
uint64* uniformRegData = (uint64*)(aluConstBase + it.indexOffset / 4);
|
|
uint64* regDest = (uint64*)((uint8*)uniformData + it.mappedIndexOffset);
|
|
regDest[0] = uniformRegData[0];
|
|
regDest[1] = uniformRegData[1];
|
|
}
|
|
// sourced from uniform buffers
|
|
for (auto& bufferGroup : shader->list_remappedUniformEntries_bufferGroups)
|
|
{
|
|
MPTR physicalAddr = LatteGPUState.contextRegister[shaderUniformRegisterOffset + bufferGroup.kcacheBankIdOffset / 4];
|
|
if (physicalAddr)
|
|
{
|
|
uint8* uniformBase = memory_base + physicalAddr;
|
|
for (auto& it : bufferGroup.entries)
|
|
{
|
|
uint64* regDest = (uint64*)((uint8*)uniformData + it.mappedIndexOffset);
|
|
uint64* uniformEntrySrc = (uint64*)(uniformBase + it.indexOffset);
|
|
memcpy(regDest, uniformEntrySrc, 16);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (auto& it : bufferGroup.entries)
|
|
{
|
|
uint64* regDest = (uint64*)((uint8*)uniformData + it.mappedIndexOffset);
|
|
regDest[0] = 0;
|
|
regDest[1] = 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void LatteBufferCache_syncGPUUniformBuffers(LatteDecompilerShader* shader, const uint32 uniformBufferRegOffset, LatteConst::ShaderType shaderType)
|
|
{
|
|
if (shader->uniformMode == LATTE_DECOMPILER_UNIFORM_MODE_FULL_CBANK)
|
|
{
|
|
for(const auto& buf : shader->list_quickBufferList)
|
|
{
|
|
sint32 i = buf.index;
|
|
MPTR physicalAddr = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 0];
|
|
uint32 uniformSize = LatteGPUState.contextRegister[uniformBufferRegOffset + i * 7 + 1] + 1;
|
|
if (physicalAddr == MPTR_NULL) [[unlikely]]
|
|
{
|
|
g_renderer->buffer_bindUniformBuffer(shaderType, i, 0, 0);
|
|
continue;
|
|
}
|
|
uniformSize = std::min<uint32>(uniformSize, buf.size);
|
|
uint32 bindOffset = LatteBufferCache_retrieveDataInCache(physicalAddr, uniformSize);
|
|
g_renderer->buffer_bindUniformBuffer(shaderType, i, bindOffset, uniformSize);
|
|
}
|
|
}
|
|
}
|
|
|
|
// upload vertex and uniform buffers
|
|
bool LatteBufferCache_Sync(uint32 minIndex, uint32 maxIndex, uint32 baseInstance, uint32 instanceCount)
|
|
{
|
|
static uint32 s_syncBufferCounter = 0;
|
|
|
|
s_syncBufferCounter++;
|
|
if (s_syncBufferCounter >= 30)
|
|
{
|
|
LatteBufferCache_incrementalCleanup();
|
|
s_syncBufferCounter = 0;
|
|
}
|
|
|
|
LatteBufferCache_processDCFlushQueue();
|
|
// process queued deallocations from previous drawcall
|
|
LatteBufferCache_processDeallocations();
|
|
|
|
// sync and bind vertex buffers
|
|
LatteFetchShader* parsedFetchShader = LatteSHRC_GetActiveFetchShader();
|
|
if (!parsedFetchShader)
|
|
return false;
|
|
for (auto& bufferGroup : parsedFetchShader->bufferGroups)
|
|
{
|
|
uint32 bufferIndex = bufferGroup.attributeBufferIndex;
|
|
uint32 bufferBaseRegisterIndex = mmSQ_VTX_ATTRIBUTE_BLOCK_START + bufferIndex * 7;
|
|
MPTR bufferAddress = LatteGPUState.contextRegister[bufferBaseRegisterIndex + 0];
|
|
uint32 bufferSize = LatteGPUState.contextRegister[bufferBaseRegisterIndex + 1] + 1;
|
|
uint32 bufferStride = (LatteGPUState.contextRegister[bufferBaseRegisterIndex + 2] >> 11) & 0xFFFF;
|
|
|
|
if (bufferAddress == MPTR_NULL)
|
|
{
|
|
g_renderer->buffer_bindVertexBuffer(bufferIndex, 0, 0);
|
|
continue;
|
|
}
|
|
|
|
// dont rely on buffer size given by game
|
|
uint32 fixedBufferSize = 0;
|
|
if (bufferGroup.hasVtxIndexAccess)
|
|
fixedBufferSize = bufferStride * (maxIndex + 1) + bufferGroup.maxOffset;
|
|
if (bufferGroup.hasInstanceIndexAccess)
|
|
{
|
|
uint32 fixedBufferSizeInstance = bufferStride * ((baseInstance + instanceCount) + 1) + bufferGroup.maxOffset;
|
|
fixedBufferSize = std::max(fixedBufferSize, fixedBufferSizeInstance);
|
|
}
|
|
if (fixedBufferSize == 0 || bufferStride == 0)
|
|
fixedBufferSize += 128;
|
|
|
|
|
|
#if BOOST_OS_MACOS
|
|
if(bufferStride % 4 != 0)
|
|
{
|
|
if (g_renderer->GetType() == RendererAPI::Vulkan)
|
|
{
|
|
if (VulkanRenderer* vkRenderer = VulkanRenderer::GetInstance())
|
|
{
|
|
auto fixedBuffer = vkRenderer->buffer_genStrideWorkaroundVertexBuffer(bufferAddress, fixedBufferSize, bufferStride);
|
|
vkRenderer->buffer_bindVertexStrideWorkaroundBuffer(fixedBuffer.first, fixedBuffer.second, bufferIndex, fixedBufferSize);
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
uint32 bindOffset = LatteBufferCache_retrieveDataInCache(bufferAddress, fixedBufferSize);
|
|
g_renderer->buffer_bindVertexBuffer(bufferIndex, bindOffset, fixedBufferSize);
|
|
}
|
|
// sync uniform buffers
|
|
LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader();
|
|
if (vertexShader)
|
|
LatteBufferCache_syncGPUUniformBuffers(vertexShader, mmSQ_VTX_UNIFORM_BLOCK_START, LatteConst::ShaderType::Vertex);
|
|
LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
|
|
if (geometryShader)
|
|
LatteBufferCache_syncGPUUniformBuffers(geometryShader, mmSQ_GS_UNIFORM_BLOCK_START, LatteConst::ShaderType::Geometry);
|
|
LatteDecompilerShader* pixelShader = LatteSHRC_GetActivePixelShader();
|
|
if (pixelShader)
|
|
LatteBufferCache_syncGPUUniformBuffers(pixelShader, mmSQ_PS_UNIFORM_BLOCK_START, LatteConst::ShaderType::Pixel);
|
|
return true;
|
|
}
|