Cemu/src/Cafe/HW/Latte/Core/LatteStreamoutGPU.cpp
Exzap 3bceb39966
Remove PUBLIC_RELEASE flag and tie asserts to debug config (#287)
Removes the -DPUBLIC_RELEASE flag. Cemu's debug asserts are now only enabled if the build configuration is Debug. Similarly, on Windows the console is only shown for Debug builds.
2022-09-24 08:43:27 +02:00

168 lines
6.5 KiB
C++

#include "Cafe/HW/Latte/Core/LatteConst.h"
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#include "Cafe/HW/Latte/Core/Latte.h"
#include "Cafe/HW/Latte/Core/LatteDraw.h"
#include "Cafe/HW/Latte/Core/LatteShader.h"
#include "Cafe/GameProfile/GameProfile.h"
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
#include "util/containers/IntervalBucketContainer.h"
#include "Cafe/HW/Latte/Renderer/Renderer.h"
#include "Cafe/HW/Latte/Core/LatteRingBuffer.h"
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
// Bookkeeping for the streamout ring buffer allocator in this file.
struct
{
// offset (in bytes) at which the next ring buffer allocation starts; advanced by LatteStreamout_allocateGPURingbufferMem()
sint32 currentRingbufferOffset;
// reset to nullptr by LatteStreamout_InitCache(); not otherwise touched in this part of the file
VirtualBufferHeap_t* mainBufferHeap;
}streamoutManager;
// Capacity of the streamout ring buffer in bytes.
sint32 LatteStreamout_GetRingBufferSize()
{
	const sint32 ringBufferSizeInBytes = 8 * 1024 * 1024; // 8MB
	return ringBufferSizeInBytes;
}
// Reserves space in the streamout ring buffer and returns the offset of the reservation.
// The requested size is rounded up to a multiple of 256 bytes. If the rounded request does not
// fit between the current position and the end of the ring buffer, the allocation restarts at offset 0.
// NOTE(review): a request larger than the whole ring buffer is not rejected here; it is placed at
// offset 0 and the stored position then ends up past the ring buffer size.
sint32 LatteStreamout_allocateGPURingbufferMem(sint32 size)
{
	// round the request up to the next multiple of 256 bytes
	const sint32 alignedSize = (size + 255) & ~255;
	// start at the current position, or wrap around to the beginning if the request does not fit
	sint32 allocOffset = streamoutManager.currentRingbufferOffset;
	if ((allocOffset + alignedSize) > LatteStreamout_GetRingBufferSize())
		allocOffset = 0;
	// the next allocation begins directly behind this one
	streamoutManager.currentRingbufferOffset = allocOffset + alignedSize;
	return allocOffset;
}
// Puts the streamout manager into its initial state: no heap attached and the
// ring buffer allocation position back at the start.
void LatteStreamout_InitCache()
{
	streamoutManager.mainBufferHeap = nullptr;
	streamoutManager.currentRingbufferOffset = 0;
}
// Set to true by LatteStreamout_PrepareDrawcall() once streamout has been set up for a draw;
// cleared by LatteStreamout_FinishDrawcall(), or by LatteStreamout_PrepareDrawcall() when streamout is disabled.
bool _transformFeedbackIsActive = false;
// State of the streamout operation set up by the most recent LatteStreamout_PrepareDrawcall().
// LatteStreamout_FinishDrawcall() reads it to copy the written data and advance the buffer offsets.
struct
{
// vertex count of the draw, multiplied by the per-vertex output count computed for the geometry shader (1 if none is active)
uint32 vertexCount;
// instance count of the draw
uint32 instanceCount;
// bit i is set if streamout buffer i is written by the active shader
uint32 streamoutWriteMask;
// per-buffer write description, indexed by streamout buffer index
struct
{
// false if the buffer's bit is not set in streamoutWriteMask
bool isActive;
// offset of this buffer's allocation inside the streamout ring buffer
sint32 ringBufferOffset;
// destination address of the write: buffer base address plus the buffer's current offset register
uint32 rangeAddr;
uint32 rangeSize; // size of written streamout data, bounded by buffer size
}streamoutBufferWrite[LATTE_NUM_STREAMOUT_BUFFER];
}activeStreamoutOperation;
// Number of vertices written by the active streamout operation:
// the operation's vertex count multiplied by its instance count.
uint32 LatteStreamout_getNumberOfWrittenVertices()
{
	// todo: Currently we only handle GX2_POINTS
	const uint32 verticesPerInstance = activeStreamoutOperation.vertexCount;
	const uint32 numInstances = activeStreamoutOperation.instanceCount;
	return verticesPerInstance * numInstances;
}
// Returns the number of bytes that the current draw operation writes into the given streamout buffer.
// The result is capped at the buffer's total size (VGT_STRMOUT_BUFFER_SIZE register); the buffer's
// current write offset is not taken into account here.
//   streamoutBufferIndex: index of the streamout buffer whose stride/size registers are read
uint32 LatteStreamout_getBufferWriteRangeSize(uint32 streamoutBufferIndex)
{
	// both register values are shifted left by 2 to obtain bytes (presumably stored in 4-byte units)
	const uint32 bufferStride = LatteGPUState.contextRegister[mmVGT_STRMOUT_VTX_STRIDE_0 + streamoutBufferIndex * 4] << 2;
	const uint32 bufferSize = LatteGPUState.contextRegister[mmVGT_STRMOUT_BUFFER_SIZE_0 + streamoutBufferIndex * 4] << 2;
	// multiply in 64 bits: a 32-bit product could wrap around for large draws and
	// produce a size that is smaller than what is actually written
	const unsigned long long unboundedWriteSize = static_cast<unsigned long long>(LatteStreamout_getNumberOfWrittenVertices()) * bufferStride;
	if (unboundedWriteSize > bufferSize)
		return bufferSize;
	return static_cast<uint32>(unboundedWriteSize);
}
// Prepares streamout for the upcoming drawcall.
//   count:         vertex count of the draw
//   instanceCount: instance count of the draw
// If the VGT_STRMOUT_EN register is zero, streamout is marked inactive and nothing else is done.
// Otherwise the active streamout operation is filled in, ring buffer memory is allocated for every
// streamout buffer written by the active shader, the buffers are handed to the renderer and
// streamout is started. LatteStreamout_FinishDrawcall() must be called after the draw to complete the operation.
void LatteStreamout_PrepareDrawcall(uint32 count, uint32 instanceCount)
{
	if (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] == 0)
	{
		_transformFeedbackIsActive = false;
		return; // streamout inactive
	}
	// get active vertex shader
	LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader();
	// if a geometry shader is used calculate how many vertices it outputs
	LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
	sint32 maxVerticesInGS = 1;
	if (geometryShader)
	{
		uint32 bytesPerVertex = LatteGPUState.contextRegister[mmSQ_GS_VERT_ITEMSIZE] * 4;
		// a vertex item size of zero would cause a division by zero below;
		// flag it in debug builds and keep the default of one vertex otherwise
		cemu_assert_debug(bytesPerVertex > 0);
		if (bytesPerVertex != 0)
			maxVerticesInGS = ((LatteGPUState.contextRegister[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF) * 4) / bytesPerVertex;
		cemu_assert_debug(maxVerticesInGS > 0);
	}
	// setup active streamout operation struct
	activeStreamoutOperation.vertexCount = count * maxVerticesInGS;
	activeStreamoutOperation.instanceCount = instanceCount;
	// get mask of all written streamout buffers
	// with a geometry shader active, its write mask is used and the vertex shader is expected to write nothing
	if (geometryShader)
	{
#ifdef CEMU_DEBUG_ASSERT
		cemu_assert_debug(vertexShader->streamoutBufferWriteMask2.any() == false);
#endif
	}
	LatteDecompilerShader* streamoutShader = geometryShader ? geometryShader : vertexShader;
	uint32 streamoutWriteMask = 0;
	for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
	{
		if (streamoutShader->streamoutBufferWriteMask2[i])
			streamoutWriteMask |= (1 << i);
	}
	activeStreamoutOperation.streamoutWriteMask = streamoutWriteMask;
	// bind streamout buffers
	for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
	{
		auto& bufferWrite = activeStreamoutOperation.streamoutBufferWrite[i];
		if ((streamoutWriteMask&(1 << i)) == 0)
		{
			bufferWrite.isActive = false;
			continue;
		}
		uint32 bufferBaseMPTR = LatteGPUState.contextRegister[mmVGT_STRMOUT_BUFFER_BASE_0 + i * 4] << 8;
		uint32 bufferSize = LatteGPUState.contextRegister[mmVGT_STRMOUT_BUFFER_SIZE_0 + i * 4] << 2;
		uint32 bufferOffset = LatteGPUState.contextRegister[mmVGT_STRMOUT_BUFFER_OFFSET_0 + i * 4];
		uint32 streamoutWriteSize = LatteStreamout_getBufferWriteRangeSize(i);
		uint32 rangeAddr = bufferBaseMPTR + bufferOffset;
		sint32 ringBufferOffset = LatteStreamout_allocateGPURingbufferMem(streamoutWriteSize); // allocate memory for the entire streamout write
		// calculate write size after bounding it to the buffer
		// (only the space between the current offset and the end of the buffer can be written)
		uint32 remainingBytesToWrite = bufferOffset > bufferSize ? 0 : (bufferSize - bufferOffset);
		uint32 rangeSize = std::min(streamoutWriteSize, remainingBytesToWrite);
		bufferWrite.isActive = true;
		bufferWrite.ringBufferOffset = ringBufferOffset;
		bufferWrite.rangeAddr = rangeAddr;
		bufferWrite.rangeSize = rangeSize;
		g_renderer->streamout_setupXfbBuffer(i, ringBufferOffset, rangeAddr, rangeSize);
	}
	g_renderer->streamout_begin();
	_transformFeedbackIsActive = true;
}
void LatteStreamout_FinishDrawcall(bool useDirectMemoryMode)
{
if (_transformFeedbackIsActive)
{
_transformFeedbackIsActive = false;
for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
{
if ((activeStreamoutOperation.streamoutWriteMask&(1 << i)) == 0)
continue;
if (activeStreamoutOperation.streamoutBufferWrite[i].rangeSize > 0)
{
if(useDirectMemoryMode)
g_renderer->bufferCache_copyStreamoutToMainBuffer(activeStreamoutOperation.streamoutBufferWrite[i].ringBufferOffset, activeStreamoutOperation.streamoutBufferWrite[i].rangeAddr, activeStreamoutOperation.streamoutBufferWrite[i].rangeSize);
else
LatteBufferCache_copyStreamoutDataToCache(activeStreamoutOperation.streamoutBufferWrite[i].rangeAddr, activeStreamoutOperation.streamoutBufferWrite[i].rangeSize, activeStreamoutOperation.streamoutBufferWrite[i].ringBufferOffset);
}
// advance streamout offset
uint32 newOffset = LatteGPUState.contextRegister[mmVGT_STRMOUT_BUFFER_OFFSET_0 + i * 4] + activeStreamoutOperation.streamoutBufferWrite[i].rangeSize;
LatteGPUState.contextRegister[mmVGT_STRMOUT_BUFFER_OFFSET_0 + i * 4] = newOffset;
}
g_renderer->streamout_rendererFinishDrawcall();
}
}