mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-02 13:01:18 +12:00
Removes the -DPUBLIC_RELEASE flag. Cemu's debug asserts are now only enabled if the build configuration is Debug. Similarly, on Windows the console is only shown for Debug builds.
168 lines
6.5 KiB
C++
168 lines
6.5 KiB
C++
#include "Cafe/HW/Latte/Core/LatteConst.h"
|
|
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
|
#include "Cafe/HW/Latte/Core/Latte.h"
|
|
#include "Cafe/HW/Latte/Core/LatteDraw.h"
|
|
#include "Cafe/HW/Latte/Core/LatteShader.h"
|
|
#include "Cafe/GameProfile/GameProfile.h"
|
|
#include "Cafe/HW/Latte/LegacyShaderDecompiler/LatteDecompiler.h"
|
|
|
|
#include "util/containers/IntervalBucketContainer.h"
|
|
#include "Cafe/HW/Latte/Renderer/Renderer.h"
|
|
#include "Cafe/HW/Latte/Core/LatteRingBuffer.h"
|
|
#include "Cafe/HW/Latte/Core/LatteBufferCache.h"
|
|
|
|
struct
|
|
{
|
|
sint32 currentRingbufferOffset;
|
|
VirtualBufferHeap_t* mainBufferHeap;
|
|
}streamoutManager;
|
|
|
|
// Returns the total size of the streamout ring buffer in bytes
sint32 LatteStreamout_GetRingBufferSize()
{
	constexpr sint32 ringBufferSizeInBytes = 8 * 1024 * 1024; // 8MB
	return ringBufferSizeInBytes;
}
|
|
|
|
// Reserves `size` bytes (rounded up to a multiple of 256) in the streamout ring buffer
// and returns the byte offset of the reserved range.
// If the range does not fit behind the current offset, allocation restarts at offset 0.
// NOTE(review): a request larger than the whole ring buffer is not rejected here; it is placed at offset 0.
sint32 LatteStreamout_allocateGPURingbufferMem(sint32 size)
{
	// round the request up to the next 256 byte boundary
	const sint32 alignedSize = (size + 0xFF) & ~0xFF;
	// wrap around to the start of the ring buffer when the request would run past its end
	const bool exceedsRingBuffer = (streamoutManager.currentRingbufferOffset + alignedSize) > LatteStreamout_GetRingBufferSize();
	const sint32 allocOffset = exceedsRingBuffer ? 0 : streamoutManager.currentRingbufferOffset;
	// the next allocation begins right behind this one
	streamoutManager.currentRingbufferOffset = allocOffset + alignedSize;
	return allocOffset;
}
|
|
|
|
void LatteStreamout_InitCache()
|
|
{
|
|
streamoutManager.currentRingbufferOffset = 0;
|
|
streamoutManager.mainBufferHeap = nullptr;
|
|
}
|
|
|
|
bool _transformFeedbackIsActive = false;
|
|
struct
|
|
{
|
|
uint32 vertexCount;
|
|
uint32 instanceCount;
|
|
uint32 streamoutWriteMask;
|
|
struct
|
|
{
|
|
bool isActive;
|
|
sint32 ringBufferOffset;
|
|
uint32 rangeAddr;
|
|
uint32 rangeSize; // size of written streamout data, bounded by buffer size
|
|
}streamoutBufferWrite[LATTE_NUM_STREAMOUT_BUFFER];
|
|
}activeStreamoutOperation;
|
|
|
|
// Returns the number of vertices written by the active streamout operation
// (vertex count multiplied by instance count)
uint32 LatteStreamout_getNumberOfWrittenVertices()
{
	// todo: Currently we only handle GX2_POINTS
	const uint32 verticesPerInstance = activeStreamoutOperation.vertexCount;
	const uint32 numInstances = activeStreamoutOperation.instanceCount;
	return verticesPerInstance * numInstances;
}
|
|
|
|
// returns the number of bytes that are written into the buffer by the current draw operation (ignoring buffer maximum size)
|
|
uint32 LatteStreamout_getBufferWriteRangeSize(uint32 streamoutBufferIndex)
|
|
{
|
|
uint32 bufferStride = LatteGPUState.contextRegister[mmVGT_STRMOUT_VTX_STRIDE_0 + streamoutBufferIndex * 4] << 2;
|
|
uint32 bufferSize = LatteGPUState.contextRegister[mmVGT_STRMOUT_BUFFER_SIZE_0 + streamoutBufferIndex * 4] << 2;
|
|
uint32 writeSize = LatteStreamout_getNumberOfWrittenVertices() * bufferStride;
|
|
if (bufferSize < writeSize)
|
|
writeSize = bufferSize;
|
|
return writeSize;
|
|
}
|
|
|
|
// Prepares streamout (transform feedback) for the upcoming draw.
// Does nothing except clearing the active flag when the VGT_STRMOUT_EN register is zero.
// Otherwise it records the draw dimensions, determines which streamout buffers the active
// shader writes, reserves ring buffer space for each of them, hands the ranges to the
// renderer and finally starts streamout on the renderer.
// count: vertex count of the draw (multiplied by the geometry shader output vertex count, if one is active)
// instanceCount: instance count of the draw
void LatteStreamout_PrepareDrawcall(uint32 count, uint32 instanceCount)
{
	if (LatteGPUState.contextRegister[mmVGT_STRMOUT_EN] == 0)
	{
		_transformFeedbackIsActive = false;
		return; // streamout inactive
	}
	// get active vertex shader
	LatteDecompilerShader* vertexShader = LatteSHRC_GetActiveVertexShader();
	// if a geometry shader is used calculate how many vertices it outputs
	LatteDecompilerShader* geometryShader = LatteSHRC_GetActiveGeometryShader();
	sint32 maxVerticesInGS = 1;
	if (geometryShader)
	{
		const uint32 bytesPerVertex = LatteGPUState.contextRegister[mmSQ_GS_VERT_ITEMSIZE] * 4;
		const uint32 bytesPerRingItem = (LatteGPUState.contextRegister[mmSQ_GSVS_RING_ITEMSIZE] & 0x7FFF) * 4;
		// a zero vertex item size would be a division by zero; keep the default of one vertex in that case
		cemu_assert_debug(bytesPerVertex != 0);
		if (bytesPerVertex != 0)
			maxVerticesInGS = bytesPerRingItem / bytesPerVertex;
		cemu_assert_debug(maxVerticesInGS > 0);
	}
	// setup active streamout operation struct
	activeStreamoutOperation.vertexCount = count * maxVerticesInGS;
	activeStreamoutOperation.instanceCount = instanceCount;
#ifdef CEMU_DEBUG_ASSERT
	// when a geometry shader is present the vertex shader is not expected to write streamout buffers
	if (geometryShader)
		cemu_assert_debug(vertexShader->streamoutBufferWriteMask2.any() == false);
#endif
	// get mask of all written streamout buffers
	// the geometry shader takes precedence over the vertex shader as the streamout source
	LatteDecompilerShader* streamoutShader = geometryShader ? geometryShader : vertexShader;
	uint32 streamoutWriteMask = 0;
	for (sint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
	{
		if (streamoutShader->streamoutBufferWriteMask2[i])
			streamoutWriteMask |= (1 << i);
	}
	activeStreamoutOperation.streamoutWriteMask = streamoutWriteMask;
	// bind streamout buffers
	for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
	{
		if ((streamoutWriteMask & (1 << i)) == 0)
		{
			activeStreamoutOperation.streamoutBufferWrite[i].isActive = false;
			continue;
		}
		const uint32 bufferBaseMPTR = LatteGPUState.contextRegister[mmVGT_STRMOUT_BUFFER_BASE_0 + i * 4] << 8;
		const uint32 bufferSize = LatteGPUState.contextRegister[mmVGT_STRMOUT_BUFFER_SIZE_0 + i * 4] << 2;
		const uint32 bufferOffset = LatteGPUState.contextRegister[mmVGT_STRMOUT_BUFFER_OFFSET_0 + i * 4];
		const uint32 streamoutWriteSize = LatteStreamout_getBufferWriteRangeSize(i);
		const uint32 rangeAddr = bufferBaseMPTR + bufferOffset;
		// allocate memory for the entire streamout write
		const sint32 ringBufferOffset = LatteStreamout_allocateGPURingbufferMem(streamoutWriteSize);
		// calculate write size after bounding it to the buffer
		const uint32 remainingBytesToWrite = bufferOffset > bufferSize ? 0 : (bufferSize - bufferOffset);
		const uint32 rangeSize = std::min(streamoutWriteSize, remainingBytesToWrite);

		activeStreamoutOperation.streamoutBufferWrite[i].isActive = true;
		activeStreamoutOperation.streamoutBufferWrite[i].ringBufferOffset = ringBufferOffset;
		activeStreamoutOperation.streamoutBufferWrite[i].rangeAddr = rangeAddr;
		activeStreamoutOperation.streamoutBufferWrite[i].rangeSize = rangeSize;

		g_renderer->streamout_setupXfbBuffer(i, ringBufferOffset, rangeAddr, rangeSize);
	}
	g_renderer->streamout_begin();
	_transformFeedbackIsActive = true;
}
|
|
|
|
void LatteStreamout_FinishDrawcall(bool useDirectMemoryMode)
|
|
{
|
|
if (_transformFeedbackIsActive)
|
|
{
|
|
_transformFeedbackIsActive = false;
|
|
for (uint32 i = 0; i < LATTE_NUM_STREAMOUT_BUFFER; i++)
|
|
{
|
|
if ((activeStreamoutOperation.streamoutWriteMask&(1 << i)) == 0)
|
|
continue;
|
|
if (activeStreamoutOperation.streamoutBufferWrite[i].rangeSize > 0)
|
|
{
|
|
if(useDirectMemoryMode)
|
|
g_renderer->bufferCache_copyStreamoutToMainBuffer(activeStreamoutOperation.streamoutBufferWrite[i].ringBufferOffset, activeStreamoutOperation.streamoutBufferWrite[i].rangeAddr, activeStreamoutOperation.streamoutBufferWrite[i].rangeSize);
|
|
else
|
|
LatteBufferCache_copyStreamoutDataToCache(activeStreamoutOperation.streamoutBufferWrite[i].rangeAddr, activeStreamoutOperation.streamoutBufferWrite[i].rangeSize, activeStreamoutOperation.streamoutBufferWrite[i].ringBufferOffset);
|
|
}
|
|
// advance streamout offset
|
|
uint32 newOffset = LatteGPUState.contextRegister[mmVGT_STRMOUT_BUFFER_OFFSET_0 + i * 4] + activeStreamoutOperation.streamoutBufferWrite[i].rangeSize;
|
|
LatteGPUState.contextRegister[mmVGT_STRMOUT_BUFFER_OFFSET_0 + i * 4] = newOffset;
|
|
}
|
|
g_renderer->streamout_rendererFinishDrawcall();
|
|
}
|
|
}
|