mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-05 14:31:17 +12:00
GX2+TCL: Reimplement command buffer submission
- GX2 utilizes TCL(.rpl) API for command submission instead of directly writing to an internal GPU fifo - Submission & retire timestamps are correctly implemented as incremental counters - Command buffering behaviour matches console - Fixes race conditions on aarch64
This commit is contained in:
parent
96765e4ac6
commit
28ea70b6d8
21 changed files with 761 additions and 472 deletions
|
@ -11,9 +11,14 @@
|
|||
void gx2Export_GX2SetPixelShader(PPCInterpreter_t* hCPU)
|
||||
{
|
||||
cemuLog_log(LogType::GX2, "GX2SetPixelShader(0x{:08x})", hCPU->gpr[3]);
|
||||
GX2ReserveCmdSpace(100);
|
||||
|
||||
GX2PixelShader_t* pixelShader = (GX2PixelShader_t*)memory_getPointerFromVirtualOffset(hCPU->gpr[3]);
|
||||
|
||||
uint32 numInputs = _swapEndianU32(pixelShader->regs[4]);
|
||||
if( numInputs > 0x20 )
|
||||
numInputs = 0x20;
|
||||
|
||||
GX2::GX2ReserveCmdSpace(26 + numInputs);
|
||||
|
||||
MPTR shaderProgramAddr;
|
||||
uint32 shaderProgramSize;
|
||||
|
||||
|
@ -44,9 +49,6 @@ void gx2Export_GX2SetPixelShader(PPCInterpreter_t* hCPU)
|
|||
_swapEndianU32(pixelShader->regs[2]),
|
||||
_swapEndianU32(pixelShader->regs[3]));
|
||||
// setup pixel shader extended inputs control
|
||||
uint32 numInputs = _swapEndianU32(pixelShader->regs[4]);
|
||||
if( numInputs > 0x20 )
|
||||
numInputs = 0x20;
|
||||
gx2WriteGather_submitU32AsBE(pm4HeaderType3(IT_SET_CONTEXT_REG, 1+numInputs));
|
||||
gx2WriteGather_submitU32AsBE(mmSPI_PS_INPUT_CNTL_0-0xA000);
|
||||
for(uint32 i=0; i<numInputs; i++)
|
||||
|
@ -79,9 +81,17 @@ void gx2Export_GX2SetPixelShader(PPCInterpreter_t* hCPU)
|
|||
void gx2Export_GX2SetGeometryShader(PPCInterpreter_t* hCPU)
|
||||
{
|
||||
cemuLog_log(LogType::GX2, "GX2SetGeometryShader(0x{:08x})", hCPU->gpr[3]);
|
||||
GX2ReserveCmdSpace(100);
|
||||
|
||||
GX2GeometryShader_t* geometryShader = (GX2GeometryShader_t*)memory_getPointerFromVirtualOffset(hCPU->gpr[3]);
|
||||
uint32 numOutputIds = _swapEndianU32(geometryShader->regs[7]);
|
||||
numOutputIds = std::min<uint32>(numOutputIds, 0xA);
|
||||
uint32 reserveSize = 38; // 38 fixed parameters
|
||||
if (numOutputIds != 0)
|
||||
reserveSize += 2 + numOutputIds;
|
||||
if( _swapEndianU32(geometryShader->useStreamout) != 0 )
|
||||
reserveSize += 2 + 12;
|
||||
|
||||
GX2::GX2ReserveCmdSpace(reserveSize);
|
||||
|
||||
MPTR shaderProgramAddr;
|
||||
uint32 shaderProgramSize;
|
||||
|
@ -128,6 +138,7 @@ void gx2Export_GX2SetGeometryShader(PPCInterpreter_t* hCPU)
|
|||
|
||||
if( _swapEndianU32(geometryShader->useStreamout) != 0 )
|
||||
{
|
||||
// todo - IT_EVENT_WRITE packet here
|
||||
// stride 0
|
||||
gx2WriteGather_submitU32AsBE(pm4HeaderType3(IT_SET_CONTEXT_REG, 2));
|
||||
gx2WriteGather_submitU32AsBE(mmVGT_STRMOUT_VTX_STRIDE_0-0xA000);
|
||||
|
@ -180,8 +191,6 @@ void gx2Export_GX2SetGeometryShader(PPCInterpreter_t* hCPU)
|
|||
gx2WriteGather_submitU32AsBE(_swapEndianU32(geometryShader->regs[3]));
|
||||
|
||||
// GS outputs
|
||||
uint32 numOutputIds = _swapEndianU32(geometryShader->regs[7]);
|
||||
numOutputIds = std::min<uint32>(numOutputIds, 0xA);
|
||||
if( numOutputIds != 0 )
|
||||
{
|
||||
gx2WriteGather_submitU32AsBE(pm4HeaderType3(IT_SET_CONTEXT_REG, 1+numOutputIds));
|
||||
|
@ -254,8 +263,7 @@ void gx2Export_GX2SetComputeShader(PPCInterpreter_t* hCPU)
|
|||
shaderPtr = computeShader->rBuffer.GetVirtualAddr();
|
||||
shaderSize = computeShader->rBuffer.GetSize();
|
||||
}
|
||||
|
||||
GX2ReserveCmdSpace(0x11);
|
||||
GX2::GX2ReserveCmdSpace(0x11);
|
||||
|
||||
gx2WriteGather_submit(pm4HeaderType3(IT_SET_CONTEXT_REG, 6),
|
||||
mmSQ_PGM_START_ES-0xA000,
|
||||
|
@ -272,7 +280,7 @@ void gx2Export_GX2SetComputeShader(PPCInterpreter_t* hCPU)
|
|||
|
||||
void _GX2SubmitUniformBlock(uint32 registerBase, uint32 index, MPTR virtualAddress, uint32 size)
|
||||
{
|
||||
GX2ReserveCmdSpace(9);
|
||||
GX2::GX2ReserveCmdSpace(9);
|
||||
gx2WriteGather_submit(pm4HeaderType3(IT_SET_RESOURCE, 8),
|
||||
registerBase + index * 7,
|
||||
memory_virtualToPhysical(virtualAddress),
|
||||
|
@ -307,7 +315,7 @@ void gx2Export_GX2SetGeometryUniformBlock(PPCInterpreter_t* hCPU)
|
|||
|
||||
void gx2Export_GX2RSetVertexUniformBlock(PPCInterpreter_t* hCPU)
|
||||
{
|
||||
GX2ReserveCmdSpace(9);
|
||||
GX2::GX2ReserveCmdSpace(9);
|
||||
|
||||
GX2RBuffer* bufferPtr = (GX2RBuffer*)memory_getPointerFromVirtualOffset(hCPU->gpr[3]);
|
||||
uint32 index = hCPU->gpr[4];
|
||||
|
@ -320,7 +328,7 @@ void gx2Export_GX2RSetVertexUniformBlock(PPCInterpreter_t* hCPU)
|
|||
|
||||
void gx2Export_GX2SetShaderModeEx(PPCInterpreter_t* hCPU)
|
||||
{
|
||||
GX2ReserveCmdSpace(8+4);
|
||||
GX2::GX2ReserveCmdSpace(8+4);
|
||||
uint32 mode = hCPU->gpr[3];
|
||||
|
||||
uint32 sqConfig = hCPU->gpr[3] == 0 ? 4 : 0;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue