mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-03 05:21:18 +12:00
GX2+TCL: Reimplement command buffer submission
- GX2 utilizes the TCL (.rpl) API for command submission instead of directly writing to an internal GPU FIFO
- Submission & retire timestamps are correctly implemented as incremental counters
- Command buffering behaviour matches console
- Fixes race conditions on aarch64
This commit is contained in:
parent
96765e4ac6
commit
28ea70b6d8
21 changed files with 761 additions and 472 deletions
|
@ -4,178 +4,397 @@
|
|||
#include "Cafe/HW/Latte/Core/LattePM4.h"
|
||||
#include "Cafe/OS/libs/coreinit/coreinit.h"
|
||||
#include "Cafe/OS/libs/coreinit/coreinit_Thread.h"
|
||||
#include "Cafe/OS/libs/TCL/TCL.h"
|
||||
#include "Cafe/HW/Latte/ISA/RegDefines.h"
|
||||
#include "GX2.h"
|
||||
#include "GX2_Command.h"
|
||||
#include "GX2_Shader.h"
|
||||
#include "GX2_Misc.h"
|
||||
#include "OS/libs/coreinit/coreinit_MEM.h"
|
||||
|
||||
extern uint8* gxRingBufferReadPtr;
|
||||
|
||||
GX2WriteGatherPipeState gx2WriteGatherPipe = { 0 };
|
||||
namespace GX2
|
||||
{
|
||||
GX2PerCoreCBState s_perCoreCBState[Espresso::CORE_COUNT];
|
||||
}
|
||||
|
||||
// Appends one 32-bit word to the command buffer of the calling PPC core.
// The value is passed through _swapEndianU32 before it is stored.
// Silently does nothing when the core has no active write pointer.
void gx2WriteGather_submitU32AsBE(uint32 v)
{
	uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance());
	auto& coreCBState = GX2::s_perCoreCBState[coreIndex];
	if (coreCBState.currentWritePtr == nullptr)
		return;
	*(uint32*)(coreCBState.currentWritePtr) = _swapEndianU32(v);
	coreCBState.currentWritePtr++;
	// debug builds only: the write pointer must not have moved past the end of the buffer
	cemu_assert_debug(coreCBState.currentWritePtr <= (coreCBState.bufferPtr + coreCBState.bufferSizeInU32s));
}
|
||||
|
||||
// Appends one 32-bit word to the command buffer of the calling PPC core.
// The value is stored as-is (no byte swap), unlike gx2WriteGather_submitU32AsBE.
// Silently does nothing when the core has no active write pointer.
void gx2WriteGather_submitU32AsLE(uint32 v)
{
	uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance());
	auto& coreCBState = GX2::s_perCoreCBState[coreIndex];
	if (coreCBState.currentWritePtr == nullptr)
		return;
	*(uint32*)(coreCBState.currentWritePtr) = v;
	coreCBState.currentWritePtr++;
	// debug builds only: the write pointer must not have moved past the end of the buffer
	cemu_assert_debug(coreCBState.currentWritePtr <= (coreCBState.bufferPtr + coreCBState.bufferSizeInU32s));
}
|
||||
|
||||
// Appends numValues 32-bit words from v to the command buffer of the calling PPC core.
// The words are copied with memcpy_dwords without any byte swap.
// Silently does nothing when the core has no active write pointer.
void gx2WriteGather_submitU32AsLEArray(uint32* v, uint32 numValues)
{
	uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance());
	auto& coreCBState = GX2::s_perCoreCBState[coreIndex];
	if (coreCBState.currentWritePtr == nullptr)
		return;
	memcpy_dwords(coreCBState.currentWritePtr, v, numValues);
	coreCBState.currentWritePtr += numValues;
	// debug builds only: the write pointer must not have moved past the end of the buffer
	cemu_assert_debug(coreCBState.currentWritePtr <= (coreCBState.bufferPtr + coreCBState.bufferSizeInU32s));
}
|
||||
|
||||
namespace GX2
|
||||
{
|
||||
sint32 gx2WriteGatherCurrentMainCoreIndex = -1;
|
||||
bool gx2WriteGatherInited = false;
|
||||
|
||||
void GX2WriteGather_ResetToDefaultState()
|
||||
struct GX2CommandState // mapped to PPC space since the GPU writes here
|
||||
{
|
||||
gx2WriteGatherCurrentMainCoreIndex = -1;
|
||||
gx2WriteGatherInited = false;
|
||||
}
|
||||
// command pool
|
||||
MEMPTR<uint32be> commandPoolBase;
|
||||
uint32 commandPoolSizeInU32s;
|
||||
MEMPTR<uint32be> gpuCommandReadPtr;
|
||||
// timestamp
|
||||
uint64be lastSubmissionTime;
|
||||
};
|
||||
|
||||
void GX2Init_writeGather() // init write gather, make current core
|
||||
SysAllocator<GX2CommandState> s_commandState;
|
||||
GX2PerCoreCBState s_mainCoreLastCommandState;
|
||||
bool s_cbBufferIsInternallyAllocated;
|
||||
|
||||
void GX2Command_StartNewCommandBuffer(uint32 numU32s);
|
||||
|
||||
// called from GX2Init. Allocates a 4MB memory chunk from which command buffers are suballocated from
|
||||
void GX2Init_commandBufferPool(void* bufferBase, uint32 bufferSize)
|
||||
{
|
||||
if (gx2WriteGatherPipe.gxRingBuffer == NULL)
|
||||
gx2WriteGatherPipe.gxRingBuffer = (uint8*)malloc(GX2_COMMAND_RING_BUFFER_SIZE);
|
||||
if (gx2WriteGatherCurrentMainCoreIndex == sGX2MainCoreIndex)
|
||||
return; // write gather already configured for same core
|
||||
for (sint32 i = 0; i < PPC_CORE_COUNT; i++)
|
||||
cemu_assert_debug(!s_commandState->commandPoolBase); // should not be allocated already
|
||||
// setup command buffer pool. If not provided allocate a 4MB or custom size buffer
|
||||
uint32 poolSize = bufferSize ? bufferSize : 0x400000; // 4MB (can be overwritten by custom GX2Init parameters?)
|
||||
if (bufferBase)
|
||||
{
|
||||
if (i == sGX2MainCoreIndex)
|
||||
s_commandState->commandPoolBase = (uint32be*)bufferBase;
|
||||
s_cbBufferIsInternallyAllocated = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
s_commandState->commandPoolBase = (uint32be*)coreinit::_weak_MEMAllocFromDefaultHeapEx(poolSize, 0x100);
|
||||
s_cbBufferIsInternallyAllocated = true;
|
||||
}
|
||||
if (!s_commandState->commandPoolBase)
|
||||
{
|
||||
cemuLog_log(LogType::Force, "GX2: Failed to allocate command buffer pool");
|
||||
}
|
||||
s_commandState->commandPoolSizeInU32s = poolSize / sizeof(uint32be);
|
||||
s_commandState->gpuCommandReadPtr = s_commandState->commandPoolBase;
|
||||
// init per-core command buffer state
|
||||
for (uint32 i = 0; i < Espresso::CORE_COUNT; i++)
|
||||
{
|
||||
s_perCoreCBState[i].bufferPtr = nullptr;
|
||||
s_perCoreCBState[i].bufferSizeInU32s = 0;
|
||||
s_perCoreCBState[i].currentWritePtr = nullptr;
|
||||
}
|
||||
// start first command buffer for main core
|
||||
GX2Command_StartNewCommandBuffer(0x100);
|
||||
}
|
||||
|
||||
void GX2Shutdown_commandBufferPool()
|
||||
{
|
||||
if (!s_commandState->commandPoolBase)
|
||||
return;
|
||||
if (s_cbBufferIsInternallyAllocated)
|
||||
coreinit::_weak_MEMFreeToDefaultHeap(s_commandState->commandPoolBase.GetPtr());
|
||||
s_cbBufferIsInternallyAllocated = false;
|
||||
s_commandState->commandPoolBase = nullptr;
|
||||
s_commandState->commandPoolSizeInU32s = 0;
|
||||
s_commandState->gpuCommandReadPtr = nullptr;
|
||||
}
|
||||
|
||||
// current position of where the GPU is reading from. Updated via a memory write command submitted to the GPU
|
||||
uint32 GX2Command_GetPoolGPUReadIndex()
|
||||
{
|
||||
stdx::atomic_ref<MEMPTR<uint32be>> _readPtr(s_commandState->gpuCommandReadPtr);
|
||||
MEMPTR<uint32be> currentReadPtr = _readPtr.load();
|
||||
cemu_assert_debug(currentReadPtr);
|
||||
return (uint32)(currentReadPtr.GetPtr() - s_commandState->commandPoolBase.GetPtr());
|
||||
}
|
||||
|
||||
void GX2Command_WaitForNextBufferRetired()
|
||||
{
|
||||
uint64 retiredTimeStamp = GX2GetRetiredTimeStamp();
|
||||
retiredTimeStamp += 1;
|
||||
// but cant be higher than the submission timestamp
|
||||
stdx::atomic_ref<uint64be> _lastSubmissionTime(s_commandState->lastSubmissionTime);
|
||||
uint64 submissionTimeStamp = _lastSubmissionTime.load();
|
||||
if (retiredTimeStamp > submissionTimeStamp)
|
||||
retiredTimeStamp = submissionTimeStamp;
|
||||
GX2WaitTimeStamp(retiredTimeStamp);
|
||||
}
|
||||
|
||||
// Points the calling core's command buffer state at the given buffer and rewinds the write pointer to its start.
// sizeInU32s is the buffer capacity in 32-bit words; isDisplayList is stored as a flag alongside it.
void GX2Command_SetupCoreCommandBuffer(uint32be* buffer, uint32 sizeInU32s, bool isDisplayList)
{
	GX2PerCoreCBState& state = s_perCoreCBState[coreinit::OSGetCoreId()];
	state.isDisplayList = isDisplayList;
	state.bufferSizeInU32s = sizeInU32s;
	state.bufferPtr = buffer;
	state.currentWritePtr = buffer; // writing starts at the beginning of the buffer
}
|
||||
|
||||
// Carves a new command buffer for the calling core out of the command pool and makes it the current write target.
// numU32s is the requested size in 32-bit words; requests below 0x100 are raised to 0x100.
// If the pool has no suitable free region, waits for the GPU to retire buffers and retries.
void GX2Command_StartNewCommandBuffer(uint32 numU32s)
{
	uint32 coreIndex = coreinit::OSGetCoreId();
	auto& coreCBState = s_perCoreCBState[coreIndex];
	numU32s = std::max<uint32>(numU32s, 0x100);
	// grab space from command buffer pool and if necessary wait for it
	uint32be* bufferPtr = nullptr;
	uint32 bufferSizeInU32s = 0;
	uint32 readIndex;
	while (true)
	{
		// try to grab buffer data from first available spot:
		// 1. At the current write location up to the end of the buffer (avoiding an overlap with the read location)
		// 2. From the start of the buffer up to the read location
		readIndex = GX2Command_GetPoolGPUReadIndex();
		// the next buffer starts right behind the current one, or at the pool start if the core has no buffer yet
		uint32be* nextWritePos = coreCBState.bufferPtr ? coreCBState.bufferPtr + coreCBState.bufferSizeInU32s : s_commandState->commandPoolBase.GetPtr();
		uint32 writeIndex = nextWritePos - s_commandState->commandPoolBase;
		uint32 poolSizeInU32s = s_commandState->commandPoolSizeInU32s;
		// readIndex == writeIndex can mean either buffer full or buffer empty
		// we could use GX2GetRetiredTimeStamp() == GX2GetLastSubmittedTimeStamp() to determine if the buffer is truly empty
		// but this can have false negatives since the last submission timestamp is updated independently of the read index
		// so instead we just avoid ever filling the buffer completely
		cemu_assert_debug(readIndex < poolSizeInU32s);
		cemu_assert_debug(writeIndex < poolSizeInU32s);
		if (writeIndex < readIndex)
		{
			// writeIndex has wrapped around
			uint32 wordsAvailable = readIndex - writeIndex;
			if (wordsAvailable > 0)
				wordsAvailable--; // avoid writeIndex becoming equal to readIndex
			if (wordsAvailable >= numU32s)
			{
				bufferPtr = s_commandState->commandPoolBase + writeIndex;
				bufferSizeInU32s = wordsAvailable;
				break;
			}
		}
		else
		{
			uint32 wordsAvailable = poolSizeInU32s - writeIndex;
			if (wordsAvailable > 0)
				wordsAvailable--; // avoid writeIndex becoming equal to readIndex
			if (wordsAvailable >= numU32s)
			{
				bufferPtr = nextWritePos;
				bufferSizeInU32s = wordsAvailable;
				break;
			}
			// not enough space at end of buffer, try to grab from the beginning of the buffer
			wordsAvailable = readIndex;
			if (wordsAvailable > 0)
				wordsAvailable--; // avoid writeIndex becoming equal to readIndex
			if (wordsAvailable >= numU32s)
			{
				bufferPtr = s_commandState->commandPoolBase;
				bufferSizeInU32s = wordsAvailable;
				break;
			}
		}
		// no region is large enough yet, wait for the GPU to make progress and try again
		GX2Command_WaitForNextBufferRetired();
	}
	cemu_assert_debug(bufferPtr);
	// NOTE(review): this replaces the available size found above with the requested size, capped at 0x20000 words,
	// so a request above the cap yields a buffer smaller than requested - confirm this is intended
	bufferSizeInU32s = std::min<uint32>(numU32s, 0x20000); // size cap
#ifdef CEMU_DEBUG_ASSERT
	uint32 newWriteIndex = ((bufferPtr - s_commandState->commandPoolBase) + bufferSizeInU32s) % s_commandState->commandPoolSizeInU32s;
	cemu_assert_debug(newWriteIndex != readIndex);
#endif
	// setup buffer and make it the current write gather target
	cemu_assert_debug(bufferPtr >= s_commandState->commandPoolBase && (bufferPtr + bufferSizeInU32s) <= s_commandState->commandPoolBase + s_commandState->commandPoolSizeInU32s);
	GX2Command_SetupCoreCommandBuffer(bufferPtr, bufferSizeInU32s, false);
}
|
||||
|
||||
// Submits a command buffer to the GPU through TCL::TCLSubmitToRing.
// buffer / sizeInU32s: the commands to execute, referenced by an IT_INDIRECT_BUFFER_PRIV packet
// completionGPUReadPointer: optional; when set, an IT_MEM_WRITE packet is appended which stores the address
//                           directly behind the buffer to this location
// triggerMarkerInterrupt: when false the submission is flagged with NO_MARKER_INTERRUPT
void GX2Command_SubmitCommandBuffer(uint32be* buffer, uint32 sizeInU32s, MEMPTR<uint32be>* completionGPUReadPointer, bool triggerMarkerInterrupt)
{
	uint32be cmd[10];
	uint32 cmdLen = 0;
	// indirect buffer packet pointing at the command buffer
	cmd[cmdLen++] = pm4HeaderType3(IT_INDIRECT_BUFFER_PRIV, 3);
	cmd[cmdLen++] = memory_virtualToPhysical(MEMPTR<void>(buffer).GetMPTR());
	cmd[cmdLen++] = 0x00000000; // address high bits
	cmd[cmdLen++] = sizeInU32s;
	if (completionGPUReadPointer)
	{
		// append command to update completionGPUReadPointer after the GPU is done with the command buffer
		cmd[cmdLen++] = pm4HeaderType3(IT_MEM_WRITE, 4);
		cmd[cmdLen++] = memory_virtualToPhysical(MEMPTR<void>(completionGPUReadPointer).GetMPTR()) | 2;
		cmd[cmdLen++] = 0x40000;
		cmd[cmdLen++] = MEMPTR<void>(buffer + sizeInU32s).GetMPTR(); // value to write
		cmd[cmdLen++] = 0x00000000;
	}

	betype<TCL::TCLSubmissionFlag> submissionFlags{};
	submissionFlags |= TCL::TCLSubmissionFlag::USE_RETIRED_MARKER;
	if (!triggerMarkerInterrupt)
		submissionFlags |= TCL::TCLSubmissionFlag::NO_MARKER_INTERRUPT;

	TCL::TCLSubmitToRing(cmd, cmdLen, &submissionFlags, &s_commandState->lastSubmissionTime);
}
|
||||
|
||||
void GX2Command_PadCurrentBuffer()
|
||||
{
|
||||
uint32 coreIndex = coreinit::OSGetCoreId();
|
||||
auto& coreCBState = s_perCoreCBState[coreIndex];
|
||||
if (!coreCBState.currentWritePtr)
|
||||
return;
|
||||
uint32 writeDistance = (uint32)(coreCBState.currentWritePtr - coreCBState.bufferPtr);
|
||||
if ((writeDistance&7) != 0)
|
||||
{
|
||||
uint32 distanceToPad = 0x8 - (writeDistance & 0x7);
|
||||
while (distanceToPad)
|
||||
{
|
||||
*coreCBState.currentWritePtr = pm4HeaderType2Filler();
|
||||
coreCBState.currentWritePtr++;
|
||||
distanceToPad--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Submits the calling core's current command buffer to the GPU and starts a new one.
// numU32sForNextBuffer: minimum size (in 32-bit words) requested for the next command buffer
// triggerMarkerInterrupt: forwarded to GX2Command_SubmitCommandBuffer
// When the core is currently writing a display list nothing is submitted (only a debug log is emitted).
void GX2Command_Flush(uint32 numU32sForNextBuffer, bool triggerMarkerInterrupt)
{
	uint32 coreIndex = coreinit::OSGetCoreId();
	auto& coreCBState = s_perCoreCBState[coreIndex];
	if (coreCBState.isDisplayList)
	{
		// display list
		cemu_assert_debug((uint32)(coreCBState.currentWritePtr - coreCBState.bufferPtr) < coreCBState.bufferSizeInU32s);
		cemuLog_logDebugOnce(LogType::Force, "GX2 flush called on display list");
		return;
	}
	// command buffer
	if (coreCBState.currentWritePtr != coreCBState.bufferPtr)
	{
		// pad the command buffer to 32 byte alignment
		GX2Command_PadCurrentBuffer();
		// submit it to the GPU
		uint32 bufferLength = (uint32)(coreCBState.currentWritePtr - coreCBState.bufferPtr);
		cemu_assert_debug(bufferLength <= coreCBState.bufferSizeInU32s);
		GX2Command_SubmitCommandBuffer(coreCBState.bufferPtr, bufferLength, &s_commandState->gpuCommandReadPtr, triggerMarkerInterrupt);
		GX2Command_StartNewCommandBuffer(numU32sForNextBuffer);
		return;
	}
	// current buffer is empty so we dont need to queue it
	// but if it is too small for the requested size it has to be replaced by a larger one,
	// otherwise the caller would write past its end
	// cores without any command buffer (bufferPtr is null) are left untouched
	if (coreCBState.bufferPtr != nullptr && numU32sForNextBuffer > coreCBState.bufferSizeInU32s)
		GX2Command_StartNewCommandBuffer(numU32sForNextBuffer);
}
|
||||
|
||||
void GX2Flush()
|
||||
{
|
||||
GX2Command_Flush(256, true);
|
||||
}
|
||||
|
||||
// Returns the submission timestamp stored in the command state, read through an atomic reference.
uint64 GX2GetLastSubmittedTimeStamp()
{
	return stdx::atomic_ref<uint64be>(s_commandState->lastSubmissionTime).load();
}
|
||||
|
||||
// Queries TCL for the TIMESTAMP_LAST_BUFFER_RETIRED timestamp and returns it.
uint64 GX2GetRetiredTimeStamp()
{
	uint64be retiredTimestamp = 0; // stays 0 if TCL does not write a value
	TCL::TCLTimestamp(TCL::TCLTimestampId::TIMESTAMP_LAST_BUFFER_RETIRED, &retiredTimestamp);
	return retiredTimestamp;
}
|
||||
|
||||
// Waits via TCL until the retired timestamp reaches tsWait. Always returns true.
bool GX2WaitTimeStamp(uint64 tsWait)
{
	// handle GPU timeout here? But for now we timeout after 60 seconds
	const auto timeout = Espresso::TIMER_CLOCK * 60;
	TCL::TCLWaitTimestamp(TCL::TCLTimestampId::TIMESTAMP_LAST_BUFFER_RETIRED, tsWait, timeout);
	return true;
}
|
||||
|
||||
/*
|
||||
* Guarantees that the requested amount of space is available on the current command buffer
|
||||
* If the space is not available, the current command buffer is pushed to the GPU and a new one is allocated
|
||||
*/
|
||||
void GX2ReserveCmdSpace(uint32 reservedFreeSpaceInU32)
|
||||
{
|
||||
uint32 coreIndex = coreinit::OSGetCoreId();
|
||||
auto& coreCBState = s_perCoreCBState[coreIndex];
|
||||
if (coreCBState.currentWritePtr == nullptr)
|
||||
return;
|
||||
uint32 writeDistance = (uint32)(coreCBState.currentWritePtr - coreCBState.bufferPtr);
|
||||
if (writeDistance + reservedFreeSpaceInU32 > coreCBState.bufferSizeInU32s)
|
||||
{
|
||||
GX2Command_Flush(reservedFreeSpaceInU32, true);
|
||||
}
|
||||
gx2WriteGatherCurrentMainCoreIndex = sGX2MainCoreIndex;
|
||||
gx2WriteGatherInited = true;
|
||||
}
|
||||
|
||||
// Redirects command writes of the core to a display list buffer.
// buffer: address of the display list memory, maxSize: its size in bytes
// On the main GX2 core the current command buffer is padded and its state saved first,
// so that GX2WriteGather_endDisplayList can restore it.
void GX2WriteGather_beginDisplayList(PPCInterpreter_t* hCPU, MPTR buffer, uint32 maxSize)
{
	uint32 coreIndex = PPCInterpreter_getCoreIndex(hCPU);
	if (coreIndex == sGX2MainCoreIndex)
	{
		GX2Command_PadCurrentBuffer();
		cemu_assert_debug(!s_perCoreCBState[coreIndex].isDisplayList);
		s_mainCoreLastCommandState = s_perCoreCBState[coreIndex];
	}
	GX2Command_SetupCoreCommandBuffer(MEMPTR<uint32be>(buffer), maxSize/4, true);
}
|
||||
|
||||
// Returns the number of bytes written so far to the display list of the given core.
// Returns 0 when the core has no active write pointer.
uint32 GX2WriteGather_getDisplayListWriteDistance(sint32 coreIndex)
{
	auto& coreCBState = s_perCoreCBState[coreIndex];
	cemu_assert_debug(coreCBState.isDisplayList);
	if (coreCBState.currentWritePtr == nullptr)
		return 0;
	// pointer difference is in 32-bit words, convert to bytes
	return (uint32)(coreCBState.currentWritePtr - coreCBState.bufferPtr) * 4;
}

// Legacy helper: byte offset of the core's ring buffer write pointer from the start of gxRingBuffer.
// Kept because GX2WriteGather_checkAndInsertWrapAroundMark still calls it.
uint32 GX2WriteGather_getFifoWriteDistance(uint32 coreIndex)
{
	uint32 writeDistance = (uint32)(gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex] - gx2WriteGatherPipe.gxRingBuffer);
	return writeDistance;
}
|
||||
|
||||
// Finishes the display list the calling core is currently writing.
// The list is padded with filler packets, then the core's write target is restored (main GX2 core)
// or cleared (any other core).
// Returns the size of the written display list in bytes, or 0 if no display list is active.
uint32 GX2WriteGather_endDisplayList(PPCInterpreter_t* hCPU, MPTR buffer)
{
	uint32 coreIndex = coreinit::OSGetCoreId();
	auto& coreCBState = s_perCoreCBState[coreIndex];
	if (!coreCBState.isDisplayList)
	{
		// no active display list
		// return a size of 0 and leave the current command buffer untouched
		return 0;
	}
	GX2Command_PadCurrentBuffer();
	uint32 finalWriteIndex = (uint32)(coreCBState.currentWritePtr - coreCBState.bufferPtr);
	cemu_assert_debug(finalWriteIndex <= coreCBState.bufferSizeInU32s);
	// if we are on the main GX2 core then restore the GPU command buffer
	if (coreIndex == sGX2MainCoreIndex)
	{
		coreCBState = s_mainCoreLastCommandState;
	}
	else
	{
		// other cores have no command buffer to go back to
		coreCBState.bufferPtr = nullptr;
		coreCBState.currentWritePtr = nullptr;
		coreCBState.bufferSizeInU32s = 0;
		coreCBState.isDisplayList = false;
	}
	// finalWriteIndex counts 32-bit words, the caller expects bytes
	return finalWriteIndex * 4;
}
|
||||
|
||||
bool GX2GetCurrentDisplayList(betype<MPTR>* displayListAddr, uint32be* displayListSize)
|
||||
bool GX2GetCurrentDisplayList(MEMPTR<uint32be>* displayListAddr, uint32be* displayListSize)
|
||||
{
|
||||
uint32 coreIndex = coreinit::OSGetCoreId();
|
||||
if (gx2WriteGatherPipe.displayListStart[coreIndex] == MPTR_NULL)
|
||||
auto& coreCBState = s_perCoreCBState[coreIndex];
|
||||
if (!coreCBState.isDisplayList)
|
||||
return false;
|
||||
|
||||
if (displayListAddr)
|
||||
*displayListAddr = gx2WriteGatherPipe.displayListStart[coreIndex];
|
||||
*displayListAddr = coreCBState.bufferPtr;
|
||||
if (displayListSize)
|
||||
*displayListSize = gx2WriteGatherPipe.displayListMaxSize[coreIndex];
|
||||
|
||||
*displayListSize = coreCBState.bufferSizeInU32s * sizeof(uint32be);
|
||||
return true;
|
||||
}
|
||||
|
||||
// returns true if we are writing to a display list
|
||||
bool GX2GetDisplayListWriteStatus()
|
||||
{
|
||||
// returns true if we are writing to a display list
|
||||
uint32 coreIndex = coreinit::OSGetCoreId();
|
||||
return gx2WriteGatherPipe.displayListStart[coreIndex] != MPTR_NULL;
|
||||
}
|
||||
|
||||
uint32 GX2WriteGather_getReadWriteDistance()
|
||||
{
|
||||
uint32 coreIndex = sGX2MainCoreIndex;
|
||||
uint32 writeDistance = (uint32)(gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex] + GX2_COMMAND_RING_BUFFER_SIZE - gxRingBufferReadPtr);
|
||||
writeDistance %= GX2_COMMAND_RING_BUFFER_SIZE;
|
||||
return writeDistance;
|
||||
}
|
||||
|
||||
void GX2WriteGather_checkAndInsertWrapAroundMark()
|
||||
{
|
||||
uint32 coreIndex = coreinit::OSGetCoreId();
|
||||
if (coreIndex != sGX2MainCoreIndex) // only if main gx2 core
|
||||
return;
|
||||
if (gx2WriteGatherPipe.displayListStart[coreIndex] != MPTR_NULL)
|
||||
return;
|
||||
uint32 writeDistance = GX2WriteGather_getFifoWriteDistance(coreIndex);
|
||||
if (writeDistance >= (GX2_COMMAND_RING_BUFFER_SIZE * 3 / 5))
|
||||
{
|
||||
gx2WriteGather_submitU32AsBE(pm4HeaderType3(IT_HLE_FIFO_WRAP_AROUND, 1));
|
||||
gx2WriteGather_submitU32AsBE(0); // empty word since we can't send commands with zero data words
|
||||
gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex] = gx2WriteGatherPipe.gxRingBuffer;
|
||||
}
|
||||
return s_perCoreCBState[coreIndex].isDisplayList;
|
||||
}
|
||||
|
||||
void GX2BeginDisplayList(MEMPTR<void> displayListAddr, uint32 size)
|
||||
|
@ -204,28 +423,23 @@ namespace GX2
|
|||
memory_virtualToPhysical(addr),
|
||||
0, // high address bits
|
||||
size / 4);
|
||||
GX2::GX2WriteGather_checkAndInsertWrapAroundMark();
|
||||
}
|
||||
|
||||
void GX2DirectCallDisplayList(void* addr, uint32 size)
|
||||
{
|
||||
// this API submits to TCL directly and bypasses write-gatherer
|
||||
// its basically a way to manually submit a command buffer to the GPU
|
||||
// as such it also affects the submission and retire timestamps
|
||||
|
||||
uint32 coreIndex = PPCInterpreter_getCoreIndex(PPCInterpreter_getCurrentInstance());
|
||||
cemu_assert_debug(coreIndex == sGX2MainCoreIndex);
|
||||
coreIndex = sGX2MainCoreIndex; // always submit to main queue which is owned by GX2 main core (TCLSubmitToRing does not need this workaround)
|
||||
|
||||
uint32be* cmdStream = (uint32be*)(gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex]);
|
||||
cmdStream[0] = pm4HeaderType3(IT_INDIRECT_BUFFER_PRIV, 3);
|
||||
cmdStream[1] = memory_virtualToPhysical(MEMPTR<void>(addr).GetMPTR());
|
||||
cmdStream[2] = 0;
|
||||
cmdStream[3] = size / 4;
|
||||
gx2WriteGatherPipe.writeGatherPtrGxBuffer[coreIndex] += 16;
|
||||
|
||||
// update submission timestamp and retired timestamp
|
||||
_GX2SubmitToTCL();
|
||||
uint32 coreIndex = coreinit::OSGetCoreId();
|
||||
if (coreIndex != sGX2MainCoreIndex)
|
||||
{
|
||||
cemuLog_logDebugOnce(LogType::Force, "GX2DirectCallDisplayList() called on non-main GX2 core");
|
||||
}
|
||||
if (!s_perCoreCBState[coreIndex].isDisplayList)
|
||||
{
|
||||
// make sure any preceeding commands are submitted first
|
||||
GX2Command_Flush(0x100, false);
|
||||
}
|
||||
GX2Command_SubmitCommandBuffer(static_cast<uint32be*>(addr), size / 4, nullptr, false);
|
||||
}
|
||||
|
||||
void GX2CopyDisplayList(MEMPTR<uint32be*> addr, uint32 size)
|
||||
|
@ -288,6 +502,12 @@ namespace GX2
|
|||
|
||||
void GX2CommandInit()
|
||||
{
|
||||
cafeExportRegister("gx2", GX2Flush, LogType::GX2);
|
||||
|
||||
cafeExportRegister("gx2", GX2GetLastSubmittedTimeStamp, LogType::GX2);
|
||||
cafeExportRegister("gx2", GX2GetRetiredTimeStamp, LogType::GX2);
|
||||
cafeExportRegister("gx2", GX2WaitTimeStamp, LogType::GX2);
|
||||
|
||||
cafeExportRegister("gx2", GX2BeginDisplayList, LogType::GX2);
|
||||
cafeExportRegister("gx2", GX2BeginDisplayListEx, LogType::GX2);
|
||||
cafeExportRegister("gx2", GX2EndDisplayList, LogType::GX2);
|
||||
|
@ -295,7 +515,6 @@ namespace GX2
|
|||
cafeExportRegister("gx2", GX2GetCurrentDisplayList, LogType::GX2);
|
||||
cafeExportRegister("gx2", GX2GetDisplayListWriteStatus, LogType::GX2);
|
||||
|
||||
|
||||
cafeExportRegister("gx2", GX2CallDisplayList, LogType::GX2);
|
||||
cafeExportRegister("gx2", GX2DirectCallDisplayList, LogType::GX2);
|
||||
cafeExportRegister("gx2", GX2CopyDisplayList, LogType::GX2);
|
||||
|
@ -305,7 +524,10 @@ namespace GX2
|
|||
|
||||
void GX2CommandResetToDefaultState()
|
||||
{
|
||||
GX2WriteGather_ResetToDefaultState();
|
||||
s_commandState->commandPoolBase = nullptr;
|
||||
s_commandState->commandPoolSizeInU32s = 0;
|
||||
s_commandState->gpuCommandReadPtr = nullptr;
|
||||
s_cbBufferIsInternallyAllocated = false;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue