mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-03 13:31:18 +12:00
GX2+TCL: Reimplement command buffer submission
- GX2 utilizes TCL(.rpl) API for command submission instead of directly writing to an internal GPU fifo - Submission & retire timestamps are correctly implemented as incremental counters - Command buffering behaviour matches console - Fixes race conditions on aarch64
This commit is contained in:
parent
96765e4ac6
commit
28ea70b6d8
21 changed files with 761 additions and 472 deletions
|
@ -1,28 +1,161 @@
|
|||
#include "Cafe/OS/common/OSCommon.h"
|
||||
#include "Cafe/OS/libs/TCL/TCL.h"
|
||||
|
||||
#include "HW/Latte/Core/LattePM4.h"
|
||||
|
||||
namespace TCL
|
||||
{
|
||||
SysAllocator<coreinit::OSEvent> s_updateRetirementEvent;
|
||||
uint64 s_currentRetireMarker = 0;
|
||||
|
||||
enum class TCL_SUBMISSION_FLAG : uint32
|
||||
struct TCLStatePPC // mapped into PPC space
|
||||
{
|
||||
SURFACE_SYNC = 0x400000, // submit surface sync packet before cmd
|
||||
TRIGGER_INTERRUPT = 0x200000, // probably
|
||||
UKN_20000000 = 0x20000000,
|
||||
uint64be gpuRetireMarker; // written by GPU
|
||||
};
|
||||
|
||||
int TCLSubmitToRing(uint32be* cmd, uint32 cmdLen, uint32be* controlFlags, uint64* submissionTimestamp)
|
||||
SysAllocator<TCLStatePPC> s_tclStatePPC;
|
||||
|
||||
// called from GPU for timestamp EOP event
|
||||
void TCLGPUNotifyNewRetirementTimestamp()
|
||||
{
|
||||
// todo - figure out all the bits of *controlFlags
|
||||
// if submissionTimestamp != nullptr then set it to the timestamp of the submission. Note: We should make sure that uint64's are written atomically by the GPU command processor
|
||||
// gpuRetireMarker is updated via event eop command
|
||||
__OSLockScheduler();
|
||||
coreinit::OSSignalEventAllInternal(s_updateRetirementEvent.GetPtr());
|
||||
__OSUnlockScheduler();
|
||||
}
|
||||
|
||||
cemu_assert_debug(false);
|
||||
int TCLTimestamp(TCLTimestampId id, uint64be* timestampOut)
|
||||
{
|
||||
if (id == TCLTimestampId::TIMESTAMP_LAST_BUFFER_RETIRED)
|
||||
{
|
||||
MEMPTR<uint32> b;
|
||||
// this is the timestamp of the last buffer that was retired by the GPU
|
||||
stdx::atomic_ref<uint64be> retireTimestamp(s_tclStatePPC->gpuRetireMarker);
|
||||
*timestampOut = retireTimestamp.load();
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cemuLog_log(LogType::Force, "TCLTimestamp(): Unsupported timestamp ID {}", (uint32)id);
|
||||
*timestampOut = 0;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
int TCLWaitTimestamp(TCLTimestampId id, uint64 waitTs, uint64 timeout)
|
||||
{
|
||||
if (id == TCLTimestampId::TIMESTAMP_LAST_BUFFER_RETIRED)
|
||||
{
|
||||
while ( true )
|
||||
{
|
||||
stdx::atomic_ref<uint64be> retireTimestamp(s_tclStatePPC->gpuRetireMarker);
|
||||
uint64 currentTimestamp = retireTimestamp.load();
|
||||
if (currentTimestamp >= waitTs)
|
||||
return 0;
|
||||
coreinit::OSWaitEvent(s_updateRetirementEvent.GetPtr());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cemuLog_log(LogType::Force, "TCLWaitTimestamp(): Unsupported timestamp ID {}", (uint32)id);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static constexpr uint32 TCL_RING_BUFFER_SIZE = 4096; // in U32s
|
||||
|
||||
std::atomic<uint32> tclRingBufferA[TCL_RING_BUFFER_SIZE];
|
||||
std::atomic<uint32> tclRingBufferA_readIndex{0};
|
||||
uint32 tclRingBufferA_writeIndex{0};
|
||||
|
||||
// GPU code calls this to grab the next command word
|
||||
bool TCLGPUReadRBWord(uint32& cmdWord)
|
||||
{
|
||||
if (tclRingBufferA_readIndex == tclRingBufferA_writeIndex)
|
||||
return false;
|
||||
cmdWord = tclRingBufferA[tclRingBufferA_readIndex];
|
||||
tclRingBufferA_readIndex = (tclRingBufferA_readIndex+1) % TCL_RING_BUFFER_SIZE;
|
||||
return true;
|
||||
}
|
||||
|
||||
void TCLWaitForRBSpace(uint32be numU32s)
|
||||
{
|
||||
while ( true )
|
||||
{
|
||||
uint32 distance = (tclRingBufferA_readIndex + TCL_RING_BUFFER_SIZE - tclRingBufferA_writeIndex) & (TCL_RING_BUFFER_SIZE - 1);
|
||||
if (tclRingBufferA_writeIndex == tclRingBufferA_readIndex) // buffer completely empty
|
||||
distance = TCL_RING_BUFFER_SIZE;
|
||||
if (distance >= numU32s+1) // assume distance minus one, because we are never allowed to completely wrap around
|
||||
break;
|
||||
_mm_pause();
|
||||
}
|
||||
}
|
||||
|
||||
// this function assumes that TCLWaitForRBSpace was called and that there is enough space
|
||||
void TCLWriteCmd(uint32be* cmd, uint32 cmdLen)
|
||||
{
|
||||
while (cmdLen > 0)
|
||||
{
|
||||
tclRingBufferA[tclRingBufferA_writeIndex] = *cmd;
|
||||
tclRingBufferA_writeIndex++;
|
||||
tclRingBufferA_writeIndex &= (TCL_RING_BUFFER_SIZE - 1);
|
||||
cmd++;
|
||||
cmdLen--;
|
||||
}
|
||||
}
|
||||
|
||||
#define EVENT_TYPE_TS 5
|
||||
|
||||
void TCLSubmitRetireMarker(bool triggerEventInterrupt)
|
||||
{
|
||||
s_currentRetireMarker++;
|
||||
uint32be cmd[6];
|
||||
cmd[0] = pm4HeaderType3(IT_EVENT_WRITE_EOP, 5);
|
||||
cmd[1] = (4 | (EVENT_TYPE_TS << 8)); // event type (bits 8-15) and event index (bits 0-7).
|
||||
cmd[2] = MEMPTR<void>(&s_tclStatePPC->gpuRetireMarker).GetMPTR(); // address lower 32bits + data sel bits
|
||||
cmd[3] = 0x40000000; // select 64bit write, lower 16 bits are the upper bits of the address
|
||||
if (triggerEventInterrupt)
|
||||
cmd[3] |= 0x2000000; // trigger interrupt after value has been written
|
||||
cmd[4] = (uint32)s_currentRetireMarker; // data lower 32 bits
|
||||
cmd[5] = (uint32)(s_currentRetireMarker>>32); // data higher 32 bits
|
||||
TCLWriteCmd(cmd, 6);
|
||||
}
|
||||
|
||||
int TCLSubmitToRing(uint32be* cmd, uint32 cmdLen, betype<TCLSubmissionFlag>* controlFlags, uint64be* timestampValueOut)
|
||||
{
|
||||
TCLSubmissionFlag flags = *controlFlags;
|
||||
cemu_assert_debug(timestampValueOut); // handle case where this is null
|
||||
|
||||
// make sure there is enough space to submit all commands at one
|
||||
uint32 totalCommandLength = cmdLen;
|
||||
totalCommandLength += 6; // space needed for TCLSubmitRetireMarker
|
||||
|
||||
TCLWaitForRBSpace(totalCommandLength);
|
||||
|
||||
// submit command buffer
|
||||
TCLWriteCmd(cmd, cmdLen);
|
||||
|
||||
// create new marker timestamp and tell GPU to write it to our variable after its done processing the command
|
||||
if ((HAS_FLAG(flags, TCLSubmissionFlag::USE_RETIRED_MARKER)))
|
||||
{
|
||||
TCLSubmitRetireMarker(!HAS_FLAG(flags, TCLSubmissionFlag::NO_MARKER_INTERRUPT));
|
||||
*timestampValueOut = s_currentRetireMarker; // incremented before each submit
|
||||
}
|
||||
else
|
||||
{
|
||||
cemu_assert_unimplemented();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void Initialize()
|
||||
{
|
||||
cafeExportRegister("TCL", TCLSubmitToRing, LogType::Placeholder);
|
||||
cafeExportRegister("TCL", TCLTimestamp, LogType::Placeholder);
|
||||
cafeExportRegister("TCL", TCLWaitTimestamp, LogType::Placeholder);
|
||||
|
||||
s_currentRetireMarker = 0;
|
||||
s_tclStatePPC->gpuRetireMarker = 0;
|
||||
coreinit::OSInitEvent(s_updateRetirementEvent.GetPtr(), coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_AUTO);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue