#include "Cafe/OS/common/OSCommon.h" #include "Cafe/OS/libs/TCL/TCL.h" #include "HW/Latte/Core/LattePM4.h" namespace TCL { SysAllocator s_updateRetirementEvent; uint64 s_currentRetireMarker = 0; struct TCLStatePPC // mapped into PPC space { uint64be gpuRetireMarker; // written by GPU }; SysAllocator s_tclStatePPC; // called from GPU for timestamp EOP event void TCLGPUNotifyNewRetirementTimestamp() { // gpuRetireMarker is updated via event eop command __OSLockScheduler(); coreinit::OSSignalEventAllInternal(s_updateRetirementEvent.GetPtr()); __OSUnlockScheduler(); } int TCLTimestamp(TCLTimestampId id, uint64be* timestampOut) { if (id == TCLTimestampId::TIMESTAMP_LAST_BUFFER_RETIRED) { MEMPTR b; // this is the timestamp of the last buffer that was retired by the GPU stdx::atomic_ref retireTimestamp(s_tclStatePPC->gpuRetireMarker); *timestampOut = retireTimestamp.load(); return 0; } else { cemuLog_log(LogType::Force, "TCLTimestamp(): Unsupported timestamp ID {}", (uint32)id); *timestampOut = 0; return 0; } } int TCLWaitTimestamp(TCLTimestampId id, uint64 waitTs, uint64 timeout) { if (id == TCLTimestampId::TIMESTAMP_LAST_BUFFER_RETIRED) { while ( true ) { stdx::atomic_ref retireTimestamp(s_tclStatePPC->gpuRetireMarker); uint64 currentTimestamp = retireTimestamp.load(); if (currentTimestamp >= waitTs) return 0; coreinit::OSWaitEvent(s_updateRetirementEvent.GetPtr()); } } else { cemuLog_log(LogType::Force, "TCLWaitTimestamp(): Unsupported timestamp ID {}", (uint32)id); } return 0; } static constexpr uint32 TCL_RING_BUFFER_SIZE = 4096; // in U32s std::atomic tclRingBufferA[TCL_RING_BUFFER_SIZE]; std::atomic tclRingBufferA_readIndex{0}; uint32 tclRingBufferA_writeIndex{0}; // GPU code calls this to grab the next command word bool TCLGPUReadRBWord(uint32& cmdWord) { if (tclRingBufferA_readIndex == tclRingBufferA_writeIndex) return false; cmdWord = tclRingBufferA[tclRingBufferA_readIndex]; tclRingBufferA_readIndex = (tclRingBufferA_readIndex+1) % TCL_RING_BUFFER_SIZE; return true; } void TCLWaitForRBSpace(uint32be numU32s) { while ( true ) { uint32 distance = (tclRingBufferA_readIndex + TCL_RING_BUFFER_SIZE - tclRingBufferA_writeIndex) & (TCL_RING_BUFFER_SIZE - 1); if (tclRingBufferA_writeIndex == tclRingBufferA_readIndex) // buffer completely empty distance = TCL_RING_BUFFER_SIZE; if (distance >= numU32s+1) // assume distance minus one, because we are never allowed to completely wrap around break; _mm_pause(); } } // this function assumes that TCLWaitForRBSpace was called and that there is enough space void TCLWriteCmd(uint32be* cmd, uint32 cmdLen) { while (cmdLen > 0) { tclRingBufferA[tclRingBufferA_writeIndex] = *cmd; tclRingBufferA_writeIndex++; tclRingBufferA_writeIndex &= (TCL_RING_BUFFER_SIZE - 1); cmd++; cmdLen--; } } #define EVENT_TYPE_TS 5 void TCLSubmitRetireMarker(bool triggerEventInterrupt) { s_currentRetireMarker++; uint32be cmd[6]; cmd[0] = pm4HeaderType3(IT_EVENT_WRITE_EOP, 5); cmd[1] = (4 | (EVENT_TYPE_TS << 8)); // event type (bits 8-15) and event index (bits 0-7). cmd[2] = MEMPTR(&s_tclStatePPC->gpuRetireMarker).GetMPTR(); // address lower 32bits + data sel bits cmd[3] = 0x40000000; // select 64bit write, lower 16 bits are the upper bits of the address if (triggerEventInterrupt) cmd[3] |= 0x2000000; // trigger interrupt after value has been written cmd[4] = (uint32)s_currentRetireMarker; // data lower 32 bits cmd[5] = (uint32)(s_currentRetireMarker>>32); // data higher 32 bits TCLWriteCmd(cmd, 6); } int TCLSubmitToRing(uint32be* cmd, uint32 cmdLen, betype* controlFlags, uint64be* timestampValueOut) { TCLSubmissionFlag flags = *controlFlags; cemu_assert_debug(timestampValueOut); // handle case where this is null // make sure there is enough space to submit all commands at one uint32 totalCommandLength = cmdLen; totalCommandLength += 6; // space needed for TCLSubmitRetireMarker TCLWaitForRBSpace(totalCommandLength); // submit command buffer TCLWriteCmd(cmd, cmdLen); // create new marker timestamp and tell GPU to write it to our variable after its done processing the command if ((HAS_FLAG(flags, TCLSubmissionFlag::USE_RETIRED_MARKER))) { TCLSubmitRetireMarker(!HAS_FLAG(flags, TCLSubmissionFlag::NO_MARKER_INTERRUPT)); *timestampValueOut = s_currentRetireMarker; // incremented before each submit } else { cemu_assert_unimplemented(); } return 0; } void Initialize() { cafeExportRegister("TCL", TCLSubmitToRing, LogType::Placeholder); cafeExportRegister("TCL", TCLTimestamp, LogType::Placeholder); cafeExportRegister("TCL", TCLWaitTimestamp, LogType::Placeholder); s_currentRetireMarker = 0; s_tclStatePPC->gpuRetireMarker = 0; coreinit::OSInitEvent(s_updateRetirementEvent.GetPtr(), coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_AUTO); } }