From 62e2d8d9a7b8aaa110673862b80733b4e93b42c7 Mon Sep 17 00:00:00 2001
From: S Gopal Rajagopal
Date: Thu, 29 Jan 2015 20:20:34 +0530
Subject: [PATCH] SPURS: Update kernel to use lock line reservations

---
 rpcs3/Emu/Cell/MFC.h                        |   8 +
 rpcs3/Emu/Cell/SPUThread.cpp                |   9 +-
 rpcs3/Emu/Cell/SPUThread.h                  |  31 +-
 rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp    |  19 +-
 rpcs3/Emu/SysCalls/Modules/cellSpurs.h      |  19 +-
 rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp | 623 +++++++++++---------
 rpcs3/stdafx.h                              |   2 +
 7 files changed, 414 insertions(+), 297 deletions(-)

diff --git a/rpcs3/Emu/Cell/MFC.h b/rpcs3/Emu/Cell/MFC.h
index a6c731d3da..0b669deb97 100644
--- a/rpcs3/Emu/Cell/MFC.h
+++ b/rpcs3/Emu/Cell/MFC.h
@@ -35,6 +35,14 @@ enum
     MFC_GETLLAR_SUCCESS = 4,
 };
 
+// MFC Write Tag Status Update Request Channel (ch23) operations
+enum
+{
+    MFC_TAG_UPDATE_IMMEDIATE = 0,
+    MFC_TAG_UPDATE_ANY = 1,
+    MFC_TAG_UPDATE_ALL = 2,
+};
+
 enum
 {
     MFC_SPU_TO_PPU_MAILBOX_STATUS_MASK = 0x000000FF,
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index 95f678bf02..fcb9b012e9 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -1060,7 +1060,14 @@ void SPUThread::StopAndSignal(u32 code)
 
     case 0x003:
     {
-        m_code3_func(*this);
+        auto iter = m_addr_to_hle_function_map.find(PC);
+        assert(iter != m_addr_to_hle_function_map.end());
+
+        auto return_to_caller = iter->second(*this);
+        if (return_to_caller)
+        {
+            SetBranch(GPR[0]._u32[3] & 0x3fffc);
+        }
         break;
     }
diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h
index f880e5ca6e..d6ecbe64b0 100644
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@@ -290,6 +290,8 @@ public:
     u32 m_event_mask;
     u32 m_events;
 
+    std::unordered_map<u32, std::function<bool(SPUThread&)>> m_addr_to_hle_function_map;
+
     struct IntrTag
     {
         u32 enabled; // 1 == true
@@ -509,8 +511,35 @@ public:
     void WriteLS64 (const u32 lsa, const u64& data) const { vm::write64 (lsa + m_offset, data); }
     void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); }
 
+    void RegisterHleFunction(u32 addr, std::function<bool(SPUThread&)> function)
+    {
+        m_addr_to_hle_function_map[addr] = function;
+        WriteLS32(addr, 0x00000003); // STOP 3
+    }
+
+    void UnregisterHleFunction(u32 addr)
+    {
+        WriteLS32(addr, 0x00200000); // NOP
+        m_addr_to_hle_function_map.erase(addr);
+    }
+
+    void UnregisterHleFunctions(u32 start_addr, u32 end_addr)
+    {
+        for (auto iter = m_addr_to_hle_function_map.begin(); iter != m_addr_to_hle_function_map.end();)
+        {
+            if (iter->first >= start_addr && iter->first <= end_addr)
+            {
+                WriteLS32(iter->first, 0x00200000); // NOP
+                m_addr_to_hle_function_map.erase(iter++);
+            }
+            else
+            {
+                iter++;
+            }
+        }
+    }
+
     std::function<void(SPUThread&)> m_custom_task;
-    std::function<void(SPUThread&)> m_code3_func;
 
 public:
     SPUThread(CPUThreadType type = CPU_THREAD_SPU);
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
index c9795203f8..62349f276d 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
@@ -26,7 +26,7 @@ extern u32 libsre;
 extern u32 libsre_rtoc;
 #endif
 
-void spursKernelMain(SPUThread & spu);
+bool spursKernelMain(SPUThread & spu);
 s64 cellSpursLookUpTasksetAddress(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, u32 id);
 s64 _cellSpursSendSignal(vm::ptr<CellSpursTaskset> taskset, u32 taskID);
@@ -155,7 +155,8 @@ s64 spursInit(
         assert(!"spu_image_import() failed");
     }
 #else
-    spurs->m.spuImg.addr = (u32)Memory.Alloc(0x40000, 4096);
+    spurs->m.spuImg.addr = (u32)Memory.Alloc(0x40000, 4096);
+    spurs->m.spuImg.entry_point = isSecond ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR;
 #endif
 
     s32 tgt = SYS_SPU_THREAD_GROUP_TYPE_NORMAL;
@@ -179,17 +180,11 @@ s64 spursInit(
     name += "CellSpursKernel0";
     for (s32 num = 0; num < nSpus; num++, name[name.size() - 1]++)
     {
-        spurs->m.spus[num] = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, 0, 0, 0, 0, [spurs, num](SPUThread& SPU)
-        {
-            SPU.GPR[3]._u32[3] = num;
-            SPU.GPR[4]._u64[1] = spurs.addr();
-
-#ifdef PRX_DEBUG_XXX
-            return SPU.FastCall(SPU.PC);
+        auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, num, spurs.addr(), 0, 0);
+#ifndef PRX_DEBUG_XXX
+        spu->RegisterHleFunction(spurs->m.spuImg.entry_point, spursKernelMain);
 #endif
-
-            spursKernelMain(SPU);
-        })->GetId();
+        spurs->m.spus[num] = spu->GetId();
     }
 
     if (flags & SAF_SPU_PRINTF_ENABLED)
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h
index 4ccf224ec7..4d77a06402 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h
@@ -102,6 +102,12 @@ enum SPURSKernelInterfaces
     CELL_SPURS_INTERRUPT_VECTOR = 0x0,
     CELL_SPURS_LOCK_LINE = 0x80,
     CELL_SPURS_KERNEL_DMA_TAG_ID = 31,
+    CELL_SPURS_KERNEL1_ENTRY_ADDR = 0x818,
+    CELL_SPURS_KERNEL2_ENTRY_ADDR = 0x848,
+    CELL_SPURS_KERNEL1_YIELD_ADDR = 0x808,
+    CELL_SPURS_KERNEL2_YIELD_ADDR = 0x838,
+    CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR = 0x290,
+    CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR = 0x290,
 };
 
 enum RangeofEventQueuePortNumbers
@@ -885,14 +891,23 @@ struct SpursKernelMgmtData
     u8 spuIdling;                 // 0x1EB
     be_t<u16> wklRunnable1;       // 0x1EC
     be_t<u16> wklRunnable2;       // 0x1EE
-    u8 x1F0[0x210 - 0x1F0];       // 0x1F0
+    be_t<u32> x1F0;               // 0x1F0
+    be_t<u32> x1F4;               // 0x1F4
+    be_t<u32> x1F8;               // 0x1F8
+    be_t<u32> x1FC;               // 0x1FC
+    be_t<u32> x200;               // 0x200
+    be_t<u32> x204;               // 0x204
+    be_t<u32> x208;               // 0x208
+    be_t<u32> x20C;               // 0x20C
     be_t<u64> traceBuffer;        // 0x210
    be_t<u32> traceMsgCount;      // 0x218
     be_t<u32> traceMaxCount;      // 0x21C
     u8 wklUniqueId[0x10];         // 0x220
+    u8 x230[0x280 - 0x230];       // 0x230
+    be_t<u32> guid[4];            // 0x280
 };
 
-static_assert(sizeof(SpursKernelMgmtData) == 0x130, "Incorrect size for SpursKernelMgmtData");
+static_assert(sizeof(SpursKernelMgmtData) == 0x190, "Incorrect size for SpursKernelMgmtData");
 
 // The SPURS taskset policy module data store. This resides at 0x2700 of the LS.
 struct SpursTasksetPmMgmtData
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp
index e4ebca84a5..898638894c 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp
@@ -14,11 +14,15 @@
 void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag);
 u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status);
 
+bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag);
+u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask);
+u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll = true);
+
 //
 // SPURS Kernel functions
 //
-void spursKernelSelectWorkload(SPUThread & spu);
-void spursKernelSelectWorkload2(SPUThread & spu);
+bool spursKernel1SelectWorkload(SPUThread & spu);
+bool spursKernel2SelectWorkload(SPUThread & spu);
 
 //
 // SPURS system service workload functions
@@ -31,7 +35,7 @@ void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt);
 void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt);
 void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt);
 void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus);
-void spursSysServiceWorkloadEntry(SPUThread & spu);
+bool spursSysServiceWorkloadEntry(SPUThread & spu);
 
 //
 // SPURS taskset policy module functions
@@ -54,9 +58,9 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) {
 
     spu.GPR[3]._u32[3] = 1;
     if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) {
-        spursKernelSelectWorkload2(spu);
+        spursKernel2SelectWorkload(spu);
     } else {
-        spursKernelSelectWorkload(spu);
+        spursKernel1SelectWorkload(spu);
     }
 
     auto result = spu.GPR[3]._u64[1];
@@ -68,14 +72,51 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) {
     return wklId == mgmt->wklCurrentId ? 0 : 1;
 }
 
+/// Execute a DMA operation
+bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) {
+    spu.WriteChannel(MFC_LSA, u128::from32r(lsa));
+    spu.WriteChannel(MFC_EAH, u128::from32r((u32)(ea >> 32)));
+    spu.WriteChannel(MFC_EAL, u128::from32r((u32)ea));
+    spu.WriteChannel(MFC_Size, u128::from32r(size));
+    spu.WriteChannel(MFC_TagID, u128::from32r(tag));
+    spu.WriteChannel(MFC_Cmd, u128::from32r(cmd));
+
+    if (cmd == MFC_GETLLAR_CMD || cmd == MFC_PUTLLC_CMD || cmd == MFC_PUTLLUC_CMD) {
+        u128 rv;
+
+        spu.ReadChannel(rv, MFC_RdAtomicStat);
+        return rv._u32[3] ? true : false;
+    }
+
+    return true;
+}
+
+/// Get the status of DMA operations
+u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask) {
+    u128 rv;
+
+    spu.WriteChannel(MFC_WrTagMask, u128::from32r(tagMask));
+    spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_IMMEDIATE));
+    spu.ReadChannel(rv, MFC_RdTagStat);
+    return rv._u32[3];
+}
+
+/// Wait for DMA operations to complete
+u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll) {
+    u128 rv;
+
+    spu.WriteChannel(MFC_WrTagMask, u128::from32r(tagMask));
+    spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(waitForAll ? MFC_TAG_UPDATE_ALL : MFC_TAG_UPDATE_ANY));
+    spu.ReadChannel(rv, MFC_RdTagStat);
+    return rv._u32[3];
+}
+
 //////////////////////////////////////////////////////////////////////////////
 // SPURS kernel functions
 //////////////////////////////////////////////////////////////////////////////
 
 /// Select a workload to run
-void spursKernelSelectWorkload(SPUThread & spu) {
-    LV2_LOCK(0); // TODO: lock-free implementation if possible
-
+bool spursKernel1SelectWorkload(SPUThread & spu) {
     auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
 
     // The first and only argument to this function is a boolean that is set to false if the function
@@ -83,140 +124,148 @@ void spursKernelSelectWorkload(SPUThread & spu) {
     // If the first argument is true then the shared data is not updated with the result.
     const auto isPoll = spu.GPR[3]._u32[3];
 
-    // Calculate the contention (number of SPUs used) for each workload
-    u8 contention[CELL_SPURS_MAX_WORKLOAD];
-    u8 pendingContention[CELL_SPURS_MAX_WORKLOAD];
-    for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
-        contention[i] = mgmt->spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i];
+    u32 wklSelectedId;
+    u32 pollStatus;
 
-        // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably
-        // to prevent unnecessary jumps to the kernel
-        if (isPoll) {
-            pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i];
-            if (i != mgmt->wklCurrentId) {
-                contention[i] += pendingContention[i];
-            }
-        }
-    }
+    do {
+        // DMA and lock the first 0x80 bytes of spurs
+        spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        CellSpurs * spurs = (CellSpurs *)mgmt->tempArea;
 
-    u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
-    u32 pollStatus = 0;
-
-    // The system service workload has the highest priority. Select the system service workload if
-    // the system service message bit for this SPU is set.
-    if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
-        mgmt->spuIdling = 0;
-        if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
-            // Clear the message bit
-            mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
-        }
-    } else {
-        // Caclulate the scheduling weight for each workload
-        u16 maxWeight = 0;
-        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
-            u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i);
-            u16 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
-            u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
-            u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed();
-            u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
-            u8 requestCount = readyCount + idleSpuCount;
+        // Calculate the contention (number of SPUs used) for each workload
+        u8 contention[CELL_SPURS_MAX_WORKLOAD];
+        u8 pendingContention[CELL_SPURS_MAX_WORKLOAD];
+        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+            contention[i] = spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i];
 
-            // For a workload to be considered for scheduling:
-            // 1. Its priority must not be 0
-            // 2. The number of SPUs used by it must be less than the max contention for that workload
-            // 3. The workload should be in runnable state
-            // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
-            //    OR the workload must be signalled
-            //    OR the workload flag is 0 and the workload is configured as the wokload flag receiver
-            if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) {
-                if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) {
-                    // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority:
-                    // 1. Wokload signal set or workload flag or ready count > contention
-                    // 2. Priority of the workload on the SPU
-                    // 3. Is the workload the last selected workload
-                    // 4. Minimum contention of the workload
-                    // 5. Number of SPUs that are being used by the workload (lesser the number, more the weight)
-                    // 6. Is the workload executable same as the currently loaded executable
-                    // 7. The workload id (lesser the number, more the weight)
-                    u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0;
-                    weight |= (u16)(mgmt->priority[i] & 0x7F) << 16;
-                    weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00;
-                    weight |= (contention[i] > 0 && mgmt->spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00;
-                    weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2;
-                    weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00;
-                    weight |= 0x01;
-
-                    // In case of a tie the lower numbered workload is chosen
-                    if (weight > maxWeight) {
-                        wklSelectedId = i;
-                        maxWeight = weight;
-                        pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
-                        pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
-                        pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
-                    }
+            // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably
+            // to prevent unnecessary jumps to the kernel
+            if (isPoll) {
+                pendingContention[i] = spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i];
+                if (i != mgmt->wklCurrentId) {
+                    contention[i] += pendingContention[i];
                 }
             }
         }
 
-        // Not sure what this does. Possibly mark the SPU as idle/in use.
-        mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+        wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+        pollStatus = 0;
 
-        if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
-            // Clear workload signal for the selected workload
-            mgmt->spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
-            mgmt->spurs->m.wklSignal2.write_relaxed(be_t<u32>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
-
-            // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
-            if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) {
-                mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
-            }
-        }
-    }
+        // The system service workload has the highest priority. Select the system service workload if
+        // the system service message bit for this SPU is set.
+        if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
+            mgmt->spuIdling = 0;
+            if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+                // Clear the message bit
+                spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
+            }
+        } else {
+            // Calculate the scheduling weight for each workload
+            u16 maxWeight = 0;
+            for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+                u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i);
+                u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
+                u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+                u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed();
+                u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
+                u8 requestCount = readyCount + idleSpuCount;
+
+                // For a workload to be considered for scheduling:
+                // 1. Its priority must not be 0
+                // 2. The number of SPUs used by it must be less than the max contention for that workload
+                // 3. The workload should be in runnable state
+                // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
+                //    OR the workload must be signalled
+                //    OR the workload flag is 0 and the workload is configured as the workload flag receiver
+                if (runnable && mgmt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) {
+                    if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) {
+                        // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority:
+                        // 1. Workload signal set or workload flag or ready count > contention
+                        // 2. Priority of the workload on the SPU
+                        // 3. Is the workload the last selected workload
+                        // 4. Minimum contention of the workload
+                        // 5. Number of SPUs that are being used by the workload (lesser the number, more the weight)
+                        // 6. Is the workload executable same as the currently loaded executable
+                        // 7. The workload id (lesser the number, more the weight)
+                        u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0;
+                        weight |= (u16)(mgmt->priority[i] & 0x7F) << 16;
+                        weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00;
+                        weight |= (contention[i] > 0 && spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00;
+                        weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2;
+                        weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00;
+                        weight |= 0x01;
+
+                        // In case of a tie the lower numbered workload is chosen
+                        if (weight > maxWeight) {
+                            wklSelectedId = i;
+                            maxWeight = weight;
+                            pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
+                            pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
+                            pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
+                        }
+                    }
+                }
+            }
+
+            // Not sure what this does. Possibly mark the SPU as idle/in use.
+            mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+
+            if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
+                // Clear workload signal for the selected workload
+                spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
+                spurs->m.wklSignal2.write_relaxed(be_t<u32>::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+
+                // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
+                if (wklSelectedId == spurs->m.wklFlagReceiver.read_relaxed()) {
+                    spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
+                }
+            }
+        }
 
-    if (!isPoll) {
-        // Called by kernel
-        // Increment the contention for the selected workload
-        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
-            contention[wklSelectedId]++;
-        }
-
-        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
-            mgmt->spurs->m.wklCurrentContention[i] = contention[i];
-            mgmt->wklLocContention[i] = 0;
-            mgmt->wklLocPendingContention[i] = 0;
-        }
-
-        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
-            mgmt->wklLocContention[wklSelectedId] = 1;
-        }
-
-        mgmt->wklCurrentId = wklSelectedId;
-    } else if (wklSelectedId != mgmt->wklCurrentId) {
-        // Not called by kernel but a context switch is required
-        // Increment the pending contention for the selected workload
-        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
-            pendingContention[wklSelectedId]++;
-        }
-
-        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
-            mgmt->spurs->m.wklPendingContention[i] = pendingContention[i];
-            mgmt->wklLocPendingContention[i] = 0;
-        }
-
-        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
-            mgmt->wklLocPendingContention[wklSelectedId] = 1;
+        if (!isPoll) {
+            // Called by kernel
+            // Increment the contention for the selected workload
+            if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+                contention[wklSelectedId]++;
+            }
+
+            for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+                spurs->m.wklCurrentContention[i] = contention[i];
+                mgmt->wklLocContention[i] = 0;
+                mgmt->wklLocPendingContention[i] = 0;
+            }
+
+            if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+                mgmt->wklLocContention[wklSelectedId] = 1;
+            }
+
+            mgmt->wklCurrentId = wklSelectedId;
+        } else if (wklSelectedId != mgmt->wklCurrentId) {
+            // Not called by kernel but a context switch is required
+            // Increment the pending contention for the selected workload
+            if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+                pendingContention[wklSelectedId]++;
+            }
+
+            for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+                spurs->m.wklPendingContention[i] = pendingContention[i];
+                mgmt->wklLocPendingContention[i] = 0;
+            }
+
+            if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+                mgmt->wklLocPendingContention[wklSelectedId] = 1;
+            }
         }
-    }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
 
     u64 result = (u64)wklSelectedId << 32;
     result |= pollStatus;
     spu.GPR[3]._u64[1] = result;
+    return true;
 }
 
 /// Select a workload to run
-void spursKernelSelectWorkload2(SPUThread & spu) {
-    LV2_LOCK(0); // TODO: lock-free implementation if possible
-
+bool spursKernel2SelectWorkload(SPUThread & spu) {
     auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
 
     // The first and only argument to this function is a boolean that is set to false if the function
@@ -224,202 +273,214 @@ void spursKernelSelectWorkload2(SPUThread & spu) {
     // If the first argument is true then the shared data is not updated with the result.
     const auto isPoll = spu.GPR[3]._u32[3];
 
-    // Calculate the contention (number of SPUs used) for each workload
-    u8 contention[CELL_SPURS_MAX_WORKLOAD2];
-    u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2];
-    for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
-        contention[i] = mgmt->spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F];
-        contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4;
+    u32 wklSelectedId;
+    u32 pollStatus;
 
-        // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably
-        // to prevent unnecessary jumps to the kernel
-        if (isPoll) {
-            pendingContention[i] = mgmt->spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F];
-            pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4;
-            if (i != mgmt->wklCurrentId) {
-                contention[i] += pendingContention[i];
-            }
-        }
-    }
+    do {
+        // DMA and lock the first 0x80 bytes of spurs
+        spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        CellSpurs * spurs = (CellSpurs *)mgmt->tempArea;
 
-    u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
-    u32 pollStatus = 0;
-
-    // The system service workload has the highest priority. Select the system service workload if
-    // the system service message bit for this SPU is set.
-    if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
-        // Not sure what this does. Possibly Mark the SPU as in use.
-        mgmt->spuIdling = 0;
-        if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
-            // Clear the message bit
-            mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
-        }
-    } else {
-        // Caclulate the scheduling weight for each workload
-        u8 maxWeight = 0;
-        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
-            auto j = i & 0x0F;
-            u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j);
-            u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4;
-            u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4;
-            u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j);
-            u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
-            u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();
+        // Calculate the contention (number of SPUs used) for each workload
+        u8 contention[CELL_SPURS_MAX_WORKLOAD2];
+        u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2];
+        for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
+            contention[i] = spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F];
+            contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4;
 
-            // For a workload to be considered for scheduling:
-            // 1. Its priority must be greater than 0
-            // 2. The number of SPUs used by it must be less than the max contention for that workload
-            // 3. The workload should be in runnable state
-            // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
-            //    OR the workload must be signalled
-            //    OR the workload flag is 0 and the workload is configured as the wokload receiver
-            if (runnable && priority > 0 && maxContention > contention[i]) {
-                if (wklFlag || wklSignal || readyCount > contention[i]) {
-                    // The scheduling weight of the workload is equal to the priority of the workload for the SPU.
-                    // The current workload is given a sligtly higher weight presumably to reduce the number of context switches.
-                    // In case of a tie the lower numbered workload is chosen.
-                    u8 weight = priority << 4;
-                    if (mgmt->wklCurrentId == i) {
-                        weight |= 0x04;
-                    }
-
-                    if (weight > maxWeight) {
-                        wklSelectedId = i;
-                        maxWeight = weight;
-                        pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
-                        pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
-                        pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
-                    }
+            // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably
+            // to prevent unnecessary jumps to the kernel
+            if (isPoll) {
+                pendingContention[i] = spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F];
+                pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4;
+                if (i != mgmt->wklCurrentId) {
+                    contention[i] += pendingContention[i];
                 }
             }
         }
 
-        // Not sure what this does. Possibly mark the SPU as idle/in use.
-        mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+        wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+        pollStatus = 0;
 
-        if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
-            // Clear workload signal for the selected workload
-            mgmt->spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
-            mgmt->spurs->m.wklSignal2.write_relaxed(be_t<u32>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+        // The system service workload has the highest priority. Select the system service workload if
+        // the system service message bit for this SPU is set.
+        if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
+            // Not sure what this does. Possibly mark the SPU as in use.
+            mgmt->spuIdling = 0;
+            if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+                // Clear the message bit
+                spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
+            }
+        } else {
+            // Calculate the scheduling weight for each workload
+            u8 maxWeight = 0;
+            for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
+                auto j = i & 0x0F;
+                u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j);
+                u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4;
+                u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4;
+                u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j);
+                u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+                u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();
 
-            // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
-            if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) {
-                mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
+                // For a workload to be considered for scheduling:
+                // 1. Its priority must be greater than 0
+                // 2. The number of SPUs used by it must be less than the max contention for that workload
+                // 3. The workload should be in runnable state
+                // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
+                //    OR the workload must be signalled
+                //    OR the workload flag is 0 and the workload is configured as the workload receiver
+                if (runnable && priority > 0 && maxContention > contention[i]) {
+                    if (wklFlag || wklSignal || readyCount > contention[i]) {
+                        // The scheduling weight of the workload is equal to the priority of the workload for the SPU.
+                        // The current workload is given a slightly higher weight presumably to reduce the number of context switches.
+                        // In case of a tie the lower numbered workload is chosen.
+                        u8 weight = priority << 4;
+                        if (mgmt->wklCurrentId == i) {
+                            weight |= 0x04;
+                        }
+
+                        if (weight > maxWeight) {
+                            wklSelectedId = i;
+                            maxWeight = weight;
+                            pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
+                            pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
+                            pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
+                        }
+                    }
+                }
+            }
+
+            // Not sure what this does. Possibly mark the SPU as idle/in use.
+            mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+
+            if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
+                // Clear workload signal for the selected workload
+                spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
+                spurs->m.wklSignal2.write_relaxed(be_t<u32>::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+
+                // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
+                if (wklSelectedId == spurs->m.wklFlagReceiver.read_relaxed()) {
+                    spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
+                }
             }
         }
-    }
 
-    if (!isPoll) {
-        // Called by kernel
-        // Increment the contention for the selected workload
-        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
-            contention[wklSelectedId]++;
-        }
-
-        for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
-            mgmt->spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4);
-            mgmt->wklLocContention[i] = 0;
-            mgmt->wklLocPendingContention[i] = 0;
-        }
-
-        mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
-        mgmt->wklCurrentId = wklSelectedId;
-    } else if (wklSelectedId != mgmt->wklCurrentId) {
-        // Not called by kernel but a context switch is required
-        // Increment the pending contention for the selected workload
-        if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
-            pendingContention[wklSelectedId]++;
-        }
-
-        for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
-            mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4);
-            mgmt->wklLocPendingContention[i] = 0;
-        }
-
-        mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
-    }
+        if (!isPoll) {
+            // Called by kernel
+            // Increment the contention for the selected workload
+            if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+                contention[wklSelectedId]++;
+            }
+
+            for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
+                spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4);
+                mgmt->wklLocContention[i] = 0;
+                mgmt->wklLocPendingContention[i] = 0;
+            }
+
+            mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
+            mgmt->wklCurrentId = wklSelectedId;
+        } else if (wklSelectedId != mgmt->wklCurrentId) {
+            // Not called by kernel but a context switch is required
+            // Increment the pending contention for the selected workload
+            if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+                pendingContention[wklSelectedId]++;
+            }
+
+            for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
+                spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4);
+                mgmt->wklLocPendingContention[i] = 0;
+            }
+
+            mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
 
     u64 result = (u64)wklSelectedId << 32;
     result |= pollStatus;
     spu.GPR[3]._u64[1] = result;
+    return true;
 }
 
-/// Entry point of the SPURS kernel
-void spursKernelMain(SPUThread & spu) {
+/// SPURS kernel main
+bool spursKernelMain(SPUThread & spu) {
     SpursKernelMgmtData * mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
 
-    mgmt->spuNum = spu.GPR[3]._u32[3];
-    mgmt->dmaTagId = 0x1F;
-    mgmt->spurs.set(spu.GPR[4]._u64[1]);
-    mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
-    mgmt->wklCurrentUniqueId = 0x20;
-    bool isSecond = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
-    mgmt->yieldToKernelAddr = isSecond ? 0x838 : 0x808;
-    mgmt->selectWorkloadAddr = 0x290;
-    spu.WriteLS32(mgmt->yieldToKernelAddr, 2);               // hack for cellSpursModuleExit
-    spu.WriteLS32(mgmt->selectWorkloadAddr, 3);              // hack for cellSpursModulePollStatus
-    spu.WriteLS32(mgmt->selectWorkloadAddr + 4, 0x35000000); // bi $0
-    spu.m_code3_func = isSecond ? spursKernelSelectWorkload2 : spursKernelSelectWorkload;
+    bool isKernel2;
+    u32 pollStatus;
+    const CellSpurs::WorkloadInfo * wklInfo;
+    if (spu.PC == CELL_SPURS_KERNEL1_ENTRY_ADDR || spu.PC == CELL_SPURS_KERNEL2_ENTRY_ADDR) {
+        // Entry point of SPURS kernel
+        // Save arguments
+        mgmt->spuNum = spu.GPR[3]._u32[3];
+        mgmt->spurs.set(spu.GPR[4]._u64[1]);
 
-    u32 wid = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
-    u32 pollStatus = 0;
-    while (true) {
-        if (Emu.IsStopped()) {
-            cellSpurs->Warning("Spurs Kernel aborted");
-            return;
-        }
+        isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
 
-        // Get current workload info
-        auto & wkl = wid < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ? mgmt->spurs->m.wklInfo2[wid & 0xf] : mgmt->spurs->m.wklInfoSysSrv);
+        memset(mgmt, 0, sizeof(SpursKernelMgmtData));
 
-        if (mgmt->wklCurrentAddr != wkl.addr) {
-            if (wkl.addr.addr() != SPURS_IMG_ADDR_SYS_SRV_WORKLOAD) {
-                // Load executable code
-                memcpy(vm::get_ptr<void>(spu.ls_offset + 0xA00), wkl.addr.get_ptr(), wkl.size);
-            }
-            mgmt->wklCurrentAddr = wkl.addr;
-            mgmt->wklCurrentUniqueId = wkl.uniqueId.read_relaxed();
-        }
+        // Initialise the SPURS management area to its initial values
+        mgmt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID;
+        mgmt->wklCurrentUniqueId = 0x20;
+        mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+        mgmt->yieldToKernelAddr = isKernel2 ? CELL_SPURS_KERNEL2_YIELD_ADDR : CELL_SPURS_KERNEL1_YIELD_ADDR;
+        mgmt->selectWorkloadAddr = isKernel2 ? CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR : CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR;
+        if (!isKernel2) {
+            mgmt->x1F0 = 0xF0020000;
+            mgmt->x200 = 0x20000;
+            mgmt->guid[0] = 0x423A3A02;
+            mgmt->guid[1] = 0x43F43A82;
+            mgmt->guid[2] = 0x43F26502;
+            mgmt->guid[3] = 0x420EB382;
+        } else {
+            mgmt->guid[0] = 0x43A08402;
+            mgmt->guid[1] = 0x43FB0A82;
+            mgmt->guid[2] = 0x435E9302;
+            mgmt->guid[3] = 0x43A3C982;
+        }
 
-        if (!isSecond) {
-            mgmt->moduleId[0] = 0;
-            mgmt->moduleId[1] = 0;
-        }
+        spu.UnregisterHleFunctions(0, 0x40000); // TODO: use a symbolic constant
+        spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelMain);
+        spu.RegisterHleFunction(mgmt->yieldToKernelAddr, spursKernelMain);
+        spu.RegisterHleFunction(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload);
+
+        // Start the system service workload
+        spu.RegisterHleFunction(0xA00, spursSysServiceWorkloadEntry);
+        wklInfo = &mgmt->spurs->m.wklInfoSysSrv;
+        pollStatus = 0;
+    } else if (spu.PC == mgmt->yieldToKernelAddr) {
+        isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
 
-        // Run workload
-        spu.GPR[1]._u32[3] = 0x3FFB0;
-        spu.GPR[3]._u32[3] = 0x100;
-        spu.GPR[4]._u64[1] = wkl.arg;
-        spu.GPR[5]._u32[3] = pollStatus;
-        spu.SetPc(0xA00);
-        switch (mgmt->wklCurrentAddr.addr()) {
-        case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD:
-            spursSysServiceWorkloadEntry(spu);
-            break;
-        default:
-            spu.FastCall(0xA00);
-            break;
-        }
-
-        // Check status
-        auto status = spu.SPU.Status.GetValue();
-        if (status == SPU_STATUS_STOPPED_BY_STOP) {
-            return;
-        } else {
-            assert(status == SPU_STATUS_RUNNING);
-        }
-
+        // Select next workload to run
         spu.GPR[3].clear();
-        if (isSecond) {
-            spursKernelSelectWorkload2(spu);
+        if (isKernel2) {
+            spursKernel2SelectWorkload(spu);
         } else {
-            spursKernelSelectWorkload(spu);
+            spursKernel1SelectWorkload(spu);
         }
-        u64 res = spu.GPR[3]._u64[1];
-        pollStatus = (u32)(res);
-        wid = (u32)(res >> 32);
+
+        pollStatus = (u32)(spu.GPR[3]._u64[1]);
+        auto wid = (u32)(spu.GPR[3]._u64[1] >> 32);
+        wklInfo = wid < CELL_SPURS_MAX_WORKLOAD ? &mgmt->spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? &mgmt->spurs->m.wklInfo2[wid & 0xf] : &mgmt->spurs->m.wklInfoSysSrv);
+    } else {
+        assert(0);
     }
+
+    if (!isKernel2) {
+        mgmt->moduleId[0] = 0;
+        mgmt->moduleId[1] = 0;
+    }
+
+    // Run workload
+    spu.GPR[0]._u32[3] = mgmt->yieldToKernelAddr;
+    spu.GPR[1]._u32[3] = 0x3FFB0;
+    spu.GPR[3]._u32[3] = 0x100;
+    spu.GPR[4]._u64[1] = wklInfo->arg;
+    spu.GPR[5]._u32[3] = pollStatus;
+    spu.SetBranch(0xA00);
+    return false;
 }
 
 //////////////////////////////////////////////////////////////////////////////
@@ -783,7 +844,7 @@ poll:
 }
 
 /// Entry point of the system service workload
-void spursSysServiceWorkloadEntry(SPUThread & spu) {
+bool spursSysServiceWorkloadEntry(SPUThread & spu) {
     auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + spu.GPR[3]._u32[3]);
     auto arg = spu.GPR[4]._u64[1];
     auto pollStatus = spu.GPR[5]._u32[3];
@@ -800,7 +861,7 @@ void spursSysServiceWorkloadEntry(SPUThread & spu) {
     }
 
     // TODO: Ensure that this function always returns to the SPURS kernel
-    return;
+    return false;
 }
 
 //////////////////////////////////////////////////////////////////////////////
diff --git a/rpcs3/stdafx.h b/rpcs3/stdafx.h
index 825c1c4007..4581c27650 100644
--- a/rpcs3/stdafx.h
+++ b/rpcs3/stdafx.h
@@ -34,6 +34,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 
 #include "Utilities/GNU.h"
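
Note on the locking scheme: the do/while loops added above implement optimistic concurrency over the 128-byte SPURS lock line. GETLLAR snapshots the line into local store and acquires a reservation, the scheduler mutates its local copy, and PUTLLC stores the copy back only if no other SPU or the PPU wrote the line in the meantime, retrying otherwise. A minimal portable sketch of the same pattern, using std::atomic compare-exchange in place of the MFC channels (all names below are illustrative, not part of this patch):

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    // Stand-in for one 32-bit word of the 0x80-byte SPURS lock line.
    static std::atomic<uint32_t> lock_line_word{0};

    // Mirrors the do { GETLLAR; mutate local copy; } while (!PUTLLC) loop in
    // spursKernel1SelectWorkload: reload on conflict, retry until the
    // conditional store succeeds.
    uint32_t atomically_increment_contention(uint32_t delta)
    {
        uint32_t expected = lock_line_word.load(); // GETLLAR: snapshot + reservation
        uint32_t desired;
        do {
            desired = expected + delta;            // mutate the local copy
        } while (!lock_line_word.compare_exchange_weak(expected, desired)); // PUTLLC
        return desired;
    }

    int main()
    {
        std::printf("%u\n", atomically_increment_contention(1));
    }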
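Note on the HLE hook mechanism: RegisterHleFunction patches a STOP 3 opcode at a local-store address and records a handler keyed by that address; SPUThread::StopAndSignal then dispatches on the current PC, and a handler that returns true resumes the SPU at the caller's return address taken from GPR[0], while a handler that returns false (like spursKernelMain) is expected to set its own branch target. A self-contained model of that convention (Context and the handler here are illustrative stand-ins, not emulator types):

    #include <cstdint>
    #include <functional>
    #include <unordered_map>

    struct Context { uint32_t pc; uint32_t lr; };

    // addr -> handler; returning true means "return to caller via LR",
    // returning false means the handler has already chosen the next PC.
    std::unordered_map<uint32_t, std::function<bool(Context&)>> hooks;

    void dispatch_stop3(Context& ctx)
    {
        auto it = hooks.find(ctx.pc);
        if (it == hooks.end()) return;     // no hook registered at this PC
        if (it->second(ctx)) {
            ctx.pc = ctx.lr & 0x3fffc;     // SetBranch(GPR[0] & 0x3fffc)
        }
    }

    int main()
    {
        hooks[0x818] = [](Context&) { return true; }; // kernel entry stub
        Context ctx{0x818, 0xA00};
        dispatch_stop3(ctx);                          // ctx.pc becomes 0xA00
    }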
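Note on the new ch23 constants: spursDmaGetCompletionStatus writes MFC_TAG_UPDATE_IMMEDIATE so that MFC_RdTagStat reports the current tag completion mask without stalling, while spursDmaWaitForCompletion writes MFC_TAG_UPDATE_ALL or MFC_TAG_UPDATE_ANY to block until every masked tag group, or at least one of them, has completed. The three modes reduce to simple bitmask tests, sketched below (tag_status_ready is a made-up helper for illustration, not emulator code):

    #include <cstdint>

    enum { TAG_UPDATE_IMMEDIATE = 0, TAG_UPDATE_ANY = 1, TAG_UPDATE_ALL = 2 };

    // completed: bitmask of finished DMA tag groups; mask: tags we care about.
    // Returns true once the chosen condition would unblock MFC_RdTagStat.
    bool tag_status_ready(uint32_t completed, uint32_t mask, int mode)
    {
        switch (mode) {
        case TAG_UPDATE_IMMEDIATE: return true;                       // never blocks
        case TAG_UPDATE_ANY:       return (completed & mask) != 0;    // any tag done
        case TAG_UPDATE_ALL:       return (completed & mask) == mask; // all tags done
        }
        return false;
    }

    int main()
    {
        return tag_status_ready(0x4, 0x6, TAG_UPDATE_ANY) ? 0 : 1;
    }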