diff --git a/Utilities/BEType.h b/Utilities/BEType.h
index 4965eac336..8cf5e9a89c 100644
--- a/Utilities/BEType.h
+++ b/Utilities/BEType.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#define IS_LE_MACHINE
+
 union _CRT_ALIGN(16) u128
 {
     u64 _u64[2];
@@ -136,16 +138,28 @@ union _CRT_ALIGN(16) u128
         }
     };
 
+    // Index 0 returns the MSB and index 127 returns the LSB
     bit_element operator [] (u32 index)
     {
         assert(index < 128);
-        return bit_element(data[index / 64], 1ull << (index % 64));
+
+#ifdef IS_LE_MACHINE
+        return bit_element(data[1 - (index >> 6)], 0x8000000000000000ull >> (index & 0x3F));
+#else
+        return bit_element(data[index >> 6], 0x8000000000000000ull >> (index & 0x3F));
+#endif
     }
 
+    // Index 0 returns the MSB and index 127 returns the LSB
     const bool operator [] (u32 index) const
     {
         assert(index < 128);
-        return (data[index / 64] & (1ull << (index % 64))) != 0;
+
+#ifdef IS_LE_MACHINE
+        return (data[1 - (index >> 6)] & (0x8000000000000000ull >> (index & 0x3F))) != 0;
+#else
+        return (data[index >> 6] & (0x8000000000000000ull >> (index & 0x3F))) != 0;
+#endif
     }
 } _bit;
@@ -509,8 +523,6 @@ struct be_storage_t
     typedef u128 type;
 };
 
-#define IS_LE_MACHINE
-
 template<typename T, typename T2 = T>
 class be_t
 {
diff --git a/rpcs3/Emu/Cell/MFC.h b/rpcs3/Emu/Cell/MFC.h
index a6c731d3da..0b669deb97 100644
--- a/rpcs3/Emu/Cell/MFC.h
+++ b/rpcs3/Emu/Cell/MFC.h
@@ -35,6 +35,14 @@ enum
     MFC_GETLLAR_SUCCESS = 4,
 };
 
+// MFC Write Tag Status Update Request Channel (ch23) operations
+enum
+{
+    MFC_TAG_UPDATE_IMMEDIATE = 0,
+    MFC_TAG_UPDATE_ANY = 1,
+    MFC_TAG_UPDATE_ALL = 2,
+};
+
 enum
 {
     MFC_SPU_TO_PPU_MAILBOX_STATUS_MASK = 0x000000FF,
diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h
index 151eb4e436..7c56b4c9ac 100644
--- a/rpcs3/Emu/Cell/SPUInterpreter.h
+++ b/rpcs3/Emu/Cell/SPUInterpreter.h
@@ -1316,10 +1316,7 @@ private:
     void FSCRRD(u32 rt)
     {
-        CPU.GPR[rt]._u32[3] = CPU.FPSCR._u32[3];
-        CPU.GPR[rt]._u32[2] = CPU.FPSCR._u32[2];
-        CPU.GPR[rt]._u32[1] = CPU.FPSCR._u32[1];
-        CPU.GPR[rt]._u32[0] = CPU.FPSCR._u32[0];
+        CPU.FPSCR.Read(CPU.GPR[rt]);
     }
     void FESD(u32 rt, u32 ra)
     {
@@ -1373,10 +1370,7 @@ private:
     }
     void FSCRWR(u32 rt, u32 ra)
     {
-        CPU.FPSCR._u32[3] = CPU.GPR[ra]._u32[3] & 0x00000F07;
-        CPU.FPSCR._u32[2] = CPU.GPR[ra]._u32[2] & 0x00003F07;
-        CPU.FPSCR._u32[1] = CPU.GPR[ra]._u32[1] & 0x00003F07;
-        CPU.FPSCR._u32[0] = CPU.GPR[ra]._u32[0] & 0x00000F07;
+        CPU.FPSCR.Write(CPU.GPR[ra]);
     }
     void DFTSV(u32 rt, u32 ra, s32 i7)
     {
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index e944fcd60d..c4cababc01 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -1003,7 +1003,14 @@ void SPUThread::StopAndSignal(u32 code)
 
     case 0x003:
     {
-        GPR[3]._u64[1] = m_code3_func(*this);
+        auto iter = m_addr_to_hle_function_map.find(PC);
+        assert(iter != m_addr_to_hle_function_map.end());
+
+        auto return_to_caller = iter->second(*this);
+        if (return_to_caller)
+        {
+            SetBranch(GPR[0]._u32[3] & 0x3fffc);
+        }
         break;
     }
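Note (not part of the patch): a minimal standalone sketch of the new MSB-first bit indexing that `u128::operator[]` adopts above. `test_bit_msb_first` is a hypothetical helper, not RPCS3 code; `data[2]` models the `u128::_u64` storage on a little-endian host, where `data[1]` holds the most significant 64 bits.

```cpp
#include <cassert>
#include <cstdint>

// Bit 0 is the MSB of the 128-bit value, bit 127 the LSB, matching the patch.
static bool test_bit_msb_first(const uint64_t data[2], uint32_t index)
{
    assert(index < 128);
    // index >> 6 selects the 64-bit word; 1 - (...) flips it for LE storage.
    // The mask walks down from the MSB within the selected word.
    return (data[1 - (index >> 6)] & (0x8000000000000000ull >> (index & 0x3F))) != 0;
}

int main()
{
    const uint64_t v[2] = { 0x0000000000000001ull, 0x8000000000000000ull };
    assert(test_bit_msb_first(v, 0));    // MSB of the high word
    assert(test_bit_msb_first(v, 127));  // LSB of the low word
    assert(!test_bit_msb_first(v, 1));   // adjacent bit is clear
}
```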
diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h
index bab5f1aa2d..9c0baa5335 100644
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@@ -248,6 +248,24 @@ public:
     {
         _u32[1+slice] |= exceptions;
     }
+
+    // Write the FPSCR
+    void Write(u128 & r)
+    {
+        _u32[3] = r._u32[3] & 0x00000F07;
+        _u32[2] = r._u32[2] & 0x00003F07;
+        _u32[1] = r._u32[1] & 0x00003F07;
+        _u32[0] = r._u32[0] & 0x00000F07;
+    }
+
+    // Read the FPSCR
+    void Read(u128 & r)
+    {
+        r._u32[3] = _u32[3];
+        r._u32[2] = _u32[2];
+        r._u32[1] = _u32[1];
+        r._u32[0] = _u32[0];
+    }
 };
 
 union SPU_SNRConfig_hdr
@@ -287,6 +305,8 @@ public:
     u32 m_event_mask;
     u32 m_events;
 
+    std::unordered_map<u32, std::function<bool(SPUThread&)>> m_addr_to_hle_function_map;
+
     struct IntrTag
     {
         u32 enabled; // 1 == true
@@ -506,8 +526,35 @@ public:
     void WriteLS64 (const u32 lsa, const u64& data) const { vm::write64 (lsa + m_offset, data); }
     void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); }
 
+    void RegisterHleFunction(u32 addr, std::function<bool(SPUThread&)> function)
+    {
+        m_addr_to_hle_function_map[addr] = function;
+        WriteLS32(addr, 0x00000003); // STOP 3
+    }
+
+    void UnregisterHleFunction(u32 addr)
+    {
+        WriteLS32(addr, 0x00200000); // NOP
+        m_addr_to_hle_function_map.erase(addr);
+    }
+
+    void UnregisterHleFunctions(u32 start_addr, u32 end_addr)
+    {
+        for (auto iter = m_addr_to_hle_function_map.begin(); iter != m_addr_to_hle_function_map.end();)
+        {
+            if (iter->first >= start_addr && iter->first <= end_addr)
+            {
+                WriteLS32(iter->first, 0x00200000); // NOP
+                m_addr_to_hle_function_map.erase(iter++);
+            }
+            else
+            {
+                iter++;
+            }
+        }
+    }
+
     std::function<void(SPUThread& SPU)> m_custom_task;
-    std::function<u64(SPUThread& SPU)> m_code3_func;
 
 public:
     SPUThread(CPUThreadType type = CPU_THREAD_SPU);
@@ -606,7 +653,7 @@ public:
         for (auto &arg : values)
         {
             u32 arg_size = align(u32(arg.size() + 1), stack_align);
-            u32 arg_addr = Memory.MainMem.AllocAlign(arg_size, stack_align);
+            u32 arg_addr = (u32)Memory.MainMem.AllocAlign(arg_size, stack_align);
 
             std::strcpy(vm::get_ptr<char>(arg_addr), arg.c_str());
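Note (not part of the patch): a hedged usage sketch of the HLE hooks added above. `MyHleCall` and `InstallHook` are hypothetical; the member names come from this diff. `RegisterHleFunction` patches a `STOP 3` at the given LS address; when the interpreter hits it, `StopAndSignal(0x003)` looks the handler up by `PC`, and a `true` return branches back to the SPU-side caller through `$LR` (`GPR[0]`).

```cpp
bool MyHleCall(SPUThread& spu)
{
    spu.GPR[3]._u32[3] = 0; // hand a return value back to the SPU-side caller
    return true;            // branch to GPR[0] & 0x3fffc, like a real function return
}

void InstallHook(SPUThread& spu, u32 entry)
{
    spu.RegisterHleFunction(entry, MyHleCall); // LS now holds STOP 3 at 'entry'
    // ... later:
    spu.UnregisterHleFunction(entry);          // restores a NOP at 'entry'
}
```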
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
index f6f07dcfc6..18eaca3722 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
@@ -26,13 +26,17 @@ extern u32 libsre;
 extern u32 libsre_rtoc;
 #endif
 
+bool spursKernelEntry(SPUThread & spu);
+s64 cellSpursLookUpTasksetAddress(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, u32 id);
+s64 _cellSpursSendSignal(vm::ptr<CellSpursTaskset> taskset, u32 taskID);
+
 s64 spursCreateLv2EventQueue(vm::ptr<CellSpurs> spurs, u32& queue_id, vm::ptr<u8> port, s32 size, u64 name_u64)
 {
 #ifdef PRX_DEBUG_XXX
     vm::var<be_t<u32>> queue;
-    s32 res = cb_call<s32, vm::ptr<CellSpurs>, vm::ptr<u32>, vm::ptr<u8>, s32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc,
+    s32 res = cb_call<s32, vm::ptr<CellSpurs>, vm::ptr<be_t<u32>>, vm::ptr<u8>, s32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc,
         spurs, queue, port, size, vm::read32(libsre_rtoc - 0x7E2C));
-    queue_id = queue;
+    queue_id = queue.value();
     return res;
 #endif
@@ -42,7 +46,7 @@ s64 spursCreateLv2EventQueue(vm::ptr<CellSpurs> spurs, u32& queue_id, vm::ptr<u8> port, s32 size, u64 name_u64)
 
     spurs->m.xCC = 0;
     spurs->m.xCD = 0;
-    spurs->m.xCE = 0;
+    spurs->m.sysSrvMsgUpdateTrace = 0;
     for (u32 i = 0; i < 8; i++)
     {
-        spurs->m.xC0[i] = -1;
+        spurs->m.sysSrvWorkload[i] = -1;
     }
 
     // default or system workload:
 #ifdef PRX_DEBUG
-    spurs->m.wklSysG.pm.set(be_t<u64>::make(vm::read32(libsre_rtoc - 0x7EA4)));
-    spurs->m.wklSysG.size = 0x2200;
+    spurs->m.wklInfoSysSrv.addr.set(be_t<u64>::make(vm::read32(libsre_rtoc - 0x7EA4)));
+    spurs->m.wklInfoSysSrv.size = 0x2200;
 #else
-    spurs->m.wklSysG.pm.set(be_t<u64>::make(0x100)); // wrong 64-bit address
+    spurs->m.wklInfoSysSrv.addr.set(be_t<u64>::make(SPURS_IMG_ADDR_SYS_SRV_WORKLOAD));
 #endif
-    spurs->m.wklSysG.data = 0;
-    spurs->m.wklSysG.copy.write_relaxed(0xff);
+    spurs->m.wklInfoSysSrv.arg = 0;
+    spurs->m.wklInfoSysSrv.uniqueId.write_relaxed(0xff);
     u32 sem;
     for (u32 i = 0; i < 0x10; i++)
     {
@@ -151,7 +155,8 @@ s64 spursInit(
         assert(!"spu_image_import() failed");
     }
 #else
-    spurs->m.spuImg.addr = Memory.Alloc(0x40000, 4096);
+    spurs->m.spuImg.addr = (u32)Memory.Alloc(0x40000, 4096);
+    spurs->m.spuImg.entry_point = isSecond ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR;
 #endif
 
     s32 tgt = SYS_SPU_THREAD_GROUP_TYPE_NORMAL;
@@ -175,322 +180,11 @@ s64 spursInit(
     name += "CellSpursKernel0";
     for (s32 num = 0; num < nSpus; num++, name[name.size() - 1]++)
     {
-        spurs->m.spus[num] = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, 0, 0, 0, 0, [spurs, num, isSecond](SPUThread& SPU)
-        {
-#ifdef PRX_DEBUG_XXX
-            SPU.GPR[3]._u32[3] = num;
-            SPU.GPR[4]._u64[1] = spurs.addr();
-            return SPU.FastCall(SPU.PC);
+        auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, num, spurs.addr(), 0, 0);
+#ifndef PRX_DEBUG_XXX
+        spu->RegisterHleFunction(spurs->m.spuImg.entry_point, spursKernelEntry);
 #endif
-
-            // code replacement:
-            {
-                const u32 addr = /*SPU.ReadLS32(0x1e0) +*/ 8; //SPU.ReadLS32(0x1e4);
-                SPU.WriteLS32(addr + 0, 3); // hack for cellSpursModulePollStatus
-                SPU.WriteLS32(addr + 4, 0x35000000); // bi $0
-                SPU.WriteLS32(0x1e4, addr);
-
-                SPU.WriteLS32(SPU.ReadLS32(0x1e0), 2); // hack for cellSpursModuleExit
-            }
-
-            if (!isSecond) SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // first kernel
-            {
-                LV2_LOCK(0); // TODO: lock-free implementation if possible
-
-                const u32 arg1 = SPU.GPR[3]._u32[3];
-                u32 var0 = SPU.ReadLS32(0x1d8);
-                u32 var1 = SPU.ReadLS32(0x1dc);
-                u128 wklA = vm::read128(spurs.addr() + 0x20);
-                u128 wklB = vm::read128(spurs.addr() + 0x30);
-                u128 savedA = SPU.ReadLS128(0x180);
-                u128 savedB = SPU.ReadLS128(0x190);
-                u128 vAA = u128::sub8(wklA, savedA);
-                u128 vBB = u128::sub8(wklB, savedB);
-                u128 vM1 = {}; if (var1 <= 15) vM1.u8r[var1] = 0xff;
-                u128 vAABB = (arg1 == 0) ? vAA : u128::add8(vAA, u128::andnot(vM1, vBB));
-
-                u32 vNUM = 0x20;
-                u64 vRES = 0x20ull << 32;
-                u128 vSET = {};
-
-                if (spurs->m.x72.read_relaxed() & (1 << num))
-                {
-                    SPU.WriteLS8(0x1eb, 0); // var4
-                    if (arg1 == 0 || var1 == 0x20)
-                    {
-                        spurs->m.x72._and_not(1 << num);
-                    }
-                }
-                else
-                {
-                    u128 wklReadyCount0 = vm::read128(spurs.addr() + 0x0);
-                    u128 wklReadyCount1 = vm::read128(spurs.addr() + 0x10);
-                    u128 savedC = SPU.ReadLS128(0x1A0);
-                    u128 savedD = SPU.ReadLS128(0x1B0);
-                    u128 vRC = u128::add8(u128::minu8(wklReadyCount0, u128::from8p(8)), u128::minu8(wklReadyCount1, u128::from8p(8)));
-                    u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed();
-                    u32 flagRecv = spurs->m.flagRecv.read_relaxed();
-                    u128 vFM = u128::fromV(g_imm_table.fsmb_table[(wklFlag == 0) && (flagRecv < 16) ? 0x8000 >> flagRecv : 0]);
-                    u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet1.read_relaxed()]);
-                    u128 vFMS1 = vFM | wklSet1;
-                    u128 vFMV1 = u128::fromV(g_imm_table.fsmb_table[(var1 < 16) ? 0x8000 >> var1 : 0]);
-                    u32 var5 = SPU.ReadLS32(0x1ec);
-                    u128 wklMinCnt = vm::read128(spurs.addr() + 0x40);
-                    u128 wklMaxCnt = vm::read128(spurs.addr() + 0x50);
-                    u128 vCC = u128::andnot(vFMS1, u128::eq8(wklReadyCount0, {}) | u128::leu8(vRC, vAABB)) |
-                        u128::leu8(wklMaxCnt, vAABB) |
-                        u128::eq8(savedC, {}) |
-                        u128::fromV(g_imm_table.fsmb_table[(~var5) >> 16]);
-                    u128 vCCH1 = u128::andnot(vCC,
-                        u128::from8p(0x80) & (vFMS1 | u128::gtu8(wklReadyCount0, vAABB)) |
-                        u128::from8p(0x7f) & savedC);
-                    u128 vCCL1 = u128::andnot(vCC,
-                        u128::from8p(0x80) & vFMV1 |
-                        u128::from8p(0x40) & u128::gtu8(vAABB, {}) & u128::gtu8(wklMinCnt, vAABB) |
-                        u128::from8p(0x3c) & u128::fromV(_mm_slli_epi32(u128::sub8(u128::from8p(8), vAABB).vi, 2)) |
-                        u128::from8p(0x02) & u128::eq8(savedD, u128::from8p((u8)var0)) |
-                        u128::from8p(0x01));
-                    u128 vSTAT =
-                        u128::from8p(0x01) & u128::gtu8(wklReadyCount0, vAABB) |
-                        u128::from8p(0x02) & wklSet1 |
-                        u128::from8p(0x04) & vFM;
-
-                    for (s32 i = 0, max = -1; i < 0x10; i++)
-                    {
-                        const s32 value = ((s32)vCCH1.u8r[i] << 8) | ((s32)vCCL1.u8r[i]);
-                        if (value > max && (vCC.u8r[i] & 1) == 0)
-                        {
-                            vNUM = i;
-                            max = value;
-                        }
-                    }
-
-                    if (vNUM < 0x10)
-                    {
-                        vRES = ((u64)vNUM << 32) | vSTAT.u8r[vNUM];
-                        vSET.u8r[vNUM] = 0x01;
-                    }
-
-                    SPU.WriteLS8(0x1eb, vNUM == 0x20);
-
-                    if (!arg1 || var1 == vNUM)
-                    {
-                        spurs->m.wklSet1._and_not(be_t<u16>::make((u16)(vNUM < 16 ? 0x8000 >> vNUM : 0)));
-                        if (vNUM == flagRecv && wklFlag == 0)
-                        {
-                            spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(-1));
-                        }
-                    }
-                }
-
-                if (arg1 == 0)
-                {
-                    vm::write128(spurs.addr() + 0x20, u128::add8(vAA, vSET)); // update wklA
-
-                    SPU.WriteLS128(0x180, vSET); // update savedA
-                    SPU.WriteLS32(0x1dc, vNUM); // update var1
-                }
-
-                if (arg1 == 1 && vNUM != var1)
-                {
-                    vm::write128(spurs.addr() + 0x30, u128::add8(vBB, vSET)); // update wklB
-
-                    SPU.WriteLS128(0x190, vSET); // update savedB
-                }
-                else
-                {
-                    vm::write128(spurs.addr() + 0x30, vBB); // update wklB
-
-                    SPU.WriteLS128(0x190, {}); // update savedB
-                }
-
-                return vRES;
-            };
-            else SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // second kernel
-            {
-                LV2_LOCK(0); // TODO: lock-free implementation if possible
-
-                const u32 arg1 = SPU.GPR[3]._u32[3];
-                u32 var0 = SPU.ReadLS32(0x1d8);
-                u32 var1 = SPU.ReadLS32(0x1dc);
-                u128 wklA = vm::read128(spurs.addr() + 0x20);
-                u128 wklB = vm::read128(spurs.addr() + 0x30);
-                u128 savedA = SPU.ReadLS128(0x180);
-                u128 savedB = SPU.ReadLS128(0x190);
-                u128 vAA = u128::sub8(wklA, savedA);
-                u128 vBB = u128::sub8(wklB, savedB);
-                u128 vM1 = {}; if (var1 <= 31) vM1.u8r[var1 & 0xf] = (var1 <= 15) ? 0xf : 0xf0;
-                u128 vAABB = (arg1 == 0) ? vAA : u128::add8(vAA, u128::andnot(vM1, vBB));
-
-                u32 vNUM = 0x20;
-                u64 vRES = 0x20ull << 32;
-                u128 vSET = {};
-
-                if (spurs->m.x72.read_relaxed() & (1 << num))
-                {
-                    SPU.WriteLS8(0x1eb, 0); // var4
-                    if (arg1 == 0 || var1 == 0x20)
-                    {
-                        spurs->m.x72._and_not(1 << num);
-                    }
-                }
-                else
-                {
-                    u128 wklReadyCount0 = vm::read128(spurs.addr() + 0x0);
-                    u128 wklReadyCount1 = vm::read128(spurs.addr() + 0x10);
-                    u128 savedC = SPU.ReadLS128(0x1A0);
-                    u128 wklMaxCnt = vm::read128(spurs.addr() + 0x50);
-                    u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed();
-                    u32 flagRecv = spurs->m.flagRecv.read_relaxed();
-                    u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet1.read_relaxed()]);
-                    u128 wklSet2 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet2.read_relaxed()]);
-                    u128 vABL = vAABB & u128::from8p(0x0f);
-                    u128 vABH = u128::fromV(_mm_srli_epi32((vAABB & u128::from8p(0xf0)).vi, 4));
-                    u32 var5 = SPU.ReadLS32(0x1ec);
-                    u128 v5L = u128::fromV(g_imm_table.fsmb_table[var5 >> 16]);
-                    u128 v5H = u128::fromV(g_imm_table.fsmb_table[(u16)var5]);
-                    u128 vFML = u128::fromV(g_imm_table.fsmb_table[(wklFlag == 0) && (flagRecv < 16) ? 0x8000 >> flagRecv : 0]);
-                    u128 vFMH = u128::fromV(g_imm_table.fsmb_table[(u16)((wklFlag == 0) && (flagRecv < 32) ? 0x80000000 >> flagRecv : 0)]);
-                    u128 vCL = u128::fromV(_mm_slli_epi32((savedC & u128::from8p(0x0f)).vi, 4));
-                    u128 vCH = savedC & u128::from8p(0xf0);
-                    u128 vABRL = u128::gtu8(wklReadyCount0, vABL);
-                    u128 vABRH = u128::gtu8(wklReadyCount1, vABH);
-                    u128 vCCL = v5L & u128::gtu8(vCL, {}) & u128::gtu8(wklMaxCnt & u128::from8p(0x0f), vABL) & (wklSet1 | vFML | vABRL);
-                    u128 vCCH = v5H & u128::gtu8(vCH, {}) & u128::gtu8(u128::fromV(_mm_srli_epi32((wklMaxCnt & u128::from8p(0xf0)).vi, 4)), vABH) & (wklSet2 | vFMH | vABRH);
-                    u128 v1H = {}; if (var1 <= 31 && var1 > 15) v1H.u8r[var1 & 0xf] = 4;
-                    u128 v1L = {}; if (var1 <= 15) v1L.u8r[var1] = 4;
-                    u128 vCH1 = (v1H | vCH & u128::from8p(0xFB)) & vCCH;
-                    u128 vCL1 = (v1L | vCL & u128::from8p(0xFB)) & vCCL;
-                    u128 vSTATL = vABRL & u128::from8p(1) | wklSet1 & u128::from8p(2) | vFML & u128::from8p(4);
-                    u128 vSTATH = vABRH & u128::from8p(1) | wklSet2 & u128::from8p(2) | vFMH & u128::from8p(4);
-
-                    s32 max = -1;
-                    for (u32 i = 0; i < 0x10; i++)
-                    {
-                        const s32 value = vCL1.u8r[i];
-                        if (value > max && (vCCL.u8r[i] & 1))
-                        {
-                            vNUM = i;
-                            max = value;
-                        }
-                    }
-                    for (u32 i = 16; i < 0x20; i++)
-                    {
-                        const s32 value = vCH1.u8r[i];
-                        if (value > max && (vCCH.u8r[i] & 1))
-                        {
-                            vNUM = i;
-                            max = value;
-                        }
-                    }
-
-                    if (vNUM < 0x10)
-                    {
-                        vRES = ((u64)vNUM << 32) | vSTATL.u8r[vNUM];
-                        vSET.u8r[vNUM] = 0x01;
-                    }
-                    else if (vNUM < 0x20)
-                    {
-                        vRES = ((u64)vNUM << 32) | vSTATH.u8r[vNUM & 0xf];
-                        vSET.u8r[vNUM] = 0x10;
-                    }
-
-                    SPU.WriteLS8(0x1eb, vNUM == 0x20);
-
-                    if (!arg1 || var1 == vNUM)
-                    {
-                        spurs->m.wklSet1._and_not(be_t<u16>::make((u16)(vNUM < 16 ? 0x8000 >> vNUM : 0)));
-                        spurs->m.wklSet2._and_not(be_t<u16>::make((u16)(0x80000000 >> vNUM)));
-                        if (vNUM == flagRecv && wklFlag == 0)
-                        {
-                            spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(-1));
-                        }
-                    }
-                }
-
-                if (arg1 == 0)
-                {
-                    vm::write128(spurs.addr() + 0x20, u128::add8(vAA, vSET)); // update wklA
-
-                    SPU.WriteLS128(0x180, vSET); // update savedA
-                    SPU.WriteLS32(0x1dc, vNUM); // update var1
-                }
-
-                if (arg1 == 1 && vNUM != var1)
-                {
-                    vm::write128(spurs.addr() + 0x30, u128::add8(vBB, vSET)); // update wklB
-
-                    SPU.WriteLS128(0x190, vSET); // update savedB
-                }
-                else
-                {
-                    vm::write128(spurs.addr() + 0x30, vBB); // update wklB
-
-                    SPU.WriteLS128(0x190, {}); // update savedB
-                }
-
-                return vRES;
-            };
-            //SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // test
-            //{
-            //    LV2_LOCK(0);
-            //    SPU.FastCall(0x290);
-            //    u64 vRES = SPU.GPR[3]._u64[1];
-            //    return vRES;
-            //};
-
-            SPU.WriteLS128(0x1c0, u128::from32r(0, spurs.addr(), num, 0x1f));
-
-            u32 wid = 0x20;
-            u32 stat = 0;
-            while (true)
-            {
-                if (Emu.IsStopped())
-                {
-                    cellSpurs->Warning("Spurs Kernel aborted");
-                    return;
-                }
-
-                // get current workload info:
-                auto& wkl = wid <= 15 ? spurs->m.wklG1[wid] : (wid <= 31 && isSecond ? spurs->m.wklG2[wid & 0xf] : spurs->m.wklSysG);
-
-                if (SPU.ReadLS64(0x1d0) != wkl.pm.addr())
-                {
-                    // load executable code:
-                    memcpy(vm::get_ptr<void>(SPU.ls_offset + 0xa00), wkl.pm.get_ptr(), wkl.size);
-                    SPU.WriteLS64(0x1d0, wkl.pm.addr());
-                    SPU.WriteLS32(0x1d8, wkl.copy.read_relaxed());
-                }
-
-                if (!isSecond) SPU.WriteLS16(0x1e8, 0);
-
-                // run workload:
-                SPU.GPR[1]._u32[3] = 0x3FFB0;
-                SPU.GPR[3]._u32[3] = 0x100;
-                SPU.GPR[4]._u64[1] = wkl.data;
-                SPU.GPR[5]._u32[3] = stat;
-                SPU.FastCall(0xa00);
-
-                // check status:
-                auto status = SPU.SPU.Status.GetValue();
-                if (status == SPU_STATUS_STOPPED_BY_STOP)
-                {
-                    return;
-                }
-                else
-                {
-                    assert(status == SPU_STATUS_RUNNING);
-                }
-
-                // get workload id:
-                SPU.GPR[3].clear();
-                assert(SPU.m_code3_func);
-                u64 res = SPU.m_code3_func(SPU);
-                stat = (u32)(res);
-                wid = (u32)(res >> 32);
-            }
-
-        })->GetId();
+        spurs->m.spus[num] = spu->GetId();
     }
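Note (not part of the patch): a hedged sketch of the entry contract implied by the loop above. `spursKernelEntry` itself is declared but not defined in this diff; `spursKernelEntrySketch` below is hypothetical. The new `spu_thread_initialize` call passes `num` and `spurs.addr()` as thread arguments, which the removed `PRX_DEBUG_XXX` code placed in `GPR[3]._u32[3]` and `GPR[4]._u64[1]`, so the registered handler can presumably read them the same way.

```cpp
bool spursKernelEntrySketch(SPUThread& spu)
{
    const u32 spuNum    = spu.GPR[3]._u32[3]; // 'num' argument
    const u64 spursAddr = spu.GPR[4]._u64[1]; // 'spurs.addr()' argument
    // ... the scheduling loop that replaces the old m_code3_func lambdas would go here ...
    (void)spuNum; (void)spursAddr;
    return false; // the kernel owns the thread; it never returns to an SPU-side caller
}
```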
 
     if (flags & SAF_SPU_PRINTF_ENABLED)
@@ -512,8 +206,8 @@ s64 spursInit(
         assert(!"lwcond_create() failed");
     }
 
-    spurs->m.flags1 = (flags & SAF_EXIT_IF_NO_WORK ? SF1_EXIT_IF_NO_WORK : 0) | (isSecond ? SF1_IS_SECOND : 0);
-    spurs->m.flagRecv.write_relaxed(0xff);
+    spurs->m.flags1 = (flags & SAF_EXIT_IF_NO_WORK ? SF1_EXIT_IF_NO_WORK : 0) | (isSecond ? SF1_32_WORKLOADS : 0);
+    spurs->m.wklFlagReceiver.write_relaxed(0xff);
     spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(-1));
     spurs->_u8[0xD64] = 0;
     spurs->_u8[0xD65] = 0;
@@ -521,7 +215,7 @@ s64 spursInit(
     spurs->m.ppuPriority = ppuPriority;
 
     u32 queue;
-    if (s32 res = spursCreateLv2EventQueue(spurs, queue, vm::ptr<u8>::make(spurs.addr() + 0xc9), 0x2a, *(u64*)"_spuPrv"))
+    if (s32 res = (s32)spursCreateLv2EventQueue(spurs, queue, vm::ptr<u8>::make(spurs.addr() + 0xc9), 0x2a, *(u64*)"_spuPrv"))
     {
         assert(!"spursCreateLv2EventQueue() failed");
     }
@@ -581,15 +275,15 @@ s64 spursInit(
             bool do_break = false;
             for (u32 i = 0; i < 16; i++)
             {
-                if (spurs->m.wklStat1[i].read_relaxed() == 2 &&
-                    spurs->m.wklG1[i].priority.data() != 0 &&
-                    spurs->m.wklMaxCnt[i].read_relaxed() & 0xf
+                if (spurs->m.wklState1[i].read_relaxed() == 2 &&
+                    *((u64 *)spurs->m.wklInfo1[i].priority) != 0 &&
+                    spurs->m.wklMaxContention[i].read_relaxed() & 0xf
                     )
                 {
-                    if (spurs->m.wklReadyCount[i].read_relaxed() ||
-                        spurs->m.wklSet1.read_relaxed() & (0x8000u >> i) ||
+                    if (spurs->m.wklReadyCount1[i].read_relaxed() ||
+                        spurs->m.wklSignal1.read_relaxed() & (0x8000u >> i) ||
                         (spurs->m.wklFlag.flag.read_relaxed() == 0 &&
-                        spurs->m.flagRecv.read_relaxed() == (u8)i
+                        spurs->m.wklFlagReceiver.read_relaxed() == (u8)i
                         ))
                     {
                         do_break = true;
@@ -597,17 +291,17 @@ s64 spursInit(
                     }
                 }
             }
-            if (spurs->m.flags1 & SF1_IS_SECOND) for (u32 i = 0; i < 16; i++)
+            if (spurs->m.flags1 & SF1_32_WORKLOADS) for (u32 i = 0; i < 16; i++)
             {
-                if (spurs->m.wklStat2[i].read_relaxed() == 2 &&
-                    spurs->m.wklG2[i].priority.data() != 0 &&
-                    spurs->m.wklMaxCnt[i].read_relaxed() & 0xf0
+                if (spurs->m.wklState2[i].read_relaxed() == 2 &&
+                    *((u64 *)spurs->m.wklInfo2[i].priority) != 0 &&
+                    spurs->m.wklMaxContention[i].read_relaxed() & 0xf0
                     )
                 {
-                    if (spurs->m.wklReadyCount[i + 0x10].read_relaxed() ||
-                        spurs->m.wklSet2.read_relaxed() & (0x8000u >> i) ||
+                    if (spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() ||
+                        spurs->m.wklSignal2.read_relaxed() & (0x8000u >> i) ||
                         (spurs->m.wklFlag.flag.read_relaxed() == 0 &&
-                        spurs->m.flagRecv.read_relaxed() == (u8)i + 0x10
+                        spurs->m.wklFlagReceiver.read_relaxed() == (u8)i + 0x10
                         ))
                     {
                         do_break = true;
@@ -687,7 +381,7 @@ s64 spursInit(
         }
     }
 
-    spurs->m.unk22 = 0;
+    spurs->m.traceBuffer.set(0); // can also use cellLibprof if available (omitted)
 
     // some unknown subroutine
@@ -1349,7 +1043,7 @@ s32 spursAddWorkload(
     }
 
     u32 wnum;
-    const u32 wmax = spurs->m.flags1 & SF1_IS_SECOND ? 0x20u : 0x10u; // TODO: check if can be changed
+    const u32 wmax = spurs->m.flags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u; // TODO: check if can be changed
     spurs->m.wklMskA.atomic_op([spurs, wmax, &wnum](be_t<u32>& value)
     {
         wnum = cntlz32(~(u32)value); // found empty position
@@ -1368,15 +1062,18 @@ s32 spursAddWorkload(
     u32 index = wnum & 0xf;
     if (wnum <= 15)
     {
-        assert((spurs->m.wklA[wnum] & 0xf) == 0);
-        assert((spurs->m.wklB[wnum] & 0xf) == 0);
-        spurs->m.wklStat1[wnum].write_relaxed(1);
-        spurs->m.wklD1[wnum] = 0;
-        spurs->m.wklE1[wnum] = 0;
-        spurs->m.wklG1[wnum].pm = pm;
-        spurs->m.wklG1[wnum].data = data;
-        spurs->m.wklG1[wnum].size = size;
-        spurs->m.wklG1[wnum].priority = *(be_t<u64>*)priorityTable;
+        assert((spurs->m.wklCurrentContention[wnum] & 0xf) == 0);
+        assert((spurs->m.wklPendingContention[wnum] & 0xf) == 0);
+        spurs->m.wklState1[wnum].write_relaxed(1);
+        spurs->m.wklStatus1[wnum] = 0;
+        spurs->m.wklEvent1[wnum] = 0;
+        spurs->m.wklInfo1[wnum].addr = pm;
+        spurs->m.wklInfo1[wnum].arg = data;
+        spurs->m.wklInfo1[wnum].size = size;
+        for (u32 i = 0; i < 8; i++)
+        {
+            spurs->m.wklInfo1[wnum].priority[i] = priorityTable[i];
+        }
         spurs->m.wklH1[wnum].nameClass = nameClass;
         spurs->m.wklH1[wnum].nameInstance = nameInstance;
         memset(spurs->m.wklF1[wnum].unk0, 0, 0x20); // clear struct preserving semaphore id
@@ -1385,25 +1082,29 @@ s32 spursAddWorkload(
         {
             spurs->m.wklF1[wnum].hook = hook;
             spurs->m.wklF1[wnum].hookArg = hookArg;
-            spurs->m.wklE1[wnum] |= 2;
+            spurs->m.wklEvent1[wnum] |= 2;
         }
-        if ((spurs->m.flags1 & SF1_IS_SECOND) == 0)
+        if ((spurs->m.flags1 & SF1_32_WORKLOADS) == 0)
         {
-            spurs->m.wklReadyCount[wnum + 16].write_relaxed(0);
-            spurs->m.wklMinCnt[wnum] = minContention > 8 ? 8 : minContention;
+            spurs->m.wklIdleSpuCountOrReadyCount2[wnum].write_relaxed(0);
+            spurs->m.wklMinContention[wnum] = minContention > 8 ? 8 : minContention;
         }
+        spurs->m.wklReadyCount1[wnum].write_relaxed(0);
     }
     else
     {
-        assert((spurs->m.wklA[index] & 0xf0) == 0);
-        assert((spurs->m.wklB[index] & 0xf0) == 0);
-        spurs->m.wklStat2[index].write_relaxed(1);
-        spurs->m.wklD2[index] = 0;
-        spurs->m.wklE2[index] = 0;
-        spurs->m.wklG2[index].pm = pm;
-        spurs->m.wklG2[index].data = data;
-        spurs->m.wklG2[index].size = size;
-        spurs->m.wklG2[index].priority = *(be_t<u64>*)priorityTable;
+        assert((spurs->m.wklCurrentContention[index] & 0xf0) == 0);
+        assert((spurs->m.wklPendingContention[index] & 0xf0) == 0);
+        spurs->m.wklState2[index].write_relaxed(1);
+        spurs->m.wklStatus2[index] = 0;
+        spurs->m.wklEvent2[index] = 0;
+        spurs->m.wklInfo2[index].addr = pm;
+        spurs->m.wklInfo2[index].arg = data;
+        spurs->m.wklInfo2[index].size = size;
+        for (u32 i = 0; i < 8; i++)
+        {
+            spurs->m.wklInfo2[index].priority[i] = priorityTable[i];
+        }
         spurs->m.wklH2[index].nameClass = nameClass;
         spurs->m.wklH2[index].nameInstance = nameInstance;
         memset(spurs->m.wklF2[index].unk0, 0, 0x20); // clear struct preserving semaphore id
@@ -1412,34 +1113,34 @@ s32 spursAddWorkload(
         {
             spurs->m.wklF2[index].hook = hook;
             spurs->m.wklF2[index].hookArg = hookArg;
-            spurs->m.wklE2[index] |= 2;
+            spurs->m.wklEvent2[index] |= 2;
         }
+        spurs->m.wklIdleSpuCountOrReadyCount2[wnum].write_relaxed(0);
     }
-    spurs->m.wklReadyCount[wnum].write_relaxed(0);
 
     if (wnum <= 15)
     {
-        spurs->m.wklMaxCnt[wnum].atomic_op([maxContention](u8& v)
+        spurs->m.wklMaxContention[wnum].atomic_op([maxContention](u8& v)
         {
             v &= ~0xf;
            v |= (maxContention > 8 ? 8 : maxContention);
         });
-        spurs->m.wklSet1._and_not({ be_t<u16>::make(0x8000 >> index) }); // clear bit in wklFlag1
+        spurs->m.wklSignal1._and_not({ be_t<u16>::make(0x8000 >> index) }); // clear bit in wklFlag1
     }
     else
    {
-        spurs->m.wklMaxCnt[index].atomic_op([maxContention](u8& v)
+        spurs->m.wklMaxContention[index].atomic_op([maxContention](u8& v)
        {
             v &= ~0xf0;
             v |= (maxContention > 8 ? 8 : maxContention) << 4;
         });
-        spurs->m.wklSet2._and_not({ be_t<u16>::make(0x8000 >> index) }); // clear bit in wklFlag2
+        spurs->m.wklSignal2._and_not({ be_t<u16>::make(0x8000 >> index) }); // clear bit in wklFlag2
     }
-    spurs->m.flagRecv.compare_and_swap(wnum, 0xff);
+    spurs->m.wklFlagReceiver.compare_and_swap(wnum, 0xff);
 
     u32 res_wkl;
-    CellSpurs::_sub_str3& wkl = wnum <= 15 ? spurs->m.wklG1[wnum] : spurs->m.wklG2[wnum & 0xf];
+    CellSpurs::WorkloadInfo& wkl = wnum <= 15 ? spurs->m.wklInfo1[wnum] : spurs->m.wklInfo2[wnum & 0xf];
     spurs->m.wklMskB.atomic_op_sync([spurs, &wkl, wnum, &res_wkl](be_t<u32>& v)
     {
         const u32 mask = v & ~(0x80000000u >> wnum);
@@ -1449,29 +1150,29 @@ s32 spursAddWorkload(
         {
             if (mask & m)
             {
-                CellSpurs::_sub_str3& current = i <= 15 ? spurs->m.wklG1[i] : spurs->m.wklG2[i & 0xf];
-                if (current.pm.addr() == wkl.pm.addr())
+                CellSpurs::WorkloadInfo& current = i <= 15 ? spurs->m.wklInfo1[i] : spurs->m.wklInfo2[i & 0xf];
+                if (current.addr.addr() == wkl.addr.addr())
                 {
                     // if a workload with identical policy module found
-                    res_wkl = current.copy.read_relaxed();
+                    res_wkl = current.uniqueId.read_relaxed();
                     break;
                 }
                 else
                 {
-                    k |= 0x80000000 >> current.copy.read_relaxed();
+                    k |= 0x80000000 >> current.uniqueId.read_relaxed();
                     res_wkl = cntlz32(~k);
                 }
             }
         }
 
-        wkl.copy.exchange((u8)res_wkl);
+        wkl.uniqueId.exchange((u8)res_wkl);
         v = mask | (0x80000000u >> wnum);
     });
     assert(res_wkl <= 31);
 
-    spurs->wklStat(wnum).exchange(2);
-    spurs->m.xBD.exchange(0xff);
-    spurs->m.x72.exchange(0xff);
+    spurs->wklState(wnum).exchange(2);
+    spurs->m.sysSrvMsgUpdateWorkload.exchange(0xff);
+    spurs->m.sysSrvMessage.exchange(0xff);
     return CELL_OK;
 }
@@ -1598,7 +1299,7 @@ s64 cellSpursWorkloadAttributeSetShutdownCompletionEventHook(vm::ptr<CellSpursWorkloadAttribute> attr, vm::ptr<CellSpursShutdownCompletionEventHook> hook, vm::ptr<void> arg)
 
-s64 cellSpursAddWorkloadWithAttribute(vm::ptr<CellSpurs> spurs, vm::ptr<u32> wid, vm::ptr<const CellSpursWorkloadAttribute> attr)
+s64 cellSpursAddWorkloadWithAttribute(vm::ptr<CellSpurs> spurs, const vm::ptr<u32> wid, vm::ptr<const CellSpursWorkloadAttribute> attr)
 {
     cellSpurs->Warning("%s(spurs_addr=0x%x, wid_addr=0x%x, attr_addr=0x%x)", __FUNCTION__, spurs.addr(), wid.addr(), attr.addr());
 #ifdef PRX_DEBUG_XXX
@@ -1681,7 +1382,7 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr<CellSpurs> spurs, u32 wid, u32 is_set)
     {
         return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN;
     }
-    if (wid >= (spurs->m.flags1 & SF1_IS_SECOND ? 0x20u : 0x10u))
+    if (wid >= (spurs->m.flags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u))
     {
         return CELL_SPURS_POLICY_MODULE_ERROR_INVAL;
     }
@@ -1697,14 +1398,14 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr<CellSpurs> spurs, u32 wid, u32 is_set)
     {
         if (is_set)
         {
-            if (spurs->m.flagRecv.read_relaxed() != 0xff)
+            if (spurs->m.wklFlagReceiver.read_relaxed() != 0xff)
             {
                 return CELL_SPURS_POLICY_MODULE_ERROR_BUSY;
             }
         }
         else
         {
-            if (spurs->m.flagRecv.read_relaxed() != wid)
+            if (spurs->m.wklFlagReceiver.read_relaxed() != wid)
            {
                 return CELL_SPURS_POLICY_MODULE_ERROR_PERM;
             }
@@ -1716,7 +1417,7 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr<CellSpurs> spurs, u32 wid, u32 is_set)
         return res;
     }
 
-    spurs->m.flagRecv.atomic_op([wid, is_set](u8& FR)
+    spurs->m.wklFlagReceiver.atomic_op([wid, is_set](u8& FR)
     {
         if (is_set)
         {
@@ -1756,24 +1457,107 @@ s64 cellSpursGetWorkloadFlag(vm::ptr<CellSpurs> spurs, vm::ptr<u64> flag)
 
-s64 cellSpursSendWorkloadSignal()
+s64 cellSpursSendWorkloadSignal(vm::ptr<CellSpurs> spurs, u32 workloadId)
 {
+    cellSpurs->Warning("%s(spurs=0x%x, workloadId=0x%x)", __FUNCTION__, spurs.addr(), workloadId);
+
 #ifdef PRX_DEBUG
-    cellSpurs->Warning("%s()", __FUNCTION__);
     return GetCurrentPPUThread().FastCall2(libsre + 0xA658, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (spurs.addr() == 0)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER;
+    }
+
+    if (spurs.addr() % CellSpurs::align)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN;
+    }
+
+    if (workloadId >= CELL_SPURS_MAX_WORKLOAD2 || (workloadId >= CELL_SPURS_MAX_WORKLOAD && (spurs->m.flags1 & SF1_32_WORKLOADS) == 0))
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_INVAL;
+    }
+
+    if ((spurs->m.wklMskA.read_relaxed() & (0x80000000u >> workloadId)) == 0)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_SRCH;
+    }
+
+    if (spurs->m.exception)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
+    }
+
+    u8 state;
+    if (workloadId >= CELL_SPURS_MAX_WORKLOAD)
+    {
+        state = spurs->m.wklState2[workloadId & 0x0F].read_relaxed();
+    }
+    else
+    {
+        state = spurs->m.wklState1[workloadId].read_relaxed();
+    }
+
+    if (state != SPURS_WKL_STATE_RUNNABLE)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
+    }
+
+    if (workloadId >= CELL_SPURS_MAX_WORKLOAD)
+    {
+        spurs->m.wklSignal2 |= be_t<u16>::make(0x8000 >> (workloadId & 0x0F));
+    }
+    else
+    {
+        spurs->m.wklSignal1 |= be_t<u16>::make(0x8000 >> workloadId);
+    }
+
     return CELL_OK;
 #endif
 }
 
-s64 cellSpursGetWorkloadData()
+s64 cellSpursGetWorkloadData(vm::ptr<CellSpurs> spurs, vm::ptr<u64> data, u32 workloadId)
 {
+    cellSpurs->Warning("%s(spurs_addr=0x%x, data=0x%x, workloadId=%d)", __FUNCTION__, spurs.addr(), data.addr(), workloadId);
+
 #ifdef PRX_DEBUG
-    cellSpurs->Warning("%s()", __FUNCTION__);
     return GetCurrentPPUThread().FastCall2(libsre + 0xA78C, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (spurs.addr() == 0 || data.addr() == 0)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER;
+    }
+
+    if (spurs.addr() % CellSpurs::align)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN;
+    }
+
+    if (workloadId >= CELL_SPURS_MAX_WORKLOAD2 || (workloadId >= CELL_SPURS_MAX_WORKLOAD && (spurs->m.flags1 & SF1_32_WORKLOADS) == 0))
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_INVAL;
+    }
+
+    if ((spurs->m.wklMskA.read_relaxed() & (0x80000000u >> workloadId)) == 0)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_SRCH;
+    }
+
+    if (spurs->m.exception)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
+    }
+
+    if (workloadId >= CELL_SPURS_MAX_WORKLOAD)
+    {
+        *data = spurs->m.wklInfo2[workloadId & 0x0F].arg;
+    }
+    else
+    {
+        *data = spurs->m.wklInfo1[workloadId].arg;
+    }
+
     return CELL_OK;
 #endif
 }
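Note (not part of the patch): a minimal sketch of the signal-bit layout that `cellSpursSendWorkloadSignal` above uses. `signal_bit_for` is a hypothetical helper: workloads 0..15 map to `wklSignal1` and 16..31 to `wklSignal2`, one bit per workload, MSB first.

```cpp
#include <cstdint>
using u16 = std::uint16_t;
using u32 = std::uint32_t;

struct SignalBits { u16 sig1; u16 sig2; }; // models wklSignal1 / wklSignal2

SignalBits signal_bit_for(u32 wid)
{
    SignalBits s{};
    if (wid < 16)
        s.sig1 = static_cast<u16>(0x8000u >> wid);          // first workload area
    else
        s.sig2 = static_cast<u16>(0x8000u >> (wid & 0x0F)); // second workload area
    return s;
}
```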
@@ -1793,7 +1577,7 @@ s64 cellSpursReadyCountStore(vm::ptr<CellSpurs> spurs, u32 wid, u32 value)
     {
         return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN;
     }
-    if (wid >= (spurs->m.flags1 & SF1_IS_SECOND ? 0x20u : 0x10u) || value > 0xff)
+    if (wid >= (spurs->m.flags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u) || value > 0xff)
     {
         return CELL_SPURS_POLICY_MODULE_ERROR_INVAL;
     }
@@ -1801,12 +1585,19 @@ s64 cellSpursReadyCountStore(vm::ptr<CellSpurs> spurs, u32 wid, u32 value)
     {
         return CELL_SPURS_POLICY_MODULE_ERROR_SRCH;
     }
-    if (spurs->m.exception.data() || spurs->wklStat(wid).read_relaxed() != 2)
+    if (spurs->m.exception.data() || spurs->wklState(wid).read_relaxed() != 2)
     {
         return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
     }
 
-    spurs->m.wklReadyCount[wid].exchange((u8)value);
+    if (wid < CELL_SPURS_MAX_WORKLOAD)
+    {
+        spurs->m.wklReadyCount1[wid].exchange((u8)value);
+    }
+    else
+    {
+        spurs->m.wklIdleSpuCountOrReadyCount2[wid].exchange((u8)value);
+    }
     return CELL_OK;
 }
@@ -1900,111 +1691,581 @@ s64 cellSpursUnsetExceptionEventHandler()
 
 s64 _cellSpursEventFlagInitialize(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, vm::ptr<CellSpursEventFlag> eventFlag, u32 flagClearMode, u32 flagDirection)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("_cellSpursEventFlagInitialize(spurs_addr=0x%x, taskset_addr=0x%x, eventFlag_addr=0x%x, flagClearMode=%d, flagDirection=%d)", spurs.addr(), taskset.addr(), eventFlag.addr(), flagClearMode, flagDirection);
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x1564C, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (taskset.addr() == 0 && spurs.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (spurs.addr() % CellSpurs::align || taskset.addr() % CellSpursTaskset::align || eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    if (taskset.addr() && taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD2)
+    {
+        return CELL_SPURS_TASK_ERROR_INVAL;
+    }
+
+    if (flagDirection > CELL_SPURS_EVENT_FLAG_LAST || flagClearMode > CELL_SPURS_EVENT_FLAG_CLEAR_LAST)
+    {
+        return CELL_SPURS_TASK_ERROR_INVAL;
+    }
+
+    memset(eventFlag.get_ptr(), 0, CellSpursEventFlag::size);
+    eventFlag->m.direction = flagDirection;
+    eventFlag->m.clearMode = flagClearMode;
+    eventFlag->m.spuPort = CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT;
+
+    if (taskset.addr())
+    {
+        eventFlag->m.addr = taskset.addr();
+    }
+    else
+    {
+        eventFlag->m.isIwl = 1;
+        eventFlag->m.addr = spurs.addr();
+    }
+
     return CELL_OK;
 #endif
 }
 
 s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr<CellSpursEventFlag> eventFlag)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagAttachLv2EventQueue(eventFlag_addr=0x%x)", eventFlag.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x157B8, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (!eventFlag)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_AGAIN;
+    }
+
+    if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_SPU2PPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        return CELL_SPURS_TASK_ERROR_PERM;
+    }
+
+    if (eventFlag->m.spuPort != CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT)
+    {
+        return CELL_SPURS_TASK_ERROR_STAT;
+    }
+
+    vm::ptr<CellSpurs> spurs;
+    if (eventFlag->m.isIwl == 1)
+    {
+        spurs.set((u32)eventFlag->m.addr);
+    }
+    else
+    {
+        auto taskset = vm::ptr<CellSpursTaskset>::make((u32)eventFlag->m.addr);
+        spurs.set((u32)taskset->m.spurs.addr());
+    }
+
+    u32 eventQueueId;
+    vm::var<u8> port;
+    auto rc = spursCreateLv2EventQueue(spurs, eventQueueId, port, 1, *((u64 *)"_spuEvF"));
+    if (rc != CELL_OK)
+    {
+        // Return rc if it's an error code from SPURS, otherwise convert the error code to a SPURS task error code
+        return (rc & 0x0FFF0000) == 0x00410000 ? rc : (0x80410900 | (rc & 0xFF));
+    }
+
+    if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        vm::var<be_t<u32>> eventPortId;
+        rc = sys_event_port_create(vm::ptr<u32>::make(eventPortId.addr()), SYS_EVENT_PORT_LOCAL, 0);
+        if (rc == CELL_OK)
+        {
+            rc = sys_event_port_connect_local(eventPortId.value(), eventQueueId);
+            if (rc == CELL_OK)
+            {
+                eventFlag->m.eventPortId = eventPortId;
+                goto success;
+            }
+
+            sys_event_port_destroy(eventPortId.value());
+        }
+
+        // TODO: Implement the following
+        // if (spursDetachLv2EventQueue(spurs, port, 1) == CELL_OK)
+        // {
+        //     sys_event_queue_destroy(eventQueueId, SYS_EVENT_QUEUE_DESTROY_FORCE);
+        // }
+
+        // Return rc if it's an error code from SPURS, otherwise convert the error code to a SPURS task error code
+        return (rc & 0x0FFF0000) == 0x00410000 ? rc : (0x80410900 | (rc & 0xFF));
+    }
+
+success:
+    eventFlag->m.eventQueueId = eventQueueId;
+    eventFlag->m.spuPort = port;
+
     return CELL_OK;
 #endif
 }
 
 s64 cellSpursEventFlagDetachLv2EventQueue(vm::ptr<CellSpursEventFlag> eventFlag)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagDetachLv2EventQueue(eventFlag_addr=0x%x)", eventFlag.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x15998, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (!eventFlag)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_AGAIN;
+    }
+
+    if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_SPU2PPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        return CELL_SPURS_TASK_ERROR_PERM;
+    }
+
+    if (eventFlag->m.spuPort == CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT)
+    {
+        return CELL_SPURS_TASK_ERROR_STAT;
+    }
+
+    if (eventFlag->m.ppuWaitMask || eventFlag->m.ppuPendingRecv)
+    {
+        return CELL_SPURS_TASK_ERROR_BUSY;
+    }
+
+    auto port = eventFlag->m.spuPort;
+    eventFlag->m.spuPort = CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT;
+
+    vm::ptr<CellSpurs> spurs;
+    if (eventFlag->m.isIwl == 1)
+    {
+        spurs.set((u32)eventFlag->m.addr);
+    }
+    else
+    {
+        auto taskset = vm::ptr<CellSpursTaskset>::make((u32)eventFlag->m.addr);
+        spurs.set((u32)taskset->m.spurs.addr());
+    }
+
+    if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        sys_event_port_disconnect(eventFlag->m.eventPortId);
+        sys_event_port_destroy(eventFlag->m.eventPortId);
+    }
+
+    s64 rc = CELL_OK;
+    // TODO: Implement the following
+    // auto rc = spursDetachLv2EventQueue(spurs, port, 1);
+    // if (rc == CELL_OK)
+    // {
+    //     rc = sys_event_queue_destroy(eventFlag->m.eventQueueId, SYS_EVENT_QUEUE_DESTROY_FORCE);
+    // }
+
+    if (rc != CELL_OK)
+    {
+        // Return rc if it's an error code from SPURS, otherwise convert the error code to a SPURS task error code
+        return (rc & 0x0FFF0000) == 0x00410000 ? rc : (0x80410900 | (rc & 0xFF));
+    }
+
     return CELL_OK;
 #endif
 }
 
+s64 _cellSpursEventFlagWait(vm::ptr<CellSpursEventFlag> eventFlag, vm::ptr<u16> mask, u32 mode, u32 block)
+{
+    if (eventFlag.addr() == 0 || mask.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    if (mode > CELL_SPURS_EVENT_FLAG_WAIT_MODE_LAST)
+    {
+        return CELL_SPURS_TASK_ERROR_INVAL;
+    }
+
+    if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_SPU2PPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        return CELL_SPURS_TASK_ERROR_PERM;
+    }
+
+    if (block && eventFlag->m.spuPort == CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT)
+    {
+        return CELL_SPURS_TASK_ERROR_STAT;
+    }
+
+    if (eventFlag->m.ppuWaitMask || eventFlag->m.ppuPendingRecv)
+    {
+        return CELL_SPURS_TASK_ERROR_BUSY;
+    }
+
+    u16 relevantEvents = eventFlag->m.events & *mask;
+    if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        // Make sure the wait mask and mode specified do not conflict with those of the already waiting tasks.
+        // Conflict scenarios:
+        // OR  vs OR  - A conflict never occurs
+        // OR  vs AND - A conflict occurs if the masks for the two tasks overlap
+        // AND vs AND - A conflict occurs if the masks for the two tasks are not the same
+
+        // Determine the set of all already waiting tasks whose wait mode/mask can possibly conflict with the specified wait mode/mask.
+        // This set is equal to 'set of all tasks waiting' - 'set of all tasks whose wait conditions have been met'.
+        // If the wait mode is OR, we prune the set of all tasks that are waiting in OR mode from the set since a conflict cannot occur
+        // with an already waiting task in OR mode.
+        u16 relevantWaitSlots = eventFlag->m.spuTaskUsedWaitSlots & ~eventFlag->m.spuTaskPendingRecv;
+        if (mode == CELL_SPURS_EVENT_FLAG_OR)
+        {
+            relevantWaitSlots &= eventFlag->m.spuTaskWaitMode;
+        }
+
+        int i = CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS - 1;
+        while (relevantWaitSlots)
+        {
+            if (relevantWaitSlots & 0x0001)
+            {
+                if (eventFlag->m.spuTaskWaitMask[i] & *mask && eventFlag->m.spuTaskWaitMask[i] != *mask)
+                {
+                    return CELL_SPURS_TASK_ERROR_AGAIN;
+                }
+            }
+
+            relevantWaitSlots >>= 1;
+            i--;
+        }
+    }
+
+    // There is no need to block if all bits required by the wait operation have already been set or
+    // if the wait mode is OR and at least one of the bits required by the wait operation has been set.
+    bool recv;
+    if ((*mask & ~relevantEvents) == 0 || (mode == CELL_SPURS_EVENT_FLAG_OR && relevantEvents))
+    {
+        // If the clear flag is AUTO then clear the bits consumed by this thread
+        if (eventFlag->m.clearMode == CELL_SPURS_EVENT_FLAG_CLEAR_AUTO)
+        {
+            eventFlag->m.events &= ~relevantEvents;
+        }
+
+        recv = false;
+    }
+    else
+    {
+        // If we reach here it means that the conditions for this thread have not been met.
+        // If this is a try wait operation then do not block but return an error code.
+        if (block == 0)
+        {
+            return CELL_SPURS_TASK_ERROR_BUSY;
+        }
+
+        eventFlag->m.ppuWaitSlotAndMode = 0;
+        if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
+        {
+            // Find an unused wait slot
+            int i = 0;
+            u16 spuTaskUsedWaitSlots = eventFlag->m.spuTaskUsedWaitSlots;
+            while (spuTaskUsedWaitSlots & 0x0001)
+            {
+                spuTaskUsedWaitSlots >>= 1;
+                i++;
+            }
+
+            if (i == CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS)
+            {
+                // Event flag has no empty wait slots
+                return CELL_SPURS_TASK_ERROR_BUSY;
+            }
+
+            // Mark the found wait slot as used by this thread
+            eventFlag->m.ppuWaitSlotAndMode = (CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS - 1 - i) << 4;
+        }
+
+        // Save the wait mask and mode for this thread
+        eventFlag->m.ppuWaitSlotAndMode |= mode;
+        eventFlag->m.ppuWaitMask = *mask;
+        recv = true;
+    }
+
+    u16 receivedEventFlag = relevantEvents; // events consumed without blocking
+    if (recv) {
+        // Block till something happens
+        vm::var<sys_event_data> data;
+        auto rc = sys_event_queue_receive(eventFlag->m.eventQueueId, data, 0);
+        if (rc != CELL_OK)
+        {
+            assert(0);
+        }
+
+        int i = 0;
+        if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
+        {
+            i = eventFlag->m.ppuWaitSlotAndMode >> 4;
+        }
+
+        receivedEventFlag = eventFlag->m.pendingRecvTaskEvents[i];
+        eventFlag->m.ppuPendingRecv = 0;
+    }
+
+    *mask = receivedEventFlag;
+    return CELL_OK;
+}
+
 s64 cellSpursEventFlagWait(vm::ptr<CellSpursEventFlag> eventFlag, vm::ptr<u16> mask, u32 mode)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagWait(eventFlag_addr=0x%x, mask_addr=0x%x, mode=%d)", eventFlag.addr(), mask.addr(), mode);
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x15E68, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
-    return CELL_OK;
+    return _cellSpursEventFlagWait(eventFlag, mask, mode, 1/*block*/);
 #endif
 }
 
 s64 cellSpursEventFlagClear(vm::ptr<CellSpursEventFlag> eventFlag, u16 bits)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagClear(eventFlag_addr=0x%x, bits=0x%x)", eventFlag.addr(), bits);
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x15E9C, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (eventFlag.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    eventFlag->m.events &= ~bits;
     return CELL_OK;
 #endif
 }
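Note (not part of the patch): a standalone sketch of the wait-mask conflict rule that `_cellSpursEventFlagWait` documents and checks above. `wait_conflicts` is a hypothetical helper that condenses the pairwise check the code performs (`waitMask[i] & *mask && waitMask[i] != *mask`, with OR-vs-OR waiters pruned beforehand).

```cpp
#include <cstdint>
using u16 = std::uint16_t;

// Two waiters on the same event flag conflict when:
//   OR  vs OR  - never
//   anything else - their masks overlap but are not identical
bool wait_conflicts(u16 maskA, bool orA, u16 maskB, bool orB)
{
    if (orA && orB)
    {
        return false; // OR vs OR never conflicts
    }
    return (maskA & maskB) != 0 && maskA != maskB;
}
```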
 
 s64 cellSpursEventFlagSet(vm::ptr<CellSpursEventFlag> eventFlag, u16 bits)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagSet(eventFlag_addr=0x%x, bits=0x%x)", eventFlag.addr(), bits);
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x15F04, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (eventFlag.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_PPU2SPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        return CELL_SPURS_TASK_ERROR_PERM;
+    }
+
+    u16 ppuEventFlag = 0;
+    bool send = false;
+    int ppuWaitSlot = 0;
+    u16 eventsToClear = 0;
+    if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY && eventFlag->m.ppuWaitMask)
+    {
+        u16 ppuRelevantEvents = (eventFlag->m.events | bits) & eventFlag->m.ppuWaitMask;
+
+        // Unblock the waiting PPU thread if either all the bits being waited by the thread have been set or
+        // if the wait mode of the thread is OR and at least one bit the thread is waiting on has been set
+        if ((eventFlag->m.ppuWaitMask & ~ppuRelevantEvents) == 0 ||
+            ((eventFlag->m.ppuWaitSlotAndMode & 0x0F) == CELL_SPURS_EVENT_FLAG_OR && ppuRelevantEvents != 0))
+        {
+            eventFlag->m.ppuPendingRecv = 1;
+            eventFlag->m.ppuWaitMask = 0;
+            ppuEventFlag = ppuRelevantEvents;
+            eventsToClear = ppuRelevantEvents;
+            ppuWaitSlot = eventFlag->m.ppuWaitSlotAndMode >> 4;
+            send = true;
+        }
+    }
+
+    int i = CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS - 1;
+    int j = 0;
+    u16 relevantWaitSlots = eventFlag->m.spuTaskUsedWaitSlots & ~eventFlag->m.spuTaskPendingRecv;
+    u16 spuTaskPendingRecv = 0;
+    u16 pendingRecvTaskEvents[16];
+    while (relevantWaitSlots)
+    {
+        if (relevantWaitSlots & 0x0001)
+        {
+            u16 spuTaskRelevantEvents = (eventFlag->m.events | bits) & eventFlag->m.spuTaskWaitMask[i];
+
+            // Unblock the waiting SPU task if either all the bits being waited by the task have been set or
+            // if the wait mode of the task is OR and at least one bit the task is waiting on has been set
+            if ((eventFlag->m.spuTaskWaitMask[i] & ~spuTaskRelevantEvents) == 0 ||
+                (((eventFlag->m.spuTaskWaitMode >> j) & 0x0001) == CELL_SPURS_EVENT_FLAG_OR && spuTaskRelevantEvents != 0))
+            {
+                eventsToClear |= spuTaskRelevantEvents;
+                spuTaskPendingRecv |= 1 << j;
+                pendingRecvTaskEvents[j] = spuTaskRelevantEvents;
+            }
+        }
+
+        relevantWaitSlots >>= 1;
+        i--;
+        j++;
+    }
+
+    eventFlag->m.events |= bits;
+    eventFlag->m.spuTaskPendingRecv |= spuTaskPendingRecv;
+
+    // If the clear flag is AUTO then clear the bits consumed by all tasks marked to be unblocked
+    if (eventFlag->m.clearMode == CELL_SPURS_EVENT_FLAG_CLEAR_AUTO)
+    {
+        eventFlag->m.events &= ~eventsToClear;
+    }
+
+    if (send)
+    {
+        // Signal the PPU thread to be woken up
+        eventFlag->m.pendingRecvTaskEvents[ppuWaitSlot] = ppuEventFlag;
+        if (sys_event_port_send(eventFlag->m.eventPortId, 0, 0, 0) != CELL_OK)
+        {
+            assert(0);
+        }
+    }
+
+    if (spuTaskPendingRecv)
+    {
+        // Signal each SPU task whose conditions have been met to be woken up
+        for (int i = 0; i < CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS; i++)
+        {
+            if (spuTaskPendingRecv & (0x8000 >> i))
+            {
+                eventFlag->m.pendingRecvTaskEvents[i] = pendingRecvTaskEvents[i];
+                vm::var<be_t<u32>> taskset;
+                if (eventFlag->m.isIwl)
+                {
+                    cellSpursLookUpTasksetAddress(vm::ptr<CellSpurs>::make((u32)eventFlag->m.addr),
+                        vm::ptr<CellSpursTaskset>::make(taskset.addr()),
+                        eventFlag->m.waitingTaskWklId[i]);
+                }
+                else
+                {
+                    taskset.value() = (u32)eventFlag->m.addr;
+                }
+
+                auto rc = _cellSpursSendSignal(vm::ptr<CellSpursTaskset>::make(taskset.addr()), eventFlag->m.waitingTaskId[i]);
+                if (rc == CELL_SPURS_TASK_ERROR_INVAL || rc == CELL_SPURS_TASK_ERROR_STAT)
+                {
+                    return CELL_SPURS_TASK_ERROR_FATAL;
+                }
+
+                if (rc != CELL_OK)
+                {
+                    assert(0);
+                }
+            }
+        }
+    }
+
     return CELL_OK;
 #endif
 }
 
 s64 cellSpursEventFlagTryWait(vm::ptr<CellSpursEventFlag> eventFlag, vm::ptr<u16> mask, u32 mode)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagTryWait(eventFlag_addr=0x%x, mask_addr=0x%x, mode=0x%x)", eventFlag.addr(), mask.addr(), mode);
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x15E70, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
-    return CELL_OK;
+    return _cellSpursEventFlagWait(eventFlag, mask, mode, 0/*block*/);
 #endif
 }
 
 s64 cellSpursEventFlagGetDirection(vm::ptr<CellSpursEventFlag> eventFlag, vm::ptr<u32> direction)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagGetDirection(eventFlag_addr=0x%x, direction_addr=0x%x)", eventFlag.addr(), direction.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x162C4, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (eventFlag.addr() == 0 || direction.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    *direction = eventFlag->m.direction;
     return CELL_OK;
 #endif
 }
 
 s64 cellSpursEventFlagGetClearMode(vm::ptr<CellSpursEventFlag> eventFlag, vm::ptr<u32> clear_mode)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagGetClearMode(eventFlag_addr=0x%x, clear_mode_addr=0x%x)", eventFlag.addr(), clear_mode.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x16310, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (eventFlag.addr() == 0 || clear_mode.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    *clear_mode = eventFlag->m.clearMode;
     return CELL_OK;
 #endif
 }
 
 s64 cellSpursEventFlagGetTasksetAddress(vm::ptr<CellSpursEventFlag> eventFlag, vm::ptr<u32> taskset)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagGetTasksetAddress(eventFlag_addr=0x%x, taskset_addr=0x%x)", eventFlag.addr(), taskset.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x1635C, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (eventFlag.addr() == 0 || taskset.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    taskset.set(eventFlag->m.isIwl ? 0 : eventFlag->m.addr);
     return CELL_OK;
 #endif
 }
@@ -2306,36 +2567,107 @@ s64 cellSpursJobChainGetSpursAddress()
 #endif
 }
 
-s64 cellSpursCreateTasksetWithAttribute()
+s64 spursCreateTaskset(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, u64 args, vm::ptr<const u8> priority,
+    u32 max_contention, vm::ptr<const char> name, u32 size, s32 enable_clear_ls)
 {
-#ifdef PRX_DEBUG
-    cellSpurs->Warning("%s()", __FUNCTION__);
-    return GetCurrentPPUThread().FastCall2(libsre + 0x14BEC, libsre_rtoc);
-#else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (!spurs || !taskset)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (spurs.addr() % CellSpurs::align || taskset.addr() % CellSpursTaskset::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    memset(taskset.get_ptr(), 0, size);
+
+    taskset->m.spurs = spurs;
+    taskset->m.args = args;
+    taskset->m.enable_clear_ls = enable_clear_ls > 0 ? 1 : 0;
+    taskset->m.size = size;
+
+    vm::var<CellSpursWorkloadAttribute> wkl_attr;
+    _cellSpursWorkloadAttributeInitialize(wkl_attr, 1 /*revision*/, 0x33 /*sdk_version*/, vm::ptr<const void>::make(SPURS_IMG_ADDR_TASKSET_PM), 0x1E40 /*pm_size*/,
+        taskset.addr(), priority, 8 /*min_contention*/, max_contention);
+    // TODO: Check return code
+
+    cellSpursWorkloadAttributeSetName(wkl_attr, vm::ptr<const char>::make(0), name);
+    // TODO: Check return code
+
+    // TODO: cellSpursWorkloadAttributeSetShutdownCompletionEventHook(wkl_attr, hook, taskset);
+    // TODO: Check return code
+
+    vm::var<be_t<u32>> wid;
+    cellSpursAddWorkloadWithAttribute(spurs, vm::ptr<u32>::make(wid.addr()), vm::ptr<const CellSpursWorkloadAttribute>::make(wkl_attr.addr()));
+    // TODO: Check return code
+
+    taskset->m.wkl_flag_wait_task = 0x80;
+    taskset->m.wid = wid.value();
+    // TODO: cellSpursSetExceptionEventHandler(spurs, wid, hook, taskset);
+    // TODO: Check return code
+
     return CELL_OK;
-#endif
 }
 
-s64 cellSpursCreateTaskset(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, u64 args, vm::ptr<const u8> priority, u32 maxContention)
+s64 cellSpursCreateTasksetWithAttribute(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, vm::ptr<CellSpursTasksetAttribute> attr)
+{
+    cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, attr=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), attr.addr());
+
+#ifdef PRX_DEBUG
+    return GetCurrentPPUThread().FastCall2(libsre + 0x14BEC, libsre_rtoc);
+#endif
+
+    if (!attr)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (attr.addr() % CellSpursTasksetAttribute::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    if (attr->m.revision != CELL_SPURS_TASKSET_ATTRIBUTE_REVISION)
+    {
+        return CELL_SPURS_TASK_ERROR_INVAL;
+    }
+
+    auto rc = spursCreateTaskset(spurs, taskset, attr->m.args, vm::ptr<const u8>::make(attr.addr() + offsetof(CellSpursTasksetAttribute, m.priority)),
+        attr->m.max_contention, vm::ptr<const char>::make(attr->m.name.addr()), attr->m.taskset_size, attr->m.enable_clear_ls);
+
+    if (attr->m.taskset_size >= CellSpursTaskset2::size)
+    {
+        // TODO: Implement this
+    }
+
+    return rc;
+}
+
+s64 cellSpursCreateTaskset(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, u64 args, vm::ptr<const u8> priority, u32 maxContention)
 {
     cellSpurs->Warning("cellSpursCreateTaskset(spurs_addr=0x%x, taskset_addr=0x%x, args=0x%llx, priority_addr=0x%x, maxContention=%d)",
         spurs.addr(), taskset.addr(), args, priority.addr(), maxContention);
 
 #ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x14CB8, libsre_rtoc);
-#else
+#endif
+
+#if 0
     SPURSManagerTasksetAttribute *tattr = new SPURSManagerTasksetAttribute(args, priority, maxContention);
     taskset->taskset = new SPURSManagerTaskset(taskset.addr(), tattr);
 
     return CELL_OK;
 #endif
+
+    return spursCreateTaskset(spurs, taskset, args, priority, maxContention, vm::ptr<const char>::make(0), CellSpursTaskset::size, 0);
 }
 
 s64 cellSpursJoinTaskset(vm::ptr<CellSpursTaskset> taskset)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursJoinTaskset(taskset_addr=0x%x)", taskset.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x152F8, libsre_rtoc);
 #else
     UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2343,21 +2675,38 @@ s64 cellSpursJoinTaskset(vm::ptr<CellSpursTaskset> taskset)
 #endif
 }
 
-s64 cellSpursGetTasksetId(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> workloadId)
+s64 cellSpursGetTasksetId(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> wid)
 {
+    cellSpurs->Warning("cellSpursGetTasksetId(taskset_addr=0x%x, wid=0x%x)", taskset.addr(), wid.addr());
+
 #ifdef PRX_DEBUG
-    cellSpurs->Warning("cellSpursGetTasksetId(taskset_addr=0x%x, workloadId_addr=0x%x)", taskset.addr(), workloadId.addr());
     return GetCurrentPPUThread().FastCall2(libsre + 0x14EA0, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (!taskset || !wid)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (taskset.addr() % CellSpursTaskset::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD)
+    {
+        return CELL_SPURS_TASK_ERROR_INVAL;
+    }
+
+    *wid = taskset->m.wid;
     return CELL_OK;
 #endif
 }
 
 s64 cellSpursShutdownTaskset(vm::ptr<CellSpursTaskset> taskset)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursShutdownTaskset(taskset_addr=0x%x)", taskset.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x14868, libsre_rtoc);
 #else
     UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2365,34 +2714,236 @@ s64 cellSpursShutdownTaskset(vm::ptr<CellSpursTaskset> taskset)
 #endif
 }
 
-s64 cellSpursCreateTask(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> taskID, u32 elf_addr, u32 context_addr, u32 context_size, vm::ptr<CellSpursTaskLsPattern> lsPattern,
+u32 _cellSpursGetSdkVersion()
+{
+    static s32 sdk_version = -2;
+
+    if (sdk_version == -2)
+    {
+        vm::var<be_t<s32>> version;
+        sys_process_get_sdk_version(sys_process_getpid(), vm::ptr<s32>::make(version.addr()));
+        sdk_version = version.value();
+    }
+
+    return sdk_version;
+}
+
+s64 spursCreateTask(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id, vm::ptr<u32> elf_addr, vm::ptr<u32> context_addr, u32 context_size, vm::ptr<CellSpursTaskLsPattern> ls_pattern, vm::ptr<CellSpursTaskArgument> arg)
+{
+    if (!taskset || !elf_addr)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (elf_addr.addr() % 16)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    auto sdk_version = _cellSpursGetSdkVersion();
+    if (sdk_version < 0x27FFFF)
+    {
+        if (context_addr.addr() % 16)
+        {
+            return CELL_SPURS_TASK_ERROR_ALIGN;
+        }
+    }
+    else
+    {
+        if (context_addr.addr() % 128)
+        {
+            return CELL_SPURS_TASK_ERROR_ALIGN;
+        }
+    }
+
+    u32 alloc_ls_blocks = 0;
+    if (context_addr.addr() != 0)
+    {
+        if (context_size < CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE)
+        {
+            return CELL_SPURS_TASK_ERROR_INVAL;
+        }
+
+        alloc_ls_blocks = context_size > 0x3D400 ? 0x7A : ((context_size - 0x400) >> 11);
+        if (ls_pattern.addr() != 0)
+        {
+            u32 ls_blocks = 0;
+            for (auto i = 0; i < 128; i++)
+            {
+                if (ls_pattern->_u128.value()._bit[i])
+                {
+                    ls_blocks++;
+                }
+            }
+
+            if (ls_blocks > alloc_ls_blocks)
+            {
+                return CELL_SPURS_TASK_ERROR_INVAL;
+            }
+
+            u128 _0 = u128::from32(0);
+            if ((ls_pattern->_u128.value() & u128::from32r(0xFC000000)) != _0)
+            {
+                // Prevent save/restore to SPURS management area
+                return CELL_SPURS_TASK_ERROR_INVAL;
+            }
+        }
+    }
+    else
+    {
+        alloc_ls_blocks = 0;
+    }
+
+    // TODO: Verify the ELF header is proper and all its load segments are at address >= 0x3000
+
+    u32 tmp_task_id;
+    for (tmp_task_id = 0; tmp_task_id < CELL_SPURS_MAX_TASK; tmp_task_id++)
+    {
+        if (!taskset->m.enabled.value()._bit[tmp_task_id])
+        {
+            auto enabled = taskset->m.enabled.value();
+            enabled._bit[tmp_task_id] = true;
+            taskset->m.enabled = enabled;
+            break;
+        }
+    }
+
+    if (tmp_task_id >= CELL_SPURS_MAX_TASK)
+    {
+        return CELL_SPURS_TASK_ERROR_AGAIN;
+    }
+
+    taskset->m.task_info[tmp_task_id].elf_addr.set(elf_addr.addr());
+    taskset->m.task_info[tmp_task_id].context_save_storage_and_alloc_ls_blocks = (context_addr.addr() | alloc_ls_blocks);
+    taskset->m.task_info[tmp_task_id].args = *arg;
+    if (ls_pattern.addr())
+    {
+        taskset->m.task_info[tmp_task_id].ls_pattern = *ls_pattern;
+    }
+
+    *task_id = tmp_task_id;
+    return CELL_OK;
+}
+
+s64 spursTaskStart(vm::ptr<CellSpursTaskset> taskset, u32 taskId)
+{
+    auto pendingReady = taskset->m.pending_ready.value();
+    pendingReady._bit[taskId] = true;
+    taskset->m.pending_ready = pendingReady;
+
+    cellSpursSendWorkloadSignal(vm::ptr<CellSpurs>::make((u32)taskset->m.spurs.addr()), taskset->m.wid);
+    auto rc = cellSpursWakeUp(GetCurrentPPUThread(), vm::ptr<CellSpurs>::make((u32)taskset->m.spurs.addr()));
+    if (rc != CELL_OK)
+    {
+        if (rc == CELL_SPURS_POLICY_MODULE_ERROR_STAT)
+        {
+            rc = CELL_SPURS_TASK_ERROR_STAT;
+        }
+        else
+        {
+            assert(0);
+        }
+    }
+
+    return rc;
+}
+
+s64 cellSpursCreateTask(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> taskId, u32 elf_addr, u32 context_addr, u32 context_size, vm::ptr<CellSpursTaskLsPattern> lsPattern,
     vm::ptr<CellSpursTaskArgument> argument)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursCreateTask(taskset_addr=0x%x, taskID_addr=0x%x, elf_addr_addr=0x%x, context_addr_addr=0x%x, context_size=%d, lsPattern_addr=0x%x, argument_addr=0x%x)",
-        taskset.addr(), taskID.addr(), elf_addr, context_addr, context_size, lsPattern.addr(), argument.addr());
+        taskset.addr(), taskId.addr(), elf_addr, context_addr, context_size, lsPattern.addr(), argument.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x12414, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
-    return CELL_OK;
+    if (!taskset)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (taskset.addr() % CellSpursTaskset::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    vm::var<u32> tmpTaskId;
+    auto rc = spursCreateTask(taskset, tmpTaskId, vm::ptr<u32>::make(elf_addr), vm::ptr<u32>::make(context_addr), context_size, lsPattern, argument);
+    if (rc != CELL_OK)
+    {
+        return rc;
+    }
+
+    rc = spursTaskStart(taskset, tmpTaskId);
+    if (rc != CELL_OK)
+    {
+        return rc;
+    }
+
+    *taskId = tmpTaskId;
+    return CELL_OK;
 #endif
 }
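Note (not part of the patch): a small sketch of the LS-pattern bookkeeping in `spursCreateTask` above. `alloc_ls_blocks_for` and `count_ls_blocks` are hypothetical helpers: each bit of the 128-bit pattern marks one 2KB local-storage block to save/restore (`(context_size - 0x400) >> 11`, i.e. a 0x400-byte reserved header then 2KB units, capped at 0x7A blocks), the popcount of the pattern must not exceed the allocation, and blocks covered by the `0xFC000000` mask check are rejected because they belong to the SPURS management area.

```cpp
#include <cstdint>
using u32 = std::uint32_t;
using u64 = std::uint64_t;

u32 alloc_ls_blocks_for(u32 context_size)
{
    // mirrors the computation in spursCreateTask
    return context_size > 0x3D400 ? 0x7A : ((context_size - 0x400) >> 11);
}

u32 count_ls_blocks(const u64 pattern[2]) // models the 128-bit ls_pattern
{
    u32 n = 0;
    for (int w = 0; w < 2; w++)
        for (u64 b = pattern[w]; b; b &= b - 1) // clear lowest set bit
            n++;
    return n;
}
```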
UNIMPLEMENTED_FUNC(cellSpurs); + if (!taskset) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskId >= CELL_SPURS_MAX_TASK || taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD2) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + auto _0 = be_t::make(u128::from32(0)); + auto disabled = taskset->m.enabled.value()._bit[taskId] ? false : true; + auto invalid = (taskset->m.ready & taskset->m.pending_ready) != _0 || (taskset->m.running & taskset->m.waiting) != _0 || disabled || + ((taskset->m.running | taskset->m.ready | taskset->m.pending_ready | taskset->m.waiting | taskset->m.signalled) & be_t::make(~taskset->m.enabled.value())) != _0; + + if (invalid) + { + return CELL_SPURS_TASK_ERROR_SRCH; + } + + auto shouldSignal = (taskset->m.waiting & be_t::make(~taskset->m.signalled.value()) & be_t::make(u128::fromBit(taskId))) != _0 ? true : false; + auto signalled = taskset->m.signalled.value(); + signalled._bit[taskId] = true; + taskset->m.signalled = signalled; + if (shouldSignal) + { + cellSpursSendWorkloadSignal(vm::ptr::make((u32)taskset->m.spurs.addr()), taskset->m.wid); + auto rc = cellSpursWakeUp(GetCurrentPPUThread(), vm::ptr::make((u32)taskset->m.spurs.addr())); + if (rc == CELL_SPURS_POLICY_MODULE_ERROR_STAT) + { + return CELL_SPURS_TASK_ERROR_STAT; + } + + if (rc != CELL_OK) + { + assert(0); + } + } + return CELL_OK; #endif } s64 cellSpursCreateTaskWithAttribute() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x12204, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2400,35 +2951,73 @@ s64 cellSpursCreateTaskWithAttribute() #endif } -s64 cellSpursTasksetAttributeSetName() +s64 cellSpursTasksetAttributeSetName(vm::ptr attr, vm::ptr name) { + cellSpurs->Warning("%s(attr=0x%x, name=0x%x)", __FUNCTION__, attr.addr(), name.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14210, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attr || !name) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % CellSpursTasksetAttribute::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + attr->m.name = name; return CELL_OK; #endif } -s64 cellSpursTasksetAttributeSetTasksetSize() +s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr attr, u32 size) { + cellSpurs->Warning("%s(attr=0x%x, size=0x%x)", __FUNCTION__, attr.addr(), size); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14254, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attr) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % CellSpursTasksetAttribute::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (size != CellSpursTaskset::size && size != CellSpursTaskset2::size) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + attr->m.taskset_size = size; return CELL_OK; #endif } -s64 cellSpursTasksetAttributeEnableClearLS() +s64 cellSpursTasksetAttributeEnableClearLS(vm::ptr attr, s32 enable) { + cellSpurs->Warning("%s(attr=0x%x, enable=%d)", __FUNCTION__, attr.addr(), enable); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x142AC, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attr) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % 
CellSpursTasksetAttribute::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + attr->m.enable_clear_ls = enable ? 1 : 0; return CELL_OK; #endif } @@ -2440,27 +3029,28 @@ s64 _cellSpursTasksetAttribute2Initialize(vm::ptr at #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1474C, libsre_rtoc); #else - attribute->revision = revision; - attribute->name_addr = 0; - attribute->argTaskset = 0; + memset(attribute.get_ptr(), 0, CellSpursTasksetAttribute2::size); + attribute->m.revision = revision; + attribute->m.name.set(0); + attribute->m.args = 0; for (s32 i = 0; i < 8; i++) { - attribute->priority[i] = 1; + attribute->m.priority[i] = 1; } - attribute->maxContention = 8; - attribute->enableClearLs = 0; - attribute->CellSpursTaskNameBuffer_addr = 0; - + attribute->m.max_contention = 8; + attribute->m.enable_clear_ls = 0; + attribute->m.task_name_buffer.set(0); return CELL_OK; #endif } s64 cellSpursTaskExitCodeGet() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1397C, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2470,8 +3060,9 @@ s64 cellSpursTaskExitCodeGet() s64 cellSpursTaskExitCodeInitialize() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1352C, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2481,8 +3072,9 @@ s64 cellSpursTaskExitCodeInitialize() s64 cellSpursTaskExitCodeTryGet() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13974, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2492,8 +3084,9 @@ s64 cellSpursTaskExitCodeTryGet() s64 cellSpursTaskGetLoadableSegmentPattern() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13ED4, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2503,8 +3096,9 @@ s64 cellSpursTaskGetLoadableSegmentPattern() s64 cellSpursTaskGetReadOnlyAreaPattern() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13CFC, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2514,8 +3108,9 @@ s64 cellSpursTaskGetReadOnlyAreaPattern() s64 cellSpursTaskGenerateLsPattern() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13B78, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2525,8 +3120,9 @@ s64 cellSpursTaskGenerateLsPattern() s64 _cellSpursTaskAttributeInitialize() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x10C30, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2536,8 +3132,9 @@ s64 _cellSpursTaskAttributeInitialize() s64 cellSpursTaskAttributeSetExitCodeContainer() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x10A98, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2558,12 +3155,7 @@ s64 _cellSpursTaskAttribute2Initialize(vm::ptr attribut for (s32 c = 0; c < 4; c++) { - attribute->lsPattern.u32[c] = 0; - } - - for (s32 i = 0; i < 2; i++) - { - attribute->lsPattern.u64[i] = 0; + attribute->lsPattern._u128 = u128::from64r(0); } attribute->name_addr = 0; @@ -2574,8 +3166,9 @@ s64 _cellSpursTaskAttribute2Initialize(vm::ptr attribut s64 
cellSpursTaskGetContextSaveAreaSize()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x1409C, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2583,21 +3176,44 @@ s64 cellSpursTaskGetContextSaveAreaSize()
 #endif
 }

-s64 cellSpursCreateTaskset2()
+s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr taskset, vm::ptr attr)
 {
+ cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, attr=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), attr.addr());
+
 #ifdef PRX_DEBUG
- cellSpurs->Warning("%s()", __FUNCTION__);
 return GetCurrentPPUThread().FastCall2(libsre + 0x15108, libsre_rtoc);
 #else
- UNIMPLEMENTED_FUNC(cellSpurs);
+ vm::var tmp_attr;
+
+ if (!attr)
+ {
+ attr.set(tmp_attr.addr());
+ _cellSpursTasksetAttribute2Initialize(attr, 0);
+ }
+
+ auto rc = spursCreateTaskset(spurs, vm::ptr::make(taskset.addr()), attr->m.args,
+ vm::ptr::make(attr.addr() + offsetof(CellSpursTasksetAttribute, m.priority)),
+ attr->m.max_contention, vm::ptr::make(attr->m.name.addr()), CellSpursTaskset2::size, (u8)attr->m.enable_clear_ls);
+ if (rc != CELL_OK)
+ {
+ return rc;
+ }
+
+ if (attr->m.task_name_buffer.addr() % CellSpursTaskNameBuffer::align)
+ {
+ return CELL_SPURS_TASK_ERROR_ALIGN;
+ }
+
+ // TODO: Implement rest of the function
 return CELL_OK;
 #endif
 }

 s64 cellSpursCreateTask2()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x11E54, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2607,8 +3223,9 @@ s64 cellSpursCreateTask2()
 #endif
 }

 s64 cellSpursJoinTask2()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x11378, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2618,8 +3235,9 @@ s64 cellSpursJoinTask2()
 #endif
 }

 s64 cellSpursTryJoinTask2()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x11748, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2629,8 +3247,9 @@ s64 cellSpursTryJoinTask2()
 #endif
 }

 s64 cellSpursDestroyTaskset2()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x14EE8, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2640,8 +3259,9 @@ s64 cellSpursDestroyTaskset2()
 #endif
 }

 s64 cellSpursCreateTask2WithBinInfo()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x120E0, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2649,54 +3269,124 @@ s64 cellSpursCreateTask2WithBinInfo()
 #endif
 }

-s64 cellSpursTasksetSetExceptionEventHandler()
+s64 cellSpursTasksetSetExceptionEventHandler(vm::ptr taskset, vm::ptr handler, vm::ptr arg)
 {
+ cellSpurs->Warning("%s(taskset=0x%x, handler=0x%x, arg=0x%x)", __FUNCTION__, taskset.addr(), handler.addr(), arg.addr());
+
 #ifdef PRX_DEBUG
- cellSpurs->Warning("%s()", __FUNCTION__);
 return GetCurrentPPUThread().FastCall2(libsre + 0x13124, libsre_rtoc);
 #else
- UNIMPLEMENTED_FUNC(cellSpurs);
+ if (!taskset || !handler)
+ {
+ return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+ }
+
+ if (taskset.addr() % CellSpursTaskset::align)
+ {
+ return CELL_SPURS_TASK_ERROR_ALIGN;
+ }
+
+ if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD)
+ {
+ return CELL_SPURS_TASK_ERROR_INVAL;
+ }
+
+ if (taskset->m.exception_handler != 0)
+ {
+ return CELL_SPURS_TASK_ERROR_BUSY;
+ }
+
+ taskset->m.exception_handler = handler;
+ taskset->m.exception_handler_arg = arg;
 return CELL_OK;
 #endif
 }

-s64 cellSpursTasksetUnsetExceptionEventHandler()
+s64 cellSpursTasksetUnsetExceptionEventHandler(vm::ptr taskset)
 {
+ cellSpurs->Warning("%s(taskset=0x%x)", __FUNCTION__, taskset.addr());
+
 #ifdef PRX_DEBUG
- cellSpurs->Warning("%s()", __FUNCTION__);
 return GetCurrentPPUThread().FastCall2(libsre + 0x13194, libsre_rtoc);
 #else
- UNIMPLEMENTED_FUNC(cellSpurs);
+ if (!taskset)
+ {
+ return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+ }
+
+ if (taskset.addr() % CellSpursTaskset::align)
+ {
+ return CELL_SPURS_TASK_ERROR_ALIGN;
+ }
+
+ if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD)
+ {
+ return CELL_SPURS_TASK_ERROR_INVAL;
+ }
+
+ taskset->m.exception_handler.set(0);
+ taskset->m.exception_handler_arg.set(0);
 return CELL_OK;
 #endif
 }

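+// Policy module error codes and task error codes appear to differ only in bit 8 (0x8041 08xx vs
+// 0x8041 09xx), so cellSpursLookUpTasksetAddress below converts errors returned by
+// cellSpursGetWorkloadData with rc ^ 0x100; e.g. a policy module SRCH error 0x80410805 would
+// become CELL_SPURS_TASK_ERROR_SRCH (0x80410905).
+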
-s64 cellSpursLookUpTasksetAddress()
+s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id)
 {
+ cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, id=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), id);
+
 #ifdef PRX_DEBUG
- cellSpurs->Warning("%s()", __FUNCTION__);
 return GetCurrentPPUThread().FastCall2(libsre + 0x133AC, libsre_rtoc);
 #else
- UNIMPLEMENTED_FUNC(cellSpurs);
+ if (taskset.addr() == 0)
+ {
+ return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+ }
+
+ vm::var data;
+ auto rc = cellSpursGetWorkloadData(spurs, vm::ptr::make(data.addr()), id);
+ if (rc != CELL_OK)
+ {
+ // Convert policy module error code to a task error code
+ return rc ^ 0x100;
+ }
+
+ taskset.set((u32)data.value());
 return CELL_OK;
 #endif
 }

-s64 cellSpursTasksetGetSpursAddress()
+s64 cellSpursTasksetGetSpursAddress(vm::ptr taskset, vm::ptr spurs)
 {
+ cellSpurs->Warning("%s(taskset=0x%x, spurs=0x%x)", __FUNCTION__, taskset.addr(), spurs.addr());
+
 #ifdef PRX_DEBUG
- cellSpurs->Warning("%s()", __FUNCTION__);
 return GetCurrentPPUThread().FastCall2(libsre + 0x14408, libsre_rtoc);
 #else
- UNIMPLEMENTED_FUNC(cellSpurs);
+ if (!taskset || !spurs)
+ {
+ return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+ }
+
+ if (taskset.addr() % CellSpursTaskset::align)
+ {
+ return CELL_SPURS_TASK_ERROR_ALIGN;
+ }
+
+ if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD)
+ {
+ return CELL_SPURS_TASK_ERROR_INVAL;
+ }
+
+ *spurs = (u32)taskset->m.spurs.addr();
 return CELL_OK;
 #endif
 }

 s64 cellSpursGetTasksetInfo()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x1445C, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2704,13 +3394,39 @@ s64 cellSpursGetTasksetInfo()
 #endif
 }

-s64 _cellSpursTasksetAttributeInitialize()
+s64 _cellSpursTasksetAttributeInitialize(vm::ptr attribute, u32 revision, u32 sdk_version, u64 args, vm::ptr priority, u32 max_contention)
 {
+ cellSpurs->Warning("%s(attribute=0x%x, revision=%d, sdk_version=%d, args=0x%llx, priority=0x%x, max_contention=%d)",
+ __FUNCTION__, attribute.addr(), revision, sdk_version, args, priority.addr(), max_contention);
+
 #ifdef PRX_DEBUG
- cellSpurs->Warning("%s()", __FUNCTION__);
 return GetCurrentPPUThread().FastCall2(libsre + 0x142FC, libsre_rtoc);
 #else
- UNIMPLEMENTED_FUNC(cellSpurs);
+ if (!attribute)
+ {
+ return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+ }
+
+ if (attribute.addr() % CellSpursTasksetAttribute::align)
+ {
+ return CELL_SPURS_TASK_ERROR_ALIGN;
+ }
+
+ for (u32 i = 0; i < 8; i++)
+ {
+ if (priority[i] > 0xF)
+ {
+ return CELL_SPURS_TASK_ERROR_INVAL;
+ }
+ }
+
+ memset(attribute.get_ptr(), 0, CellSpursTasksetAttribute::size);
+ attribute->m.revision = revision;
+ attribute->m.sdk_version =
sdk_version; + attribute->m.args = args; + memcpy(attribute->m.priority, priority.get_ptr(), 8); + attribute->m.taskset_size = CellSpursTaskset::size; + attribute->m.max_contention = max_contention; return CELL_OK; #endif } @@ -2913,6 +3629,190 @@ s64 cellSpursSemaphoreGetTasksetAddress() #endif } +bool spursIsLibProfLoaded() +{ + return false; +} + +void spursTraceStatusUpdate(vm::ptr spurs) +{ + LV2_LOCK(0); + + if (spurs->m.xCC != 0) + { + spurs->m.xCD = 1; + spurs->m.sysSrvMsgUpdateTrace = (1 << spurs->m.nSpus) - 1; + spurs->m.sysSrvMessage.write_relaxed(0xFF); + sys_semaphore_wait((u32)spurs->m.semPrv, 0); + } +} + +s64 spursTraceInitialize(vm::ptr spurs, vm::ptr buffer, u32 size, u32 mode, u32 updateStatus) +{ + if (!spurs || !buffer) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align || buffer.addr() % CellSpursTraceInfo::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (size < CellSpursTraceInfo::size || mode & ~(CELL_SPURS_TRACE_MODE_FLAG_MASK)) + { + return CELL_SPURS_CORE_ERROR_INVAL; + } + + if (spurs->m.traceBuffer != 0) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.traceDataSize = size - CellSpursTraceInfo::size; + for (u32 i = 0; i < 8; i++) + { + buffer->spu_thread[i] = spurs->m.spus[i]; + buffer->count[i] = 0; + } + + buffer->spu_thread_grp = spurs->m.spuTG; + buffer->nspu = spurs->m.nSpus; + spurs->m.traceBuffer.set(buffer.addr() | (mode & CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER ? 1 : 0)); + spurs->m.traceMode = mode; + + u32 spuTraceDataCount = (u32)((spurs->m.traceDataSize / CellSpursTracePacket::size) / spurs->m.nSpus); + for (u32 i = 0, j = 8; i < 6; i++) + { + spurs->m.traceStartIndex[i] = j; + j += spuTraceDataCount; + } + + spurs->m.sysSrvTraceControl = 0; + if (updateStatus) + { + spursTraceStatusUpdate(spurs); + } + + return CELL_OK; +} + +s64 cellSpursTraceInitialize(vm::ptr spurs, vm::ptr buffer, u32 size, u32 mode) +{ + if (spursIsLibProfLoaded()) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + return spursTraceInitialize(spurs, buffer, size, mode, 1); +} + +s64 spursTraceStart(vm::ptr spurs, u32 updateStatus) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (!spurs->m.traceBuffer) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.sysSrvTraceControl = 1; + if (updateStatus) + { + spursTraceStatusUpdate(spurs); + } + + return CELL_OK; +} + +s64 cellSpursTraceStart(vm::ptr spurs) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + return spursTraceStart(spurs, spurs->m.traceMode & CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP); +} + +s64 spursTraceStop(vm::ptr spurs, u32 updateStatus) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (!spurs->m.traceBuffer) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.sysSrvTraceControl = 2; + if (updateStatus) + { + spursTraceStatusUpdate(spurs); + } + + return CELL_OK; +} + +s64 cellSpursTraceStop(vm::ptr spurs) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + return spursTraceStop(spurs, spurs->m.traceMode & CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP); +} + +s64 
cellSpursTraceFinalize(vm::ptr spurs) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (!spurs->m.traceBuffer) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.sysSrvTraceControl = 0; + spurs->m.traceMode = 0; + spurs->m.traceBuffer.set(0); + spursTraceStatusUpdate(spurs); + return CELL_OK; +} + void cellSpurs_init(Module *pxThis) { cellSpurs = pxThis; @@ -2940,6 +3840,8 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, cellSpursEnableExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursSetGlobalExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursUnsetGlobalExceptionEventHandler); + REG_FUNC(cellSpurs, cellSpursSetExceptionEventHandler); + REG_FUNC(cellSpurs, cellSpursUnsetExceptionEventHandler); // Event flag REG_FUNC(cellSpurs, _cellSpursEventFlagInitialize); @@ -2985,8 +3887,6 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, cellSpursCreateTask2WithBinInfo); REG_FUNC(cellSpurs, cellSpursLookUpTasksetAddress); REG_FUNC(cellSpurs, cellSpursTasksetGetSpursAddress); - REG_FUNC(cellSpurs, cellSpursSetExceptionEventHandler); - REG_FUNC(cellSpurs, cellSpursUnsetExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursGetTasksetInfo); REG_FUNC(cellSpurs, cellSpursTasksetSetExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursTasksetUnsetExceptionEventHandler); @@ -3070,5 +3970,9 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, _cellSpursSemaphoreInitialize); REG_FUNC(cellSpurs, cellSpursSemaphoreGetTasksetAddress); - // TODO: some trace funcs + // Trace + REG_FUNC(cellSpurs, cellSpursTraceInitialize); + REG_FUNC(cellSpurs, cellSpursTraceStart); + REG_FUNC(cellSpurs, cellSpursTraceStop); + REG_FUNC(cellSpurs, cellSpursTraceFinalize); } \ No newline at end of file diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 64be4a99cb..348c795653 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -40,6 +40,7 @@ enum { CELL_SPURS_TASK_ERROR_AGAIN = 0x80410901, CELL_SPURS_TASK_ERROR_INVAL = 0x80410902, + CELL_SPURS_TASK_ERROR_NOSYS = 0x80410903, CELL_SPURS_TASK_ERROR_NOMEM = 0x80410904, CELL_SPURS_TASK_ERROR_SRCH = 0x80410905, CELL_SPURS_TASK_ERROR_NOEXEC = 0x80410907, @@ -91,6 +92,7 @@ enum SPURSKernelInterfaces CELL_SPURS_MAX_SPU = 8, CELL_SPURS_MAX_WORKLOAD = 16, CELL_SPURS_MAX_WORKLOAD2 = 32, + CELL_SPURS_SYS_SERVICE_WORKLOAD_ID = 32, CELL_SPURS_MAX_PRIORITY = 16, CELL_SPURS_NAME_MAX_LENGTH = 15, CELL_SPURS_SIZE = 4096, @@ -101,6 +103,12 @@ enum SPURSKernelInterfaces CELL_SPURS_INTERRUPT_VECTOR = 0x0, CELL_SPURS_LOCK_LINE = 0x80, CELL_SPURS_KERNEL_DMA_TAG_ID = 31, + CELL_SPURS_KERNEL1_ENTRY_ADDR = 0x818, + CELL_SPURS_KERNEL2_ENTRY_ADDR = 0x848, + CELL_SPURS_KERNEL1_EXIT_ADDR = 0x808, + CELL_SPURS_KERNEL2_EXIT_ADDR = 0x838, + CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR = 0x290, + CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR = 0x290, }; enum RangeofEventQueuePortNumbers @@ -110,31 +118,6 @@ enum RangeofEventQueuePortNumbers CELL_SPURS_DYNAMIC_PORT_RANGE_BOTTOM = 63, }; -enum SPURSTraceTypes -{ - CELL_SPURS_TRACE_TAG_LOAD = 0x2a, - CELL_SPURS_TRACE_TAG_MAP = 0x2b, - CELL_SPURS_TRACE_TAG_START = 0x2c, - CELL_SPURS_TRACE_TAG_STOP = 0x2d, - CELL_SPURS_TRACE_TAG_USER = 0x2e, - CELL_SPURS_TRACE_TAG_GUID = 0x2f, -}; - -// SPURS task defines. 
-enum TaskConstants -{ - CELL_SPURS_MAX_TASK = 128, - CELL_SPURS_TASK_TOP = 0x3000, - CELL_SPURS_TASK_BOTTOM = 0x40000, - CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, -}; - -class SPURSManager; -class SPURSManagerEventFlag; -class SPURSManagerTaskset; - -struct CellSpurs; - enum SpursAttrFlags : u32 { SAF_NONE = 0x0, @@ -156,11 +139,129 @@ enum SpursAttrFlags : u32 enum SpursFlags1 : u8 { SF1_NONE = 0x0, - - SF1_IS_SECOND = 0x40, + + SF1_32_WORKLOADS = 0x40, SF1_EXIT_IF_NO_WORK = 0x80, }; +enum SpursWorkloadConstants : u64 +{ + // Workload states + SPURS_WKL_STATE_NON_EXISTENT = 0, + SPURS_WKL_STATE_PREPARING = 1, + SPURS_WKL_STATE_RUNNABLE = 2, + SPURS_WKL_STATE_SHUTTING_DOWN = 3, + SPURS_WKL_STATE_REMOVABLE = 4, + SPURS_WKL_STATE_INVALID = 5, + + // GUID + SPURS_GUID_SYS_WKL = 0x1BB841BF38F89D33ull, + SPURS_GUID_TASKSET_PM = 0x836E915B2E654143ull, + + // Image addresses + SPURS_IMG_ADDR_SYS_SRV_WORKLOAD = 0x100, + SPURS_IMG_ADDR_TASKSET_PM = 0x200, +}; + +enum CellSpursModulePollStatus +{ + CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT = 1, + CELL_SPURS_MODULE_POLL_STATUS_SIGNAL = 2, + CELL_SPURS_MODULE_POLL_STATUS_FLAG = 4 +}; + +enum SpursTraceConstants +{ + // Trace tag types + CELL_SPURS_TRACE_TAG_KERNEL = 0x20, + CELL_SPURS_TRACE_TAG_SERVICE = 0x21, + CELL_SPURS_TRACE_TAG_TASK = 0x22, + CELL_SPURS_TRACE_TAG_JOB = 0x23, + CELL_SPURS_TRACE_TAG_OVIS = 0x24, + CELL_SPURS_TRACE_TAG_LOAD = 0x2a, + CELL_SPURS_TRACE_TAG_MAP = 0x2b, + CELL_SPURS_TRACE_TAG_START = 0x2c, + CELL_SPURS_TRACE_TAG_STOP = 0x2d, + CELL_SPURS_TRACE_TAG_USER = 0x2e, + CELL_SPURS_TRACE_TAG_GUID = 0x2f, + + // Service incident + CELL_SPURS_TRACE_SERVICE_INIT = 0x01, + CELL_SPURS_TRACE_SERVICE_WAIT = 0x02, + CELL_SPURS_TRACE_SERVICE_EXIT = 0x03, + + // Task incident + CELL_SPURS_TRACE_TASK_DISPATCH = 0x01, + CELL_SPURS_TRACE_TASK_YIELD = 0x03, + CELL_SPURS_TRACE_TASK_WAIT = 0x04, + CELL_SPURS_TRACE_TASK_EXIT = 0x05, + + // Trace mode flags + CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER = 0x1, + CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP = 0x2, + CELL_SPURS_TRACE_MODE_FLAG_MASK = 0x3, +}; + +// SPURS task constants +enum SpursTaskConstants +{ + CELL_SPURS_MAX_TASK = 128, + CELL_SPURS_TASK_TOP = 0x3000, + CELL_SPURS_TASK_BOTTOM = 0x40000, + CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, + CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, + CELL_SPURS_TASKSET_PM_ENTRY_ADDR = 0xA00, + CELL_SPURS_TASKSET_PM_SYSCALL_ADDR = 0xA70, + + // Task syscall numbers + CELL_SPURS_TASK_SYSCALL_EXIT = 0, + CELL_SPURS_TASK_SYSCALL_YIELD = 1, + CELL_SPURS_TASK_SYSCALL_WAIT_SIGNAL = 2, + CELL_SPURS_TASK_SYSCALL_POLL = 3, + CELL_SPURS_TASK_SYSCALL_RECV_WKL_FLAG = 4, + + // Task poll status + CELL_SPURS_TASK_POLL_FOUND_TASK = 1, + CELL_SPURS_TASK_POLL_FOUND_WORKLOAD = 2, +}; + +enum CellSpursEventFlagWaitMode +{ + CELL_SPURS_EVENT_FLAG_OR = 0, + CELL_SPURS_EVENT_FLAG_AND = 1, + CELL_SPURS_EVENT_FLAG_WAIT_MODE_LAST = CELL_SPURS_EVENT_FLAG_AND, +}; + +enum CellSpursEventFlagClearMode +{ + CELL_SPURS_EVENT_FLAG_CLEAR_AUTO = 0, + CELL_SPURS_EVENT_FLAG_CLEAR_MANUAL = 1, + CELL_SPURS_EVENT_FLAG_CLEAR_LAST = CELL_SPURS_EVENT_FLAG_CLEAR_MANUAL, +}; + +enum CellSpursEventFlagDirection +{ + CELL_SPURS_EVENT_FLAG_SPU2SPU, + CELL_SPURS_EVENT_FLAG_SPU2PPU, + CELL_SPURS_EVENT_FLAG_PPU2SPU, + CELL_SPURS_EVENT_FLAG_ANY2ANY, + CELL_SPURS_EVENT_FLAG_LAST = CELL_SPURS_EVENT_FLAG_ANY2ANY, +}; + +// Event flag constants +enum SpursEventFlagConstants +{ + CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS = 16, + 
CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT = 0xFF,
+};
+
+class SPURSManager;
+class SPURSManagerEventFlag;
+class SPURSManagerTaskset;
+struct CellSpurs;
+
 struct CellSpursAttribute
 {
 static const uint align = 8;
@@ -208,6 +309,72 @@ struct CellSpursWorkloadFlag

 typedef void(CellSpursShutdownCompletionEventHook)(vm::ptr, u32 wid, vm::ptr arg);

+struct CellSpursTraceInfo
+{
+ static const u32 size = 0x80;
+ static const u32 align = 16;
+
+ be_t spu_thread[8]; // 0x00
+ be_t count[8]; // 0x20
+ be_t spu_thread_grp; // 0x40
+ be_t nspu; // 0x44
+ //u8 padding[];
+};
+
+struct CellSpursTracePacket
+{
+ static const u32 size = 16;
+
+ struct
+ {
+ u8 tag;
+ u8 length;
+ u8 spu;
+ u8 workload;
+ be_t time;
+ } header;
+
+ union
+ {
+ struct
+ {
+ be_t incident;
+ be_t reserved;
+ } service;
+
+ struct
+ {
+ be_t ea;
+ be_t ls;
+ be_t size;
+ } load;
+
+ struct
+ {
+ be_t offset;
+ be_t ls;
+ be_t size;
+ } map;
+
+ struct
+ {
+ s8 module[4];
+ be_t level;
+ be_t ls;
+ } start;
+
+ struct
+ {
+ be_t incident;
+ be_t taskId;
+ } task;
+
+ be_t user;
+ be_t guid;
+ be_t stop;
+ } data;
+};
+
 // Core CellSpurs structures
 struct CellSpurs
 {
@@ -218,7 +385,7 @@ struct CellSpurs

 struct _sub_str1
 {
- u8 unk0[0x20];
+ u8 unk0[0x20]; // 0x00 - SPU exception handler, 0x08 - SPU exception handler args
 be_t sem; // 0x20
 u8 unk1[0x8];
 vm::bptr hook; // 0x30
@@ -228,28 +395,29 @@ struct CellSpurs

 static_assert(sizeof(_sub_str1) == 0x80, "Wrong _sub_str1 size");

- struct _sub_str2
+ struct _sub_str2 // Event port multiplexer
 {
- be_t unk0;
- be_t unk1;
- be_t unk2;
- be_t unk3;
+ be_t unk0; // 0x00 Outstanding requests
+ be_t unk1; // 0x04
+ be_t unk2; // 0x08
+ be_t unk3; // 0x0C
 be_t port; // 0x10
- u8 unk_[0x68];
+ u8 unk_[0x68]; // 0x18 - The first u64 seems to be the start of a linked list.
The linked list struct seems to be {u64 next; u64 data; u64 handler} }; static_assert(sizeof(_sub_str2) == 0x80, "Wrong _sub_str2 size"); - struct _sub_str3 + struct WorkloadInfo { - vm::bptr pm; // policy module - be_t data; // spu argument + vm::bptr addr; // Address of the executable + be_t arg; // spu argument be_t size; - atomic_t copy; - be_t priority; + atomic_t uniqueId; // The unique id is the same for all workloads with the same addr + u8 pad[3]; + u8 priority[8]; }; - static_assert(sizeof(_sub_str3) == 0x20, "Wrong _sub_str3 size"); + static_assert(sizeof(WorkloadInfo) == 0x20, "Wrong WorkloadInfo size"); struct _sub_str4 { @@ -268,61 +436,68 @@ struct CellSpurs // real data struct { - atomic_t wklReadyCount[0x20]; // 0x0 (index = wid) - u8 wklA[0x10]; // 0x20 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - u8 wklB[0x10]; // 0x30 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - u8 wklMinCnt[0x10]; // 0x40 (seems only for first 0..15 wids) - atomic_t wklMaxCnt[0x10]; // 0x50 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - CellSpursWorkloadFlag wklFlag; // 0x60 - atomic_t wklSet1; // 0x70 (bitset for 0..15 wids) - atomic_t x72; // 0x72 - u8 x73; // 0x73 - u8 flags1; // 0x74 - u8 x75; // 0x75 - u8 nSpus; // 0x76 - atomic_t flagRecv; // 0x77 - atomic_t wklSet2; // 0x78 (bitset for 16..32 wids) - u8 x7A[6]; // 0x7A - atomic_t wklStat1[0x10]; // 0x80 - u8 wklD1[0x10]; // 0x90 - u8 wklE1[0x10]; // 0xA0 - atomic_t wklMskA; // 0xB0 - atomic_t wklMskB; // 0xB4 - u8 xB8[5]; // 0xB8 - atomic_t xBD; // 0xBD - u8 xBE[2]; // 0xBE - u8 xC0[8]; // 0xC0 - u8 xC8; // 0xC8 - u8 spuPort; // 0xC9 - u8 xCA; // 0xCA - u8 xCB; // 0xCB - u8 xCC; // 0xCC - u8 xCD; // 0xCD - u8 xCE; // 0xCE - u8 xCF; // 0xCF - atomic_t wklStat2[0x10]; // 0xD0 - u8 wklD2[0x10]; // 0xE0 - u8 wklE2[0x10]; // 0xF0 - _sub_str1 wklF1[0x10]; // 0x100 - be_t unk22; // 0x900 - u8 unknown7[0x980 - 0x908]; + atomic_t wklReadyCount1[0x10]; // 0x00 Number of SPUs requested by each workload (0..15 wids). + atomic_t wklIdleSpuCountOrReadyCount2[0x10]; // 0x10 SPURS1: Number of idle SPUs requested by each workload (0..15 wids). SPURS2: Number of SPUs requested by each workload (16..31 wids). + u8 wklCurrentContention[0x10]; // 0x20 Number of SPUs used by each workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. + u8 wklPendingContention[0x10]; // 0x30 Number of SPUs that are pending to context switch to the workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. + u8 wklMinContention[0x10]; // 0x40 Min SPUs required for each workload. SPURS1: index = wid. SPURS2: Unused. + atomic_t wklMaxContention[0x10]; // 0x50 Max SPUs that may be allocated to each workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. 
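+ // Worked example of the SPURS2 packed 4-bit layout used by the contention fields above:
+ // for wid 20 the value lives in byte 20 % 16 = 4, upper nibble (internal index 20 / 16 = 1),
+ // so it is read as wklCurrentContention[4] >> 4; wids 0..15 use the lower nibble (& 0x0F).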
+ CellSpursWorkloadFlag wklFlag; // 0x60
+ atomic_t wklSignal1; // 0x70 (bitset for 0..15 wids)
+ atomic_t sysSrvMessage; // 0x72
+ u8 spuIdling; // 0x73
+ u8 flags1; // 0x74 Type is SpursFlags1
+ u8 sysSrvTraceControl; // 0x75
+ u8 nSpus; // 0x76
+ atomic_t wklFlagReceiver; // 0x77
+ atomic_t wklSignal2; // 0x78 (bitset for 16..32 wids)
+ u8 x7A[6]; // 0x7A
+ atomic_t wklState1[0x10]; // 0x80 SPURS_WKL_STATE_*
+ u8 wklStatus1[0x10]; // 0x90
+ u8 wklEvent1[0x10]; // 0xA0
+ atomic_t wklMskA; // 0xB0 - System service - Available workloads (32*u1)
+ atomic_t wklMskB; // 0xB4 - System service - Available module id
+ u32 xB8; // 0xB8
+ u8 sysSrvExitBarrier; // 0xBC
+ atomic_t sysSrvMsgUpdateWorkload; // 0xBD
+ u8 xBE; // 0xBE
+ u8 sysSrvMsgTerminate; // 0xBF
+ u8 sysSrvWorkload[8]; // 0xC0
+ u8 sysSrvOnSpu; // 0xC8
+ u8 spuPort; // 0xC9
+ u8 xCA; // 0xCA
+ u8 xCB; // 0xCB
+ u8 xCC; // 0xCC
+ u8 xCD; // 0xCD
+ u8 sysSrvMsgUpdateTrace; // 0xCE
+ u8 xCF; // 0xCF
+ atomic_t wklState2[0x10]; // 0xD0 SPURS_WKL_STATE_*
+ u8 wklStatus2[0x10]; // 0xE0
+ u8 wklEvent2[0x10]; // 0xF0
+ _sub_str1 wklF1[0x10]; // 0x100
+ vm::bptr traceBuffer; // 0x900
+ be_t traceStartIndex[6]; // 0x908
+ u8 unknown7[0x948 - 0x920]; // 0x920
+ be_t traceDataSize; // 0x948
+ be_t traceMode; // 0x950
+ u8 unknown8[0x980 - 0x954]; // 0x954
 be_t semPrv; // 0x980
 be_t unk11; // 0x988
 be_t unk12; // 0x98C
 be_t unk13; // 0x990
 u8 unknown4[0xB00 - 0x998];
- _sub_str3 wklG1[0x10]; // 0xB00
- _sub_str3 wklSysG; // 0xD00
+ WorkloadInfo wklInfo1[0x10]; // 0xB00
+ WorkloadInfo wklInfoSysSrv; // 0xD00
 be_t ppu0; // 0xD20
 be_t ppu1; // 0xD28
- be_t spuTG; // 0xD30
+ be_t spuTG; // 0xD30 - SPU thread group
 be_t spus[8]; // 0xD34
 u8 unknown3[0xD5C - 0xD54];
- be_t queue; // 0xD5C
- be_t port; // 0xD60
- atomic_t xD64; // 0xD64
- atomic_t xD65; // 0xD65
- atomic_t xD66; // 0xD66
+ be_t queue; // 0xD5C - Event queue
+ be_t port; // 0xD60 - Event port
+ atomic_t xD64; // 0xD64 - SPURS handler dirty
+ atomic_t xD65; // 0xD65 - SPURS handler waiting
+ atomic_t xD66; // 0xD66 - SPURS handler exiting
 atomic_t enableEH; // 0xD68
 be_t exception; // 0xD6C
 sys_spu_image spuImg; // 0xD70
@@ -334,14 +509,14 @@ struct CellSpurs
 be_t unk5; // 0xD9C
 be_t revision; // 0xDA0
 be_t sdkVersion; // 0xDA4
- atomic_t spups; // 0xDA8
+ atomic_t spups; // 0xDA8 - SPU port bits
 sys_lwmutex_t mutex; // 0xDB0
 sys_lwcond_t cond; // 0xDC8
 u8 unknown9[0xE00 - 0xDD0];
 _sub_str4 wklH1[0x10]; // 0xE00
 _sub_str2 sub3; // 0xF00
- u8 unknown6[0x1000 - 0xF80];
- _sub_str3 wklG2[0x10]; // 0x1000
+ u8 unknown6[0x1000 - 0xF80]; // 0xF80 - Global SPU exception handler, 0xF88 - Global SPU exception handler args
+ WorkloadInfo wklInfo2[0x10]; // 0x1000
 _sub_str1 wklF2[0x10]; // 0x1200
 _sub_str4 wklH2[0x10]; // 0x1A00
 } m;
@@ -353,15 +528,15 @@ struct CellSpurs
 } c;
 };

- __forceinline atomic_t& wklStat(const u32 wid)
+ __forceinline atomic_t& wklState(const u32 wid)
 {
 if (wid & 0x10)
 {
- return m.wklStat2[wid & 0xf];
+ return m.wklState2[wid & 0xf];
 }
 else
 {
- return m.wklStat1[wid & 0xf];
+ return m.wklState1[wid & 0xf];
 }
 }

@@ -409,12 +584,104 @@ struct CellSpursWorkloadAttribute

 struct CellSpursEventFlag
 {
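+ // How the fields below appear to fit together: a waiter in slot k blocks until the bits of
+ // events selected by spuTaskWaitMask[k] satisfy its wait mode; e.g. with spuTaskWaitMask[k] = 0x0003,
+ // an OR-mode waiter wakes when either of the low two event bits is set, while an AND-mode waiter
+ // (its bit set in spuTaskWaitMode) needs both.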
- SPURSManagerEventFlag *eventFlag;
+ static const u32 align = 128;
+ static const u32 size = 128;
+
+ union
+ {
+ // Raw data
+ u8 _u8[size];
+
+ // Real data
+ struct _CellSpursEventFlag
+ {
+ be_t events; // 0x00 Event bits
+ be_t spuTaskPendingRecv; // 0x02 A bit is set to 1 when the conditions of the SPU task using the slot are met and back to 0 when the SPU task unblocks
+ be_t ppuWaitMask; // 0x04 Wait mask for blocked PPU thread
+ u8 ppuWaitSlotAndMode; // 0x06 Top 4 bits: Wait slot number of the blocked PPU thread, Bottom 4 bits: Wait mode of the blocked PPU thread
+ u8 ppuPendingRecv; // 0x07 Set to 1 when the blocked PPU thread's conditions are met and back to 0 when the PPU thread is unblocked
+ be_t spuTaskUsedWaitSlots; // 0x08 A bit is set to 1 if the wait slot corresponding to the bit is used by an SPU task and 0 otherwise
+ be_t spuTaskWaitMode; // 0x0A A bit is set to 1 if the wait mode for the SPU task corresponding to the bit is AND and 0 otherwise
+ u8 spuPort; // 0x0C
+ u8 isIwl; // 0x0D
+ u8 direction; // 0x0E
+ u8 clearMode; // 0x0F
+ be_t spuTaskWaitMask[16]; // 0x10 Wait mask for blocked SPU tasks
+ be_t pendingRecvTaskEvents[16]; // 0x30 The value of event flag when the wait condition for the thread/task was met
+ u8 waitingTaskId[16]; // 0x50 Task id of waiting SPU threads
+ u8 waitingTaskWklId[16]; // 0x60 Workload id of waiting SPU threads
+ be_t addr; // 0x70
+ be_t eventPortId; // 0x78
+ be_t eventQueueId; // 0x7C
+ } m;
+
+ static_assert(sizeof(_CellSpursEventFlag) == size, "Wrong _CellSpursEventFlag size");
+
+ SPURSManagerEventFlag *eventFlag;
+ };
+};
+
+union CellSpursTaskArgument
+{
+ be_t _u128;
+};
+
+union CellSpursTaskLsPattern
+{
+ be_t _u128;
 };

 struct CellSpursTaskset
 {
- SPURSManagerTaskset *taskset;
+ static const u32 align = 128;
+ static const u32 size = 6400;
+
+ struct TaskInfo
+ {
+ CellSpursTaskArgument args; // 0x00
+ vm::bptr elf_addr; // 0x10
+ be_t context_save_storage_and_alloc_ls_blocks; // 0x18 This is (context_save_storage_addr | allocated_ls_blocks)
+ CellSpursTaskLsPattern ls_pattern; // 0x20
+ };
+
+ static_assert(sizeof(TaskInfo) == 0x30, "Wrong TaskInfo size");
+
+ union
+ {
+ // Raw data
+ u8 _u8[size];
+
+ // Real data
+ struct _CellSpursTaskset
+ {
+ be_t running; // 0x00
+ be_t ready; // 0x10
+ be_t pending_ready; // 0x20
+ be_t enabled; // 0x30
+ be_t signalled; // 0x40
+ be_t waiting; // 0x50
+ vm::bptr spurs; // 0x60
+ be_t args; // 0x68
+ u8 enable_clear_ls; // 0x70
+ u8 x71; // 0x71
+ u8 wkl_flag_wait_task; // 0x72
+ u8 last_scheduled_task; // 0x73
+ be_t wid; // 0x74
+ be_t x78; // 0x78
+ TaskInfo task_info[128]; // 0x80
+ vm::bptr exception_handler; // 0x1880
+ vm::bptr exception_handler_arg; // 0x1888
+ be_t size; // 0x1890
+ u32 unk2; // 0x1894
+ u32 event_flag_id1; // 0x1898
+ u32 event_flag_id2; // 0x189C
+ u8 unk3[0x60]; // 0x18A0
+ } m;
+
+ static_assert(sizeof(_CellSpursTaskset) == size, "Wrong _CellSpursTaskset size");
+
+ SPURSManagerTaskset *taskset;
+ };
 };

 struct CellSpursInfo
@@ -446,63 +713,6 @@ struct CellSpursExceptionInfo
 be_t option;
 };

-struct CellSpursTraceInfo
-{
- be_t spu_thread[8];
- be_t count[8];
- be_t spu_thread_grp;
- be_t nspu;
- //u8 padding[];
-};
-
-struct CellTraceHeader
-{
- u8 tag;
- u8 length;
- u8 cpu;
- u8 thread;
- be_t time;
-};
-
-struct CellSpursTracePacket
-{
- struct header_struct
- {
- u8 tag;
- u8 length;
- u8 spu;
- u8 workload;
- be_t time;
- } header;
-
- struct data_struct
- {
- struct load_struct
- {
- be_t ea;
- be_t ls;
- be_t size;
- } load;
-
- struct map_struct
- {
- be_t offset;
- be_t ls;
- be_t size;
- } map;
-
- struct start_struct
- {
- s8 module[4];
- be_t level;
- be_t ls;
- } start;
-
- be_t user;
- be_t guid;
- } data;
-};
-
 // Exception handlers.
//typedef void (*CellSpursGlobalExceptionEventHandler)(vm::ptr spurs, vm::ptr info, // u32 id, vm::ptr arg); @@ -510,6 +720,13 @@ struct CellSpursTracePacket //typedef void (*CellSpursTasksetExceptionEventHandler)(vm::ptr spurs, vm::ptr taskset, // u32 idTask, vm::ptr info, vm::ptr arg); +struct CellSpursTaskNameBuffer +{ + static const u32 align = 16; + + char taskName[CELL_SPURS_MAX_TASK][CELL_SPURS_MAX_TASK_NAME_LENGTH]; +}; + struct CellSpursTasksetInfo { //CellSpursTaskInfo taskInfo[CELL_SPURS_MAX_TASK]; @@ -525,25 +742,104 @@ struct CellSpursTasksetInfo struct CellSpursTaskset2 { - be_t skip[10496]; + static const u32 align = 128; + static const u32 size = 10496; + + struct TaskInfo + { + CellSpursTaskArgument args; + vm::bptr elf_addr; + vm::bptr context_save_storage; // This is (context_save_storage_addr | allocated_ls_blocks) + CellSpursTaskLsPattern ls_pattern; + }; + + static_assert(sizeof(TaskInfo) == 0x30, "Wrong TaskInfo size"); + + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct _CellSpursTaskset2 + { + be_t running_set[4]; // 0x00 + be_t ready_set[4]; // 0x10 + be_t ready2_set[4]; // 0x20 - TODO: Find out what this is + be_t enabled_set[4]; // 0x30 + be_t signal_received_set[4]; // 0x40 + be_t waiting_set[4]; // 0x50 + vm::bptr spurs; // 0x60 + be_t args; // 0x68 + u8 enable_clear_ls; // 0x70 + u8 x71; // 0x71 + u8 x72; // 0x72 + u8 last_scheduled_task; // 0x73 + be_t wid; // 0x74 + be_t x78; // 0x78 + TaskInfo task_info[128]; // 0x80 + vm::bptr exception_handler; // 0x1880 + vm::bptr exception_handler_arg; // 0x1888 + be_t size; // 0x1890 + u32 unk2; // 0x1894 + u32 event_flag_id1; // 0x1898 + u32 event_flag_id2; // 0x189C + u8 unk3[0x1980 - 0x18A0]; // 0x18A0 + be_t task_exit_code[128]; // 0x1980 + u8 unk4[0x2900 - 0x2180]; // 0x2180 + } m; + + static_assert(sizeof(_CellSpursTaskset2) == size, "Wrong _CellSpursTaskset2 size"); + }; +}; + +struct CellSpursTasksetAttribute +{ + static const u32 align = 8; + static const u32 size = 512; + + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct + { + be_t revision; // 0x00 + be_t sdk_version; // 0x04 + be_t args; // 0x08 + u8 priority[8]; // 0x10 + be_t max_contention; // 0x18 + vm::bptr name; // 0x1C + be_t taskset_size; // 0x20 + be_t enable_clear_ls; // 0x24 + } m; + }; }; struct CellSpursTasksetAttribute2 { - be_t revision; - be_t name_addr; - be_t argTaskset; - u8 priority[8]; - be_t maxContention; - be_t enableClearLs; - be_t CellSpursTaskNameBuffer_addr; //??? *taskNameBuffer - //be_t __reserved__[]; -}; + static const u32 align = 8; + static const u32 size = 512; -// cellSpurs task structures. -struct CellSpursTaskNameBuffer -{ - char taskName[CELL_SPURS_MAX_TASK][CELL_SPURS_MAX_TASK_NAME_LENGTH]; + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct + { + be_t revision; // 0x00 + vm::bptr name; // 0x04 + be_t args; // 0x08 + u8 priority[8]; // 0x10 + be_t max_contention; // 0x18 + be_t enable_clear_ls; // 0x1C + vm::bptr task_name_buffer; // 0x20 + } m; + }; }; struct CellSpursTraceTaskData @@ -552,21 +848,6 @@ struct CellSpursTraceTaskData be_t task; }; -typedef be_t be_u32; -typedef be_t be_u64; - -struct CellSpursTaskArgument -{ - be_u32 u32[4]; - be_u64 u64[2]; -}; - -struct CellSpursTaskLsPattern -{ - be_u32 u32[4]; - be_u64 u64[2]; -}; - struct CellSpursTaskAttribute2 { be_t revision; @@ -604,7 +885,77 @@ struct CellSpursTaskBinInfo CellSpursTaskLsPattern lsPattern; }; -class PPUThread; +// The SPURS kernel context. This resides at 0x100 of the LS. 
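+// Both the HLE kernel and the policy modules recover it from the local storage base, e.g.:
+//     auto ctxt = vm::get_ptr(spu.ls_offset + 0x100);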
+struct SpursKernelContext +{ + u8 tempArea[0x80]; // 0x100 + u8 wklLocContention[0x10]; // 0x180 + u8 wklLocPendingContention[0x10]; // 0x190 + u8 priority[0x10]; // 0x1A0 + u8 x1B0[0x10]; // 0x1B0 + vm::bptr spurs; // 0x1C0 + be_t spuNum; // 0x1C8 + be_t dmaTagId; // 0x1CC + vm::bptr wklCurrentAddr; // 0x1D0 + be_t wklCurrentUniqueId; // 0x1D8 + be_t wklCurrentId; // 0x1DC + be_t exitToKernelAddr; // 0x1E0 + be_t selectWorkloadAddr; // 0x1E4 + u8 moduleId[2]; // 0x1E8 + u8 sysSrvInitialised; // 0x1EA + u8 spuIdling; // 0x1EB + be_t wklRunnable1; // 0x1EC + be_t wklRunnable2; // 0x1EE + be_t x1F0; // 0x1F0 + be_t x1F4; // 0x1F4 + be_t x1F8; // 0x1F8 + be_t x1FC; // 0x1FC + be_t x200; // 0x200 + be_t x204; // 0x204 + be_t x208; // 0x208 + be_t x20C; // 0x20C + be_t traceBuffer; // 0x210 + be_t traceMsgCount; // 0x218 + be_t traceMaxCount; // 0x21C + u8 wklUniqueId[0x10]; // 0x220 + u8 x230[0x280 - 0x230]; // 0x230 + be_t guid[4]; // 0x280 +}; + +static_assert(sizeof(SpursKernelContext) == 0x190, "Incorrect size for SpursKernelContext"); + +// The SPURS taskset policy module context. This resides at 0x2700 of the LS. +struct SpursTasksetContext +{ + u8 tempAreaTaskset[0x80]; // 0x2700 + u8 tempAreaTaskInfo[0x30]; // 0x2780 + be_t x27B0; // 0x27B0 + vm::bptr taskset; // 0x27B8 + be_t kernelMgmtAddr; // 0x27C0 + be_t syscallAddr; // 0x27C4 + be_t x27C8; // 0x27C8 + be_t spuNum; // 0x27CC + be_t dmaTagId; // 0x27D0 + be_t taskId; // 0x27D4 + u8 x27D8[0x2840 - 0x27D8]; // 0x27D8 + u8 moduleId[16]; // 0x2840 + u8 stackArea[0x2C80 - 0x2850]; // 0x2850 + be_t savedContextLr; // 0x2C80 + be_t savedContextSp; // 0x2C90 + be_t savedContextR80ToR127[48]; // 0x2CA0 + be_t savedContextFpscr; // 0x2FA0 + be_t savedWriteTagGroupQueryMask; // 0x2FB0 + be_t savedSpuWriteEventMask; // 0x2FB4 + be_t tasksetMgmtAddr; // 0x2FB8 + be_t guidAddr; // 0x2FBC + be_t x2FC0; // 0x2FC0 + be_t x2FC8; // 0x2FC8 + be_t taskExitCode; // 0x2FD0 + be_t x2FD4; // 0x2FD4 + u8 x2FD8[0x3000 - 0x2FD8]; // 0x2FD8 +}; + +static_assert(sizeof(SpursTasksetContext) == 0x900, "Incorrect size for SpursTasksetContext"); s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated); s64 spursWakeUp(PPUThread& CPU, vm::ptr spurs); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp new file mode 100644 index 0000000000..d1dc487eeb --- /dev/null +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -0,0 +1,1677 @@ +#include "stdafx.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/Cell/SPUThread.h" +#include "Emu/SysCalls/Modules.h" +#include "Emu/SysCalls/lv2/sys_lwmutex.h" +#include "Emu/SysCalls/lv2/sys_lwcond.h" +#include "Emu/SysCalls/lv2/sys_spu.h" +#include "Emu/SysCalls/Modules/cellSpurs.h" +#include "Loader/ELF32.h" +#include "Emu/FS/vfsStreamMemory.h" + +// +// SPURS utility functions +// +void cellSpursModulePutTrace(CellSpursTracePacket * packet, u32 dmaTagId); +u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); +void cellSpursModuleExit(SPUThread & spu); + +bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag); +u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask); +u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll = true); +void spursHalt(SPUThread & spu); + +// +// SPURS Kernel functions +// +bool spursKernel1SelectWorkload(SPUThread & spu); +bool spursKernel2SelectWorkload(SPUThread & spu); +void spursKernelDispatchWorkload(SPUThread & spu, u64 
widAndPollStatus); +bool spursKernelWorkloadExit(SPUThread & spu); +bool spursKernelEntry(SPUThread & spu); + +// +// SPURS System Service functions +// +bool spursSysServiceEntry(SPUThread & spu); +// TODO: Exit +void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceMain(SPUThread & spu, u32 pollStatus); +void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt); +// TODO: Deactivate workload +void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, SpursKernelContext * ctxt, u32 wklShutdownBitSet); +void spursSysServiceTraceSaveCount(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 arg2, u32 arg3, u32 arg4); +// TODO: Deactivate trace +// TODO: System workload entry +void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelContext * ctxt); + +// +// SPURS Taskset Policy Module functions +// +bool spursTasksetEntry(SPUThread & spu); +bool spursTasksetSyscallEntry(SPUThread & spu); +void spursTasksetResumeTask(SPUThread & spu); +void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs); +s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); +void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus); +bool spursTasksetPollStatus(SPUThread & spu); +void spursTasksetExit(SPUThread & spu); +void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args); +s32 spursTasketSaveTaskContext(SPUThread & spu); +void spursTasksetDispatch(SPUThread & spu); +s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args); +void spursTasksetInit(SPUThread & spu, u32 pollStatus); +s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments); + +extern Module *cellSpurs; + +////////////////////////////////////////////////////////////////////////////// +// SPURS utility functions +////////////////////////////////////////////////////////////////////////////// + +/// Output trace information +void cellSpursModulePutTrace(CellSpursTracePacket * packet, u32 dmaTagId) { + // TODO: Implement this +} + +/// Check for execution right requests +u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + + spu.GPR[3]._u32[3] = 1; + if (ctxt->spurs->m.flags1 & SF1_32_WORKLOADS) { + spursKernel2SelectWorkload(spu); + } else { + spursKernel1SelectWorkload(spu); + } + + auto result = spu.GPR[3]._u64[1]; + if (status) { + *status = (u32)result; + } + + u32 wklId = result >> 32; + return wklId == ctxt->wklCurrentId ? 
0 : 1; +} + +/// Exit current workload +void cellSpursModuleExit(SPUThread & spu) { + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + spu.SetBranch(ctxt->exitToKernelAddr); +} + +/// Execute a DMA operation +bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) { + spu.WriteChannel(MFC_LSA, u128::from32r(lsa)); + spu.WriteChannel(MFC_EAH, u128::from32r((u32)(ea >> 32))); + spu.WriteChannel(MFC_EAL, u128::from32r((u32)ea)); + spu.WriteChannel(MFC_Size, u128::from32r(size)); + spu.WriteChannel(MFC_TagID, u128::from32r(tag)); + spu.WriteChannel(MFC_Cmd, u128::from32r(cmd)); + + if (cmd == MFC_GETLLAR_CMD || cmd == MFC_PUTLLC_CMD || cmd == MFC_PUTLLUC_CMD) { + u128 rv; + + spu.ReadChannel(rv, MFC_RdAtomicStat); + auto success = rv._u32[3] ? true : false; + success = cmd == MFC_PUTLLC_CMD ? !success : success; + return success; + } + + return true; +} + +/// Get the status of DMA operations +u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask) { + u128 rv; + + spu.WriteChannel(MFC_WrTagMask, u128::from32r(tagMask)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_IMMEDIATE)); + spu.ReadChannel(rv, MFC_RdTagStat); + return rv._u32[3]; +} + +/// Wait for DMA operations to complete +u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll) { + u128 rv; + + spu.WriteChannel(MFC_WrTagMask, u128::from32r(tagMask)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(waitForAll ? MFC_TAG_UPDATE_ALL : MFC_TAG_UPDATE_ANY)); + spu.ReadChannel(rv, MFC_RdTagStat); + return rv._u32[3]; +} + +/// Halt the SPU +void spursHalt(SPUThread & spu) { + spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_HALT); + spu.Stop(); +} + +////////////////////////////////////////////////////////////////////////////// +// SPURS kernel functions +////////////////////////////////////////////////////////////////////////////// + +/// Select a workload to run +bool spursKernel1SelectWorkload(SPUThread & spu) { + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + + // The first and only argument to this function is a boolean that is set to false if the function + // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. + // If the first argument is true then the shared data is not updated with the result. + const auto isPoll = spu.GPR[3]._u32[3]; + + u32 wklSelectedId; + u32 pollStatus; + + do { + // DMA and lock the first 0x80 bytes of spurs + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); + + // Calculate the contention (number of SPUs used) for each workload + u8 contention[CELL_SPURS_MAX_WORKLOAD]; + u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + contention[i] = spurs->m.wklCurrentContention[i] - ctxt->wklLocContention[i]; + + // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably + // to prevent unnecessary jumps to the kernel + if (isPoll) { + pendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; + if (i != ctxt->wklCurrentId) { + contention[i] += pendingContention[i]; + } + } + } + + wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + pollStatus = 0; + + // The system service has the highest priority. Select the system service if + // the system service message bit for this SPU is set. 
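+ // For example, with sysSrvMessage = 0x05, SPUs 0 and 2 would take the branch below on their
+ // next pass, since (0x05 & (1 << 0)) and (0x05 & (1 << 2)) are both non-zero.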
+ if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) {
+ ctxt->spuIdling = 0;
+ if (!isPoll || ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+ // Clear the message bit
+ spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << ctxt->spuNum));
+ }
+ } else {
+ // Calculate the scheduling weight for each workload
+ u16 maxWeight = 0;
+ for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+ u16 runnable = ctxt->wklRunnable1 & (0x8000 >> i);
+ u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
+ u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+ u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed();
+ u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
+ u8 requestCount = readyCount + idleSpuCount;
+
+ // For a workload to be considered for scheduling:
+ // 1. Its priority must not be 0
+ // 2. The number of SPUs used by it must be less than the max contention for that workload
+ // 3. The workload should be in runnable state
+ // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
+ //    OR the workload must be signalled
+ //    OR the workload flag is 0 and the workload is configured as the workload flag receiver
+ if (runnable && ctxt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) {
+ if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) {
+ // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority:
+ // 1. Workload signal set or workload flag or ready count > contention
+ // 2. Priority of the workload on the SPU
+ // 3. Is the workload the last selected workload
+ // 4. Minimum contention of the workload
+ // 5. Number of SPUs that are being used by the workload (the fewer the SPUs, the higher the weight)
+ // 6. Is the workload executable the same as the currently loaded executable
+ // 7. The workload id (the lower the id, the higher the weight)
+ u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0;
+ weight |= (u16)(ctxt->priority[i] & 0x7F) << 8;
+ weight |= i == ctxt->wklCurrentId ? 0x80 : 0x00;
+ weight |= (contention[i] > 0 && spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00;
+ weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2;
+ weight |= ctxt->wklUniqueId[i] == ctxt->wklCurrentId ? 0x02 : 0x00;
+ weight |= 0x01;
+
+ // In case of a tie the lower numbered workload is chosen
+ if (weight > maxWeight) {
+ wklSelectedId = i;
+ maxWeight = weight;
+ pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
+ pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
+ pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
+ }
+ }
+ }
+ }
+
+ // Not sure what this does. Possibly mark the SPU as idle/in use.
+ ctxt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+
+ if (!isPoll || wklSelectedId == ctxt->wklCurrentId) {
+ // Clear workload signal for the selected workload
+ spurs->m.wklSignal1.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
+ spurs->m.wklSignal2.write_relaxed(be_t::make(spurs->m.wklSignal2.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+
+ // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
+ if (wklSelectedId == spurs->m.wklFlagReceiver.read_relaxed()) {
+ spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF));
+ }
+ }
+ }
+
+ if (!isPoll) {
+ // Called by kernel
+ // Increment the contention for the selected workload
+ if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+ contention[wklSelectedId]++;
+ }
+
+ for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+ spurs->m.wklCurrentContention[i] = contention[i];
+ spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i];
+ ctxt->wklLocContention[i] = 0;
+ ctxt->wklLocPendingContention[i] = 0;
+ }
+
+ if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+ ctxt->wklLocContention[wklSelectedId] = 1;
+ }
+
+ ctxt->wklCurrentId = wklSelectedId;
+ } else if (wklSelectedId != ctxt->wklCurrentId) {
+ // Not called by kernel but a context switch is required
+ // Increment the pending contention for the selected workload
+ if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+ pendingContention[wklSelectedId]++;
+ }
+
+ for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+ spurs->m.wklPendingContention[i] = pendingContention[i];
+ ctxt->wklLocPendingContention[i] = 0;
+ }
+
+ if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+ ctxt->wklLocPendingContention[wklSelectedId] = 1;
+ }
+ } else {
+ // Not called by kernel and no context switch is required
+ for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+ spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i];
+ ctxt->wklLocPendingContention[i] = 0;
+ }
+ }
+ } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+ u64 result = (u64)wklSelectedId << 32;
+ result |= pollStatus;
+ spu.GPR[3]._u64[1] = result;
+ return true;
+}
+
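+// The 16-bit weight packed above breaks down as: bit 15 - signal/flag/ready-count urgency,
+// bits 8..14 - priority of the workload on this SPU, bit 7 - last selected workload,
+// bit 6 - minimum contention not yet satisfied, bits 2..5 - (8 - SPUs in use),
+// bit 1 - same executable as the one currently loaded, bit 0 - always set so that any
+// eligible workload outranks the idle default.
+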
contention[i] & 0x0F : contention[i] >> 4; + + // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably + // to prevent unnecessary jumps to the kernel + if (isPoll) { + pendingContention[i] = spurs->m.wklPendingContention[i & 0x0F] - ctxt->wklLocPendingContention[i & 0x0F]; + pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; + if (i != ctxt->wklCurrentId) { + contention[i] += pendingContention[i]; + } + } + } + + wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + pollStatus = 0; + + // The system service has the highest priority. Select the system service if + // the system service message bit for this SPU is set. + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) { + // Not sure what this does. Possibly Mark the SPU as in use. + ctxt->spuIdling = 0; + if (!isPoll || ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + // Clear the message bit + spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << ctxt->spuNum)); + } + } else { + // Caclulate the scheduling weight for each workload + u8 maxWeight = 0; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { + auto j = i & 0x0F; + u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); + u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4; + u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; + u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); + + // For a workload to be considered for scheduling: + // 1. Its priority must be greater than 0 + // 2. The number of SPUs used by it must be less than the max contention for that workload + // 3. The workload should be in runnable state + // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) + // OR the workload must be signalled + // OR the workload flag is 0 and the workload is configured as the wokload receiver + if (runnable && priority > 0 && maxContention > contention[i]) { + if (wklFlag || wklSignal || readyCount > contention[i]) { + // The scheduling weight of the workload is equal to the priority of the workload for the SPU. + // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. + // In case of a tie the lower numbered workload is chosen. + u8 weight = priority << 4; + if (ctxt->wklCurrentId == i) { + weight |= 0x04; + } + + if (weight > maxWeight) { + wklSelectedId = i; + maxWeight = weight; + pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; + pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; + pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; + } + } + } + } + + // Not sure what this does. Possibly mark the SPU as idle/in use. + ctxt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
+
+            // Not sure what this does. Possibly mark the SPU as idle/in use.
+            ctxt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+
+            if (!isPoll || wklSelectedId == ctxt->wklCurrentId) {
+                // Clear workload signal for the selected workload
+                spurs->m.wklSignal1.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
+                spurs->m.wklSignal2.write_relaxed(be_t::make(spurs->m.wklSignal2.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+
+                // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
+                if (wklSelectedId == spurs->m.wklFlagReceiver.read_relaxed()) {
+                    spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF));
+                }
+            }
+        }
+
+        if (!isPoll) {
+            // Called by kernel
+            // Increment the contention for the selected workload
+            if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+                contention[wklSelectedId]++;
+            }
+
+            for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
+                spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4);
+                spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i];
+                ctxt->wklLocContention[i] = 0;
+                ctxt->wklLocPendingContention[i] = 0;
+            }
+
+            ctxt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
+            ctxt->wklCurrentId = wklSelectedId;
+        } else if (wklSelectedId != ctxt->wklCurrentId) {
+            // Not called by kernel but a context switch is required
+            // Increment the pending contention for the selected workload
+            if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+                pendingContention[wklSelectedId]++;
+            }
+
+            for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
+                spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4);
+                ctxt->wklLocPendingContention[i] = 0;
+            }
+
+            ctxt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
+        } else {
+            // Not called by kernel and no context switch is required
+            for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+                spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i];
+                ctxt->wklLocPendingContention[i] = 0;
+            }
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    u64 result = (u64)wklSelectedId << 32;
+    result |= pollStatus;
+    spu.GPR[3]._u64[1] = result;
+    return true;
+}
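In the 32-workload (kernel2) layout the per-workload counters are nibble-packed: byte i of wklCurrentContention holds workload i in its low nibble and workload i + 16 in its high nibble, which is why the code above writes contention[i] | (contention[i + 0x10] << 4) and reads back with & 0x0F or >> 4. A standalone sketch of that encoding (typedefs are this editor's stand-ins for the emulator's aliases):

    #include <cstdint>

    typedef uint8_t u8;

    // Pack the contention of workload i (low) and workload i+16 (high) into one byte.
    u8 packContention(u8 low, u8 high) {
        return (low & 0x0F) | (high << 4);
    }

    // Extract the contention of a workload id in 0..31 from the packed array.
    u8 contentionOf(const u8 * wklCurrentContention, unsigned wid) {
        u8 b = wklCurrentContention[wid & 0x0F];
        return wid < 16 ? b & 0x0F : b >> 4; // low nibble: 0-15, high nibble: 16-31
    }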
+
+/// SPURS kernel dispatch workload
+void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x100);
+    auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
+
+    auto pollStatus = (u32)widAndPollStatus;
+    auto wid = (u32)(widAndPollStatus >> 32);
+
+    // DMA in the workload info for the selected workload
+    auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? offsetof(CellSpurs, m.wklInfo1[wid]) :
+                         wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? offsetof(CellSpurs, m.wklInfo2[wid & 0xf]) :
+                         offsetof(CellSpurs, m.wklInfoSysSrv);
+    spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + wklInfoOffset, 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID);
+    spursDmaWaitForCompletion(spu, 0x80000000);
+
+    // Load the workload to LS
+    auto wklInfo = vm::get_ptr(spu.ls_offset + 0x3FFE0);
+    if (ctxt->wklCurrentAddr != wklInfo->addr) {
+        switch (wklInfo->addr.addr()) {
+        case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD:
+            spu.RegisterHleFunction(0xA00, spursSysServiceEntry);
+            break;
+        case SPURS_IMG_ADDR_TASKSET_PM:
+            spu.RegisterHleFunction(0xA00, spursTasksetEntry);
+            break;
+        default:
+            spursDma(spu, MFC_GET_CMD, wklInfo->addr.addr(), 0xA00/*LSA*/, wklInfo->size, CELL_SPURS_KERNEL_DMA_TAG_ID);
+            spursDmaWaitForCompletion(spu, 0x80000000);
+            break;
+        }
+
+        ctxt->wklCurrentAddr = wklInfo->addr;
+        ctxt->wklCurrentUniqueId = wklInfo->uniqueId.read_relaxed();
+    }
+
+    if (!isKernel2) {
+        ctxt->moduleId[0] = 0;
+        ctxt->moduleId[1] = 0;
+    }
+
+    // Run workload
+    spu.GPR[0]._u32[3] = ctxt->exitToKernelAddr;
+    spu.GPR[1]._u32[3] = 0x3FFB0;
+    spu.GPR[3]._u32[3] = 0x100;
+    spu.GPR[4]._u64[1] = wklInfo->arg;
+    spu.GPR[5]._u32[3] = pollStatus;
+    spu.SetBranch(0xA00);
+}
+
+/// SPURS kernel workload exit
+bool spursKernelWorkloadExit(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x100);
+    auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
+
+    // Select next workload to run
+    spu.GPR[3].clear();
+    if (isKernel2) {
+        spursKernel2SelectWorkload(spu);
+    } else {
+        spursKernel1SelectWorkload(spu);
+    }
+
+    spursKernelDispatchWorkload(spu, spu.GPR[3]._u64[1]);
+    return false;
+}
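The "Run workload" block above fixes the ABI between the kernel and a policy module: the module is entered at LS address 0xA00 with its state in a handful of registers, and it returns to the kernel by branching to the exit address it was given in GPR[0] (which is registered as the HLE function spursKernelWorkloadExit). A sketch that records that convention as a plain struct; the struct and field names are illustrative, not from the source:

    #include <cstdint>

    typedef uint32_t u32;
    typedef uint64_t u64;

    // Register state handed to a policy module when it is entered at LS 0xA00.
    struct PolicyModuleEntryState {
        u32 returnAddr;  // GPR[0]: exit-to-kernel address, branch here to return
        u32 stackPtr;    // GPR[1]: top of stack, 0x3FFB0
        u32 contextAddr; // GPR[3]: LS address of the kernel context, 0x100
        u64 moduleArg;   // GPR[4]: the workload's wklInfo->arg
        u32 pollStatus;  // GPR[5]: poll status produced by workload selection
    };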
+
+/// SPURS kernel entry point
+bool spursKernelEntry(SPUThread & spu) {
+    if (Emu.IsStopped()) {
+        return false;
+    }
+
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x100);
+    memset(ctxt, 0, sizeof(SpursKernelContext));
+
+    // Save arguments
+    ctxt->spuNum = spu.GPR[3]._u32[3];
+    ctxt->spurs.set(spu.GPR[4]._u64[1]);
+
+    auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
+
+    // Initialise the SPURS context to its initial values
+    ctxt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID;
+    ctxt->wklCurrentUniqueId = 0x20;
+    ctxt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+    ctxt->exitToKernelAddr = isKernel2 ? CELL_SPURS_KERNEL2_EXIT_ADDR : CELL_SPURS_KERNEL1_EXIT_ADDR;
+    ctxt->selectWorkloadAddr = isKernel2 ? CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR : CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR;
+    if (!isKernel2) {
+        ctxt->x1F0 = 0xF0020000;
+        ctxt->x200 = 0x20000;
+        ctxt->guid[0] = 0x423A3A02;
+        ctxt->guid[1] = 0x43F43A82;
+        ctxt->guid[2] = 0x43F26502;
+        ctxt->guid[3] = 0x420EB382;
+    } else {
+        ctxt->guid[0] = 0x43A08402;
+        ctxt->guid[1] = 0x43FB0A82;
+        ctxt->guid[2] = 0x435E9302;
+        ctxt->guid[3] = 0x43A3C982;
+    }
+
+    // Register SPURS kernel HLE functions
+    spu.UnregisterHleFunctions(0, 0x40000/*LS_BOTTOM*/);
+    spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelEntry);
+    spu.RegisterHleFunction(ctxt->exitToKernelAddr, spursKernelWorkloadExit);
+    spu.RegisterHleFunction(ctxt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload);
+
+    // Start the system service
+    spursKernelDispatchWorkload(spu, ((u64)CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) << 32);
+    return false;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// SPURS system workload functions
+//////////////////////////////////////////////////////////////////////////////
+
+/// Entry point of the system service
+bool spursSysServiceEntry(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]);
+    auto arg = spu.GPR[4]._u64[1];
+    auto pollStatus = spu.GPR[5]._u32[3];
+
+    if (ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+        spursSysServiceMain(spu, pollStatus);
+    } else {
+        // TODO: If we reach here it means the current workload was preempted to start the
+        // system workload. Need to implement this.
+    }
+
+    cellSpursModuleExit(spu);
+    return false;
+}
+
+/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled
+void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) {
+    // Monitor only lock line reservation lost events
+    spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR));
+
+    bool shouldExit;
+    while (true) {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x100);
+
+        // Find the number of SPUs that are idling in this SPURS instance
+        u32 nIdlingSpus = 0;
+        for (u32 i = 0; i < 8; i++) {
+            if (spurs->m.spuIdling & (1 << i)) {
+                nIdlingSpus++;
+            }
+        }
+
+        bool allSpusIdle = nIdlingSpus == spurs->m.nSpus ? true : false;
+        bool exitIfNoWork = spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? true : false;
+        shouldExit = allSpusIdle && exitIfNoWork;
+
+        // Check if any workloads can be scheduled
+        bool foundReadyWorkload = false;
+        if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) {
+            foundReadyWorkload = true;
+        } else {
+            if (spurs->m.flags1 & SF1_32_WORKLOADS) {
+                for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
+                    u32 j = i & 0x0F;
+                    u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j);
+                    u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4;
+                    u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4;
+                    u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklCurrentContention[j] & 0x0F : spurs->m.wklCurrentContention[j] >> 4;
+                    u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j);
+                    u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+                    u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();
+
+                    if (runnable && priority > 0 && maxContention > contention) {
+                        if (wklFlag || wklSignal || readyCount > contention) {
+                            foundReadyWorkload = true;
+                            break;
+                        }
+                    }
+                }
+            } else {
+                for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+                    u16 runnable = ctxt->wklRunnable1 & (0x8000 >> i);
+                    u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
+                    u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+                    u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed();
+                    u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
+                    u8 requestCount = readyCount + idleSpuCount;
+
+                    if (runnable && ctxt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > spurs->m.wklCurrentContention[i]) {
+                        if (wklFlag || wklSignal || (readyCount != 0 && requestCount > spurs->m.wklCurrentContention[i])) {
+                            foundReadyWorkload = true;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+
+        bool spuIdling = spurs->m.spuIdling & (1 << ctxt->spuNum) ? true : false;
+        if (foundReadyWorkload && shouldExit == false) {
+            spurs->m.spuIdling &= ~(1 << ctxt->spuNum);
+        } else {
+            spurs->m.spuIdling |= 1 << ctxt->spuNum;
+        }
+
+        // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events.
+        if (spuIdling && shouldExit == false && foundReadyWorkload == false) {
+            // The system service blocks by making a reservation and waiting on the lock line reservation lost event.
+            u128 r;
+            spu.ReadChannel(r, SPU_RdEventStat);
+            spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR));
+        }
+
+        auto dmaSuccess = spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        if (dmaSuccess && (shouldExit || foundReadyWorkload)) {
+            break;
+        }
+    }
+
+    if (shouldExit) {
+        // TODO: exit spu thread group
+    }
+}
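The idle handler above sleeps without polling by combining a lock-line reservation with the SPU event channels: GETLLAR takes a reservation on the shared CellSpurs line, and reading SPU_RdEventStat then blocks until another processor writes that line and the reservation is lost. A condensed sketch of just that wait pattern, assuming the SPUThread channel interface and spursDma helper used throughout this file:

    // Block until the 128-byte line at effective address ea is written by someone else.
    void waitForLockLineUpdate(SPUThread & spu, u64 ea) {
        // Listen only for lock-line reservation-lost events
        spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR));

        // Take the reservation
        spursDma(spu, MFC_GETLLAR_CMD, ea, 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);

        u128 r;
        spu.ReadChannel(r, SPU_RdEventStat);                           // blocks until the line changes
        spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR)); // acknowledge the event
    }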
+
+/// Main function for the system service
+void spursSysServiceMain(SPUThread & spu, u32 pollStatus) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x100);
+
+    if (ctxt->spurs.addr() % CellSpurs::align) {
+        spursHalt(spu);
+        return;
+    }
+
+    // Initialise the system service if this is the first time it's being started on this SPU
+    if (ctxt->sysSrvInitialised == 0) {
+        ctxt->sysSrvInitialised = 1;
+
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+
+        do {
+            spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+            auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+            // Halt if already initialised
+            if (spurs->m.sysSrvOnSpu & (1 << ctxt->spuNum)) {
+                spursHalt(spu);
+                return;
+            }
+
+            spurs->m.sysSrvOnSpu |= 1 << ctxt->spuNum;
+        } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+        ctxt->traceBuffer = 0;
+        ctxt->traceMsgCount = -1;
+        spursSysServiceTraceUpdate(spu, ctxt, 1, 1, 0);
+        spursSysServiceCleanupAfterSystemWorkload(spu, ctxt);
+
+        // Trace - SERVICE: INIT
+        CellSpursTracePacket pkt;
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+        pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT;
+        cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+    }
+
+    // Trace - START: Module='SYS '
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = CELL_SPURS_TRACE_TAG_START;
+    memcpy(pkt.data.start.module, "SYS ", 4);
+    pkt.data.start.level = 1; // Policy module
+    pkt.data.start.ls = 0xA00 >> 2;
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+    while (true) {
+        // Process requests for the system service
+        spursSysServiceProcessRequests(spu, ctxt);
+
+poll:
+        if (cellSpursModulePollStatus(spu, nullptr)) {
+            // Trace - SERVICE: EXIT
+            CellSpursTracePacket pkt;
+            memset(&pkt, 0, sizeof(pkt));
+            pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+            pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT;
+            cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+            // Trace - STOP: GUID
+            memset(&pkt, 0, sizeof(pkt));
+            pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP;
+            pkt.data.stop = SPURS_GUID_SYS_WKL;
+            cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+            spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+            break;
+        }
+
+        // If we reach here it means that either there are more system service messages to be processed
+        // or there are no workloads that can be scheduled.
+
+        // If the SPU is not idling then process the remaining system service messages
+        if (ctxt->spuIdling == 0) {
+            continue;
+        }
+
+        // If we reach here it means that the SPU is idling
+
+        // Trace - SERVICE: WAIT
+        CellSpursTracePacket pkt;
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+        pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT;
+        cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+        spursSysServiceIdleHandler(spu, ctxt);
+        goto poll;
+    }
+}
+
+/// Process any requests
+void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt) {
+    bool updateTrace = false;
+    bool updateWorkload = false;
+    bool terminate = false;
+
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+        // Terminate request
+        if (spurs->m.sysSrvMsgTerminate & (1 << ctxt->spuNum)) {
+            spurs->m.sysSrvOnSpu &= ~(1 << ctxt->spuNum);
+            terminate = true;
+        }
+
+        // Update workload message
+        if (spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << ctxt->spuNum)) {
+            spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << ctxt->spuNum);
+            updateWorkload = true;
+        }
+
+        // Update trace message
+        if (spurs->m.sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) {
+            updateTrace = true;
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    // Process update workload message
+    if (updateWorkload) {
+        spursSysServiceActivateWorkload(spu, ctxt);
+    }
+
+    // Process update trace message
+    if (updateTrace) {
+        spursSysServiceTraceUpdate(spu, ctxt, 1, 0, 0);
+    }
+
+    // Process terminate request
+    if (terminate) {
+        // TODO: Rest of the terminate processing
+    }
+}
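Every shared-state update in this module follows the same optimistic pattern visible above: GETLLAR a 128-byte line of the CellSpurs structure into LS, mutate the local copy, then PUTLLC it back and retry the whole block if the conditional store fails because another SPU or the PPU touched the line in between. A sketch with the mutation factored out; the template and the lambda parameter are this editor's generalisation, not an interface from the source:

    // Optimistic read-modify-write of a 128-byte line, GETLLAR/PUTLLC style.
    template<typename F>
    void atomicUpdate(SPUThread & spu, u64 ea, u32 lsa, F && mutate) {
        do {
            // Load the line into LS and take a reservation on it
            spursDma(spu, MFC_GETLLAR_CMD, ea, lsa, 0x80/*size*/, 0/*tag*/);

            // Edit the copy in LS (caller reads/writes spu.ls_offset + lsa)
            mutate();
        } while (spursDma(spu, MFC_PUTLLC_CMD, ea, lsa, 0x80/*size*/, 0/*tag*/) == false); // retry if reservation was lost
    }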
+
+/// Activate a workload
+void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) {
+    auto spurs = vm::get_ptr(spu.ls_offset + 0x100);
+    spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo1), 0x30000/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID);
+    if (spurs->m.flags1 & SF1_32_WORKLOADS) {
+        spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo2), 0x30200/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID);
+    }
+
+    u32 wklShutdownBitSet = 0;
+    ctxt->wklRunnable1 = 0;
+    ctxt->wklRunnable2 = 0;
+    for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+        auto wklInfo1 = vm::get_ptr(spu.ls_offset + 0x30000);
+
+        // Copy the priority of the workload for this SPU and its unique id to the LS
+        ctxt->priority[i] = wklInfo1[i].priority[ctxt->spuNum] == 0 ? 0 : 0x10 - wklInfo1[i].priority[ctxt->spuNum];
+        ctxt->wklUniqueId[i] = wklInfo1[i].uniqueId.read_relaxed();
+
+        if (spurs->m.flags1 & SF1_32_WORKLOADS) {
+            auto wklInfo2 = vm::get_ptr(spu.ls_offset + 0x30200);
+
+            // Copy the priority of the workload for this SPU to the LS
+            if (wklInfo2[i].priority[ctxt->spuNum]) {
+                ctxt->priority[i] |= (0x10 - wklInfo2[i].priority[ctxt->spuNum]) << 4;
+            }
+        }
+    }
+
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+        for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+            // Update workload status and runnable flag based on the workload state
+            auto wklStatus = spurs->m.wklStatus1[i];
+            if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) {
+                spurs->m.wklStatus1[i] |= 1 << ctxt->spuNum;
+                ctxt->wklRunnable1 |= 0x8000 >> i;
+            } else {
+                spurs->m.wklStatus1[i] &= ~(1 << ctxt->spuNum);
+            }
+
+            // If the workload is shutting down and if this is the last SPU from which it is being removed then
+            // add it to the shutdown bit set
+            if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) {
+                if (((wklStatus & (1 << ctxt->spuNum)) != 0) && (spurs->m.wklStatus1[i] == 0)) {
+                    spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE);
+                    wklShutdownBitSet |= 0x80000000u >> i;
+                }
+            }
+
+            if (spurs->m.flags1 & SF1_32_WORKLOADS) {
+                // Update workload status and runnable flag based on the workload state
+                wklStatus = spurs->m.wklStatus2[i];
+                if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) {
+                    spurs->m.wklStatus2[i] |= 1 << ctxt->spuNum;
+                    ctxt->wklRunnable2 |= 0x8000 >> i;
+                } else {
+                    spurs->m.wklStatus2[i] &= ~(1 << ctxt->spuNum);
+                }
+
+                // If the workload is shutting down and if this is the last SPU from which it is being removed then
+                // add it to the shutdown bit set
+                if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) {
+                    if (((wklStatus & (1 << ctxt->spuNum)) != 0) && (spurs->m.wklStatus2[i] == 0)) {
+                        spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE);
+                        wklShutdownBitSet |= 0x8000 >> i;
+                    }
+                }
+            }
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    if (wklShutdownBitSet) {
+        spursSysServiceUpdateShutdownCompletionEvents(spu, ctxt, wklShutdownBitSet);
+    }
+}
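The per-SPU priority table built above is stored inverted: a workload priority p in 1..15 becomes the nibble 0x10 - p (and priority 0 stays 0, meaning the workload never runs on this SPU), so that the numerically stronger priorities produce the larger scheduling weights computed by the select-workload loops (weight = priority << 4, plus 0x04 for the current workload). For the second workload set the converted nibble is packed into the high half of the same byte. A small sketch of that conversion, with names chosen by this editor:

    #include <cstdint>

    typedef uint8_t u8;

    // Convert a CellSpurs per-SPU priority into the weight nibble the kernel stores.
    u8 priorityToWeightNibble(u8 p) {
        return p == 0 ? 0 : (u8)(0x10 - p); // 0 -> never scheduled, otherwise stronger priority -> larger nibble
    }

    // Pack the nibbles for workload i (set 1) and workload i+16 (set 2) into one byte.
    u8 packPriorityByte(u8 set1Priority, u8 set2Priority) {
        return priorityToWeightNibble(set1Priority) | (priorityToWeightNibble(set2Priority) << 4);
    }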
+
+/// Update shutdown completion events
+void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, SpursKernelContext * ctxt, u32 wklShutdownBitSet) {
+    // Mark the workloads in wklShutdownBitSet as completed and also generate a bit set of the completed
+    // workloads that have a shutdown completion hook registered
+    u32 wklNotifyBitSet;
+    u8 spuPort;
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+        wklNotifyBitSet = 0;
+        spuPort = spurs->m.spuPort;
+        for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+            if (wklShutdownBitSet & (0x80000000u >> i)) {
+                spurs->m.wklEvent1[i] |= 0x01;
+                if (spurs->m.wklEvent1[i] & 0x02 || spurs->m.wklEvent1[i] & 0x10) {
+                    wklNotifyBitSet |= 0x80000000u >> i;
+                }
+            }
+
+            if (wklShutdownBitSet & (0x8000 >> i)) {
+                spurs->m.wklEvent2[i] |= 0x01;
+                if (spurs->m.wklEvent2[i] & 0x02 || spurs->m.wklEvent2[i] & 0x10) {
+                    wklNotifyBitSet |= 0x8000 >> i;
+                }
+            }
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    if (wklNotifyBitSet) {
+        // TODO: sys_spu_thread_send_event(spuPort, 0, wklNotifyBitSet);
+    }
+}
+
+/// Update the trace count for this SPU
+void spursSysServiceTraceSaveCount(SPUThread & spu, SpursKernelContext * ctxt) {
+    if (ctxt->traceBuffer) {
+        auto traceInfo = vm::ptr::make((u32)(ctxt->traceBuffer - (ctxt->spurs->m.traceStartIndex[ctxt->spuNum] << 4)));
+        traceInfo->count[ctxt->spuNum] = ctxt->traceMsgCount;
+    }
+}
+
+/// Update trace control
+void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 arg2, u32 arg3, u32 arg4) {
+    bool notify;
+
+    u8 sysSrvMsgUpdateTrace;
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+        sysSrvMsgUpdateTrace = spurs->m.sysSrvMsgUpdateTrace;
+        spurs->m.sysSrvMsgUpdateTrace &= ~(1 << ctxt->spuNum);
+        spurs->m.xCC &= ~(1 << ctxt->spuNum);
+        spurs->m.xCC |= arg2 << ctxt->spuNum;
+
+        notify = false;
+        if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) && (spurs->m.sysSrvMsgUpdateTrace == 0) && (spurs->m.xCD != 0)) {
+            spurs->m.xCD = 0;
+            notify = true;
+        }
+
+        if (arg4 && spurs->m.xCD != 0) {
+            spurs->m.xCD = 0;
+            notify = true;
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    // Get trace parameters from CellSpurs and store them in the LS
+    if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) || (arg3 != 0)) {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.traceBuffer), 0x80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x80 - offsetof(CellSpurs, m.traceBuffer));
+
+        if (ctxt->traceMsgCount != 0xFF || spurs->m.traceBuffer.addr() == 0) {
+            spursSysServiceTraceSaveCount(spu, ctxt);
+        } else {
+            spursDma(spu, MFC_GET_CMD, spurs->m.traceBuffer.addr() & 0xFFFFFFFC, 0x2C00/*LSA*/, 0x80/*size*/, ctxt->dmaTagId);
+            auto traceBuffer = vm::get_ptr(spu.ls_offset + 0x2C00);
+            ctxt->traceMsgCount = traceBuffer->count[ctxt->spuNum];
+        }
+
+        ctxt->traceBuffer = spurs->m.traceBuffer.addr() + (spurs->m.traceStartIndex[ctxt->spuNum] << 4);
+        ctxt->traceMaxCount = spurs->m.traceStartIndex[1] - spurs->m.traceStartIndex[0];
+        if (ctxt->traceBuffer == 0) {
+            ctxt->traceMsgCount = 0;
+        }
+    }
+
+    if (notify) {
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+        sys_spu_thread_send_event(spu, spurs->m.spuPort, 2, 0);
+    }
+}
+
+/// Restore state after executing the system workload
+void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelContext * ctxt) {
+    u8 wklId;
+
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+        if (spurs->m.sysSrvWorkload[ctxt->spuNum] == 0xFF) {
+            return;
+        }
+
+        wklId = spurs->m.sysSrvWorkload[ctxt->spuNum];
+        spurs->m.sysSrvWorkload[ctxt->spuNum] = 0xFF;
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    spursSysServiceActivateWorkload(spu, ctxt);
+
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x100);
+
+        if (wklId >= CELL_SPURS_MAX_WORKLOAD) {
+            spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10;
+            spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1);
+        } else {
+            spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01;
+            spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1);
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace
+    // uses the current workload id to determine the workload to which the trace belongs
+    auto wklIdSaved = ctxt->wklCurrentId;
+    ctxt->wklCurrentId = wklId;
+
+    // Trace - STOP: GUID
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP;
+    pkt.data.stop = SPURS_GUID_SYS_WKL;
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+    ctxt->wklCurrentId = wklIdSaved;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// SPURS taskset policy module functions
+//////////////////////////////////////////////////////////////////////////////
+
+enum SpursTasksetRequest {
+    SPURS_TASKSET_REQUEST_POLL_SIGNAL = -1,
+    SPURS_TASKSET_REQUEST_DESTROY_TASK = 0,
+    SPURS_TASKSET_REQUEST_YIELD_TASK = 1,
+    SPURS_TASKSET_REQUEST_WAIT_SIGNAL = 2,
+    SPURS_TASKSET_REQUEST_POLL = 3,
+    SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG = 4,
+    SPURS_TASKSET_REQUEST_SELECT_TASK = 5,
+    SPURS_TASKSET_REQUEST_RECV_WKL_FLAG = 6,
+};
+
+/// Taskset PM entry point
+bool spursTasksetEntry(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+    auto kernelCtxt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]);
+
+    auto arg = spu.GPR[4]._u64[1];
+    auto pollStatus = spu.GPR[5]._u32[3];
+
+    // Initialise memory and save args
+    memset(ctxt, 0, sizeof(*ctxt));
+    ctxt->taskset.set(arg);
+    memcpy(ctxt->moduleId, "SPURSTASK MODULE", sizeof(ctxt->moduleId));
+    ctxt->kernelMgmtAddr = spu.GPR[3]._u32[3];
+    ctxt->syscallAddr = CELL_SPURS_TASKSET_PM_SYSCALL_ADDR;
+    ctxt->spuNum = kernelCtxt->spuNum;
+    ctxt->dmaTagId = kernelCtxt->dmaTagId;
+    ctxt->taskId = 0xFFFFFFFF;
+
+    // Register SPURS taskset policy module HLE functions
+    spu.UnregisterHleFunctions(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, 0x40000/*LS_BOTTOM*/);
+    spu.RegisterHleFunction(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, spursTasksetEntry);
+    spu.RegisterHleFunction(ctxt->syscallAddr, spursTasksetSyscallEntry);
+
+    // Initialise the taskset policy module
+    spursTasksetInit(spu, pollStatus);
+
+    // Dispatch
+    spursTasksetDispatch(spu);
+    return false;
+}
+
+/// Entry point into the Taskset PM for task syscalls
+bool spursTasksetSyscallEntry(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    // Save task context
+    ctxt->savedContextLr = spu.GPR[0];
+    ctxt->savedContextSp = spu.GPR[1];
+    for (auto i = 0; i < 48; i++) {
+        ctxt->savedContextR80ToR127[i] = spu.GPR[80 + i];
+    }
+
+    // Handle the syscall
+    spu.GPR[3]._u32[3] = spursTasksetProcessSyscall(spu, spu.GPR[3]._u32[3], spu.GPR[4]._u32[3]);
+
+    // Resume the previously executing task if the syscall did not cause a context switch
+    if (spu.m_is_branch == false) {
+        spursTasksetResumeTask(spu);
+    }
+
+    return false;
+}
+
+/// Resume a task
+void spursTasksetResumeTask(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    // Restore task context
+    spu.GPR[0] = ctxt->savedContextLr;
+    spu.GPR[1] = ctxt->savedContextSp;
+    for (auto i = 0; i < 48; i++) {
+        spu.GPR[80 + i] = ctxt->savedContextR80ToR127[i];
+    }
+
+    spu.SetBranch(spu.GPR[0]._u32[3]);
+}
+
+/// Start a task
+void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+    auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    spu.GPR[2].clear();
+    spu.GPR[3] = taskArgs._u128;
+    spu.GPR[4]._u64[1] = taskset->m.args;
+    spu.GPR[4]._u64[0] = taskset->m.spurs.addr();
+    for (auto i = 5; i < 128; i++) {
+        spu.GPR[i].clear();
+    }
+
+    spu.SetBranch(ctxt->savedContextLr.value()._u32[3]);
+}
+
+/// Process a request and update the state of the taskset
+s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting) {
+    auto kernelCtxt = vm::get_ptr(spu.ls_offset + 0x100);
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    s32 rc = CELL_OK;
+    s32 numNewlyReadyTasks;
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
+
+        // Verify taskset state is valid
+        auto _0 = be_t::make(u128::from32(0));
+        if ((taskset->m.waiting & taskset->m.running) != _0 || (taskset->m.ready & taskset->m.pending_ready) != _0 ||
+            ((taskset->m.running | taskset->m.ready | taskset->m.pending_ready | taskset->m.signalled | taskset->m.waiting) & be_t::make(~taskset->m.enabled.value())) != _0) {
+            spursHalt(spu);
+            return CELL_OK;
+        }
+
+        // Find the number of tasks that have become ready since the last iteration
+        auto newlyReadyTasks = (taskset->m.signalled | taskset->m.pending_ready).value() & ~taskset->m.ready.value();
+        numNewlyReadyTasks = 0;
+        for (auto i = 0; i < 128; i++) {
+            if (newlyReadyTasks._bit[i]) {
+                numNewlyReadyTasks++;
+            }
+        }
+
+        u128 readyButNotRunning;
+        u8 selectedTaskId;
+        auto running = taskset->m.running.value();
+        auto waiting = taskset->m.waiting.value();
+        auto enabled = taskset->m.enabled.value();
+        auto signalled = (taskset->m.signalled & (taskset->m.ready | taskset->m.pending_ready)).value();
+        auto ready = (taskset->m.signalled | taskset->m.ready | taskset->m.pending_ready).value();
+
+        switch (request) {
+        case SPURS_TASKSET_REQUEST_POLL_SIGNAL:
+            rc = signalled._bit[ctxt->taskId] ? 1 : 0;
+            signalled._bit[ctxt->taskId] = false;
+            break;
+        case SPURS_TASKSET_REQUEST_DESTROY_TASK:
+            numNewlyReadyTasks--;
+            running._bit[ctxt->taskId] = false;
+            enabled._bit[ctxt->taskId] = false;
+            signalled._bit[ctxt->taskId] = false;
+            ready._bit[ctxt->taskId] = false;
+            break;
+        case SPURS_TASKSET_REQUEST_YIELD_TASK:
+            running._bit[ctxt->taskId] = false;
+            waiting._bit[ctxt->taskId] = true;
+            break;
+        case SPURS_TASKSET_REQUEST_WAIT_SIGNAL:
+            if (signalled._bit[ctxt->taskId] == false) {
+                numNewlyReadyTasks--;
+                running._bit[ctxt->taskId] = false;
+                waiting._bit[ctxt->taskId] = true;
+                signalled._bit[ctxt->taskId] = false;
+                ready._bit[ctxt->taskId] = false;
+            }
+            break;
+        case SPURS_TASKSET_REQUEST_POLL:
+            readyButNotRunning = ready & ~running;
+            if (taskset->m.wkl_flag_wait_task < CELL_SPURS_MAX_TASK) {
+                readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->m.wkl_flag_wait_task));
+            }
+
+            rc = readyButNotRunning != _0 ? 1 : 0;
+            break;
+        case SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG:
+            if (taskset->m.wkl_flag_wait_task == 0x81) {
+                // A workload flag is already pending so consume it
+                taskset->m.wkl_flag_wait_task = 0x80;
+                rc = 0;
+            } else if (taskset->m.wkl_flag_wait_task == 0x80) {
+                // No tasks are waiting for the workload flag. Mark this task as waiting for the workload flag.
+                taskset->m.wkl_flag_wait_task = ctxt->taskId;
+                running._bit[ctxt->taskId] = false;
+                waiting._bit[ctxt->taskId] = true;
+                rc = 1;
+                numNewlyReadyTasks--;
+            } else {
+                // Another task is already waiting for the workload flag
+                rc = CELL_SPURS_TASK_ERROR_BUSY;
+            }
+            break;
+        case SPURS_TASKSET_REQUEST_SELECT_TASK:
+            readyButNotRunning = ready & ~running;
+            if (taskset->m.wkl_flag_wait_task < CELL_SPURS_MAX_TASK) {
+                readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->m.wkl_flag_wait_task));
+            }
+
+            // Select a task from the readyButNotRunning set to run. Start from the task after the last scheduled task to ensure fairness.
+            for (selectedTaskId = taskset->m.last_scheduled_task + 1; selectedTaskId < 128; selectedTaskId++) {
+                if (readyButNotRunning._bit[selectedTaskId]) {
+                    break;
+                }
+            }
+
+            if (selectedTaskId == 128) {
+                for (selectedTaskId = 0; selectedTaskId < taskset->m.last_scheduled_task + 1; selectedTaskId++) {
+                    if (readyButNotRunning._bit[selectedTaskId]) {
+                        break;
+                    }
+                }
+
+                if (selectedTaskId == taskset->m.last_scheduled_task + 1) {
+                    selectedTaskId = CELL_SPURS_MAX_TASK;
+                }
+            }
+
+            *taskId = selectedTaskId;
+            *isWaiting = waiting._bit[selectedTaskId < CELL_SPURS_MAX_TASK ? selectedTaskId : 0] ? 1 : 0;
+            if (selectedTaskId != CELL_SPURS_MAX_TASK) {
+                taskset->m.last_scheduled_task = selectedTaskId;
+                running._bit[selectedTaskId] = true;
+                waiting._bit[selectedTaskId] = false;
+            }
+            break;
+        case SPURS_TASKSET_REQUEST_RECV_WKL_FLAG:
+            if (taskset->m.wkl_flag_wait_task < CELL_SPURS_MAX_TASK) {
+                // There is a task waiting for the workload flag
+                taskset->m.wkl_flag_wait_task = 0x80;
+                rc = 1;
+                numNewlyReadyTasks++;
+            } else {
+                // No tasks are waiting for the workload flag
+                taskset->m.wkl_flag_wait_task = 0x81;
+                rc = 0;
+            }
+            break;
+        default:
+            spursHalt(spu);
+            return CELL_OK;
+        }
+
+        taskset->m.pending_ready = _0;
+        taskset->m.running = running;
+        taskset->m.waiting = waiting;
+        taskset->m.enabled = enabled;
+        taskset->m.signalled = signalled;
+        taskset->m.ready = ready;
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    // Increment the ready count of the workload by the number of tasks that have become ready
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, kernelCtxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x100);
+
+        s32 readyCount = kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[kernelCtxt->wklCurrentId].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].read_relaxed();
+        readyCount += numNewlyReadyTasks;
+        readyCount = readyCount < 0 ? 0 : readyCount > 0xFF ? 0xFF : readyCount;
+
+        if (kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD) {
+            spurs->m.wklReadyCount1[kernelCtxt->wklCurrentId].write_relaxed(readyCount);
+        } else {
+            spurs->m.wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].write_relaxed(readyCount);
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, kernelCtxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    return rc;
+}
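Each taskset tracks its 128 tasks as five 128-bit sets, and the validity check at the top of spursTasksetProcessRequest enforces three invariants: a task is never waiting and running at once, never ready and pending-ready at once, and never in any set while disabled. A standalone sketch of the same invariants using a pair of 64-bit halves per set (the emulator's u128 type is assumed above; the helper names are this editor's):

    #include <cstdint>

    typedef uint64_t u64;

    struct TaskSet128 { u64 hi, lo; };

    static TaskSet128 operator&(TaskSet128 a, TaskSet128 b) { return { a.hi & b.hi, a.lo & b.lo }; }
    static TaskSet128 operator|(TaskSet128 a, TaskSet128 b) { return { a.hi | b.hi, a.lo | b.lo }; }
    static TaskSet128 operator~(TaskSet128 a) { return { ~a.hi, ~a.lo }; }
    static bool any(TaskSet128 a) { return (a.hi | a.lo) != 0; }

    bool tasksetStateIsValid(TaskSet128 running, TaskSet128 ready, TaskSet128 pendingReady,
                             TaskSet128 waiting, TaskSet128 signalled, TaskSet128 enabled) {
        if (any(waiting & running)) return false;      // waiting and running are exclusive
        if (any(ready & pendingReady)) return false;   // ready and pending-ready are exclusive
        if (any((running | ready | pendingReady | signalled | waiting) & ~enabled)) return false; // no state for disabled tasks
        return true;
    }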
+
+/// Process pollStatus received from the SPURS kernel
+void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) {
+    if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) {
+        spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_RECV_WKL_FLAG, nullptr, nullptr);
+    }
+}
+
+/// Check execution rights
+bool spursTasksetPollStatus(SPUThread & spu) {
+    u32 pollStatus;
+
+    if (cellSpursModulePollStatus(spu, &pollStatus)) {
+        return true;
+    }
+
+    spursTasksetProcessPollStatus(spu, pollStatus);
+    return false;
+}
+
+/// Exit the Taskset PM
+void spursTasksetExit(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    // Trace - STOP
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = 0x54; // It's not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_STOP
+    pkt.data.stop = SPURS_GUID_TASKSET_PM;
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+    // Not sure why this check exists. Perhaps to check for memory corruption.
+    if (memcmp(ctxt->moduleId, "SPURSTASK MODULE", 16) != 0) {
+        spursHalt(spu);
+    }
+
+    cellSpursModuleExit(spu);
+}
+
+/// Invoked when a task exits
+void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    spursDma(spu, MFC_GET_CMD, addr & 0xFFFFFF80, 0x10000/*LSA*/, (addr & 0x7F) << 11/*size*/, 0);
+    spursDmaWaitForCompletion(spu, 1);
+
+    spu.GPR[3]._u64[1] = ctxt->taskset.addr();
+    spu.GPR[4]._u32[3] = taskId;
+    spu.GPR[5]._u32[3] = exitCode;
+    spu.GPR[6]._u64[1] = args;
+    spu.FastCall(0x10000);
+}
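The exit-handler address handed to spursTasksetOnTaskExit is packed: the upper bits are the 128-byte-aligned effective address of the handler image, and the low 7 bits encode the image size in 2 KB units, which is exactly how the function splits it before the DMA above. A small sketch of the decoding (typedefs and names are this editor's):

    #include <cstdint>

    typedef uint32_t u32;
    typedef uint64_t u64;

    // Split the packed exit-handler word into its address and size components.
    void decodeExitHandler(u64 packed, u64 & ea, u32 & size) {
        ea   = packed & 0xFFFFFF80ull;     // 128-byte aligned address of the handler image
        size = (u32)(packed & 0x7F) << 11; // low 7 bits: image size in 2 KB blocks
    }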
+
+/// Save the context of a task
+s32 spursTasketSaveTaskContext(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+    auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780);
+
+    spursDmaWaitForCompletion(spu, 0xFFFFFFFF);
+
+    if (taskInfo->context_save_storage_and_alloc_ls_blocks == 0) {
+        return CELL_SPURS_TASK_ERROR_STAT;
+    }
+
+    u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F;
+    u32 lsBlocks = 0;
+    for (auto i = 0; i < 128; i++) {
+        if (taskInfo->ls_pattern._u128.value()._bit[i]) {
+            lsBlocks++;
+        }
+    }
+
+    if (lsBlocks > allocLsBlocks) {
+        return CELL_SPURS_TASK_ERROR_STAT;
+    }
+
+    // Make sure the stack area is specified in the ls pattern
+    for (auto i = (ctxt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) {
+        if (taskInfo->ls_pattern._u128.value()._bit[i] == false) {
+            return CELL_SPURS_TASK_ERROR_STAT;
+        }
+    }
+
+    // Get the processor context
+    u128 r;
+    spu.FPSCR.Read(r);
+    ctxt->savedContextFpscr = r;
+    spu.ReadChannel(r, SPU_RdEventMask);
+    ctxt->savedSpuWriteEventMask = r._u32[3];
+    spu.ReadChannel(r, MFC_RdTagMask);
+    ctxt->savedWriteTagGroupQueryMask = r._u32[3];
+
+    // Store the processor context
+    u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull;
+    spursDma(spu, MFC_PUT_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId);
+
+    // Save LS context
+    for (auto i = 6; i < 128; i++) {
+        if (taskInfo->ls_pattern._u128.value()._bit[i]) {
+            // TODO: Combine DMA requests for consecutive blocks into a single request
+            spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId);
+        }
+    }
+
+    spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+    return CELL_OK;
+}
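ls_pattern is a 128-bit mask in which bit i stands for the i-th 2 KB block of the 256 KB local store; the save and restore loops skip the first six blocks (the region below CELL_SPURS_TASK_TOP) and lay the selected blocks out contiguously after a register-context header in the save storage. A sketch that sizes the save area for a given mask, assuming the layout visible above (0x380 bytes of register context written at offset 0, LS blocks starting at offset 0x400); the helper names are this editor's:

    #include <cstdint>

    typedef uint64_t u64;

    // Count how many 2 KB blocks a 128-bit ls_pattern selects.
    unsigned countLsBlocks(u64 patternHi, u64 patternLo) {
        unsigned n = 0;
        for (int i = 0; i < 64; i++) {
            n += (patternHi >> i) & 1;
            n += (patternLo >> i) & 1;
        }
        return n;
    }

    // Bytes of context-save storage needed: header area plus one 2 KB slot per set bit.
    u64 contextSaveSize(u64 patternHi, u64 patternLo) {
        return 0x400 + (u64)countLsBlocks(patternHi, patternLo) * 0x800;
    }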
+
+/// Taskset dispatcher
+void spursTasksetDispatch(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+    auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    u32 taskId;
+    u32 isWaiting;
+    spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_SELECT_TASK, &taskId, &isWaiting);
+    if (taskId >= CELL_SPURS_MAX_TASK) {
+        spursTasksetExit(spu);
+        return;
+    }
+
+    ctxt->taskId = taskId;
+
+    // DMA in the task info for the selected task
+    spursDma(spu, MFC_GET_CMD, ctxt->taskset.addr() + offsetof(CellSpursTaskset, m.task_info[taskId]), 0x2780/*LSA*/, sizeof(CellSpursTaskset::TaskInfo), ctxt->dmaTagId);
+    spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+    auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780);
+    auto elfAddr = taskInfo->elf_addr.addr().value();
+    taskInfo->elf_addr.set(taskInfo->elf_addr.addr() & 0xFFFFFFFFFFFFFFF8ull);
+
+    // Trace - Task: Incident=dispatch
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK;
+    pkt.data.task.incident = CELL_SPURS_TRACE_TASK_DISPATCH;
+    pkt.data.task.taskId = taskId;
+    cellSpursModulePutTrace(&pkt, CELL_SPURS_KERNEL_DMA_TAG_ID);
+
+    if (isWaiting == 0) {
+        // If we reach here it means that the task is being started and not being resumed
+        memset(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP), 0, CELL_SPURS_TASK_BOTTOM - CELL_SPURS_TASK_TOP);
+        ctxt->guidAddr = CELL_SPURS_TASK_TOP;
+
+        u32 entryPoint;
+        u32 lowestLoadAddr;
+        if (spursTasksetLoadElf(spu, &entryPoint, &lowestLoadAddr, taskInfo->elf_addr.addr(), false) != CELL_OK) {
+            spursHalt(spu);
+            return;
+        }
+
+        spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+
+        ctxt->savedContextLr = u128::from32r(entryPoint);
+        ctxt->guidAddr = lowestLoadAddr;
+        ctxt->tasksetMgmtAddr = 0x2700;
+        ctxt->x2FC0 = 0;
+        ctxt->taskExitCode = isWaiting;
+        ctxt->x2FD4 = elfAddr & 5; // TODO: Figure this out
+
+        if ((elfAddr & 5) == 1) {
+            spursDma(spu, MFC_GET_CMD, ctxt->taskset.addr() + offsetof(CellSpursTaskset2, m.task_exit_code[taskId]), 0x2FC0/*LSA*/, 0x10/*size*/, ctxt->dmaTagId);
+        }
+
+        // Trace - GUID
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_GUID;
+        pkt.data.guid = 0; // TODO: Put GUID of taskId here
+        cellSpursModulePutTrace(&pkt, 0x1F);
+
+        if (elfAddr & 2) { // TODO: Figure this out
+            spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_STOP);
+            spu.Stop();
+            return;
+        }
+
+        spursTasksetStartTask(spu, taskInfo->args);
+    } else {
+        if (taskset->m.enable_clear_ls) {
+            memset(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP), 0, CELL_SPURS_TASK_BOTTOM - CELL_SPURS_TASK_TOP);
+        }
+
+        // If the entire LS is saved then there is no need to load the ELF as it will be saved in the context save area as well
+        if (taskInfo->ls_pattern._u128.value() != u128::from64r(0x03FFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull)) {
+            // Load the ELF
+            u32 entryPoint;
+            if (spursTasksetLoadElf(spu, &entryPoint, nullptr, taskInfo->elf_addr.addr(), true) != CELL_OK) {
+                spursHalt(spu);
+                return;
+            }
+        }
+
+        // Load saved context from main memory to LS
+        u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull;
+        spursDma(spu, MFC_GET_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId);
+        for (auto i = 6; i < 128; i++) {
+            if (taskInfo->ls_pattern._u128.value()._bit[i]) {
+                // TODO: Combine DMA requests for consecutive blocks into a single request
+                spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId);
+            }
+        }
+
+        spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+
+        // Restore saved registers
+        spu.FPSCR.Write(ctxt->savedContextFpscr.value());
+        spu.WriteChannel(MFC_WrTagMask, u128::from32r(ctxt->savedWriteTagGroupQueryMask));
+        spu.WriteChannel(SPU_WrEventMask, u128::from32r(ctxt->savedSpuWriteEventMask));
+
+        // Trace - GUID
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_GUID;
+        pkt.data.guid = 0; // TODO: Put GUID of taskId here
+        cellSpursModulePutTrace(&pkt, 0x1F);
+
+        if (elfAddr & 2) { // TODO: Figure this out
+            spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_STOP);
+            spu.Stop();
+            return;
+        }
+
+        spu.GPR[3].clear();
+        spursTasksetResumeTask(spu);
+    }
+}
+
+/// Process a syscall request
+s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+    auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    // If the 0x10 bit is set in syscallNum then it's the 2nd version of the
+    // syscall (e.g. cellSpursYield2 instead of cellSpursYield) and so don't wait
+    // for DMA completion
+    if ((syscallNum & 0x10) == 0) {
+        spursDmaWaitForCompletion(spu, 0xFFFFFFFF);
+    }
+
+    s32 rc = 0;
+    u32 incident = 0;
+    switch (syscallNum & 0x0F) {
+    case CELL_SPURS_TASK_SYSCALL_EXIT:
+        if (ctxt->x2FD4 == 4 || (ctxt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out
+            if (ctxt->x2FD4 != 4) {
+                spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_DESTROY_TASK, nullptr, nullptr);
+            }
+
+            auto addr = ctxt->x2FD4 == 4 ? taskset->m.x78 : ctxt->x2FC0;
+            auto args = ctxt->x2FD4 == 4 ? 0 : ctxt->x2FC8;
+            spursTasksetOnTaskExit(spu, addr, ctxt->taskId, ctxt->taskExitCode, args);
+        }
+
+        incident = CELL_SPURS_TRACE_TASK_EXIT;
+        break;
+    case CELL_SPURS_TASK_SYSCALL_YIELD:
+        if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_POLL, nullptr, nullptr)) {
+            // If we reach here then it means that either another task can be scheduled or another workload can be scheduled
+            // Save the context of the current task
+            rc = spursTasketSaveTaskContext(spu);
+            if (rc == CELL_OK) {
+                spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_YIELD_TASK, nullptr, nullptr);
+                incident = CELL_SPURS_TRACE_TASK_YIELD;
+            }
+        }
+        break;
+    case CELL_SPURS_TASK_SYSCALL_WAIT_SIGNAL:
+        if (spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_POLL_SIGNAL, nullptr, nullptr) == 0) {
+            rc = spursTasketSaveTaskContext(spu);
+            if (rc == CELL_OK) {
+                if (spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_WAIT_SIGNAL, nullptr, nullptr) == 0) {
+                    incident = CELL_SPURS_TRACE_TASK_WAIT;
+                }
+            }
+        }
+        break;
+    case CELL_SPURS_TASK_SYSCALL_POLL:
+        rc = spursTasksetPollStatus(spu) ? CELL_SPURS_TASK_POLL_FOUND_WORKLOAD : 0;
+        rc |= spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_POLL, nullptr, nullptr) ? CELL_SPURS_TASK_POLL_FOUND_TASK : 0;
+        break;
+    case CELL_SPURS_TASK_SYSCALL_RECV_WKL_FLAG:
+        if (args == 0) { // TODO: Figure this out
+            spursHalt(spu);
+        }
+
+        if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG, nullptr, nullptr) != 1) {
+            rc = spursTasketSaveTaskContext(spu);
+            if (rc == CELL_OK) {
+                incident = CELL_SPURS_TRACE_TASK_WAIT;
+            }
+        }
+        break;
+    default:
+        rc = CELL_SPURS_TASK_ERROR_NOSYS;
+        break;
+    }
+
+    if (incident) {
+        // Trace - TASK
+        CellSpursTracePacket pkt;
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK;
+        pkt.data.task.incident = incident;
+        pkt.data.task.taskId = ctxt->taskId;
+        cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+        // Clear the GUID of the task
+        memset(vm::get_ptr(spu.ls_offset + ctxt->guidAddr), 0, 0x10);
+
+        if (spursTasksetPollStatus(spu)) {
+            spursTasksetExit(spu);
+        } else {
+            spursTasksetDispatch(spu);
+        }
+    }
+
+    return rc;
+}
+
+/// Initialise the Taskset PM
+void spursTasksetInit(SPUThread & spu, u32 pollStatus) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+    auto kernelCtxt = vm::get_ptr(spu.ls_offset + 0x100);
+
+    kernelCtxt->moduleId[0] = 'T';
+    kernelCtxt->moduleId[1] = 'K';
+
+    // Trace - START: Module='TKST'
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = 0x52; // It's not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_START
+    memcpy(pkt.data.start.module, "TKST", 4);
+    pkt.data.start.level = 2;
+    pkt.data.start.ls = 0xA00 >> 2;
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+    spursTasksetProcessPollStatus(spu, pollStatus);
+}
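Taskset syscall numbers are decoded in two parts by spursTasksetProcessSyscall above: bits 0-3 select the operation and bit 4 distinguishes the second-generation API variants (cellSpursYield2 and friends), which skip the wait for outstanding DMA. A small sketch of the decoding; the struct and names are this editor's illustration, not an interface from the source:

    #include <cstdint>

    typedef uint32_t u32;

    struct TasksetSyscall {
        u32  op;          // low nibble: EXIT, YIELD, WAIT_SIGNAL, POLL, RECV_WKL_FLAG, ...
        bool skipDmaWait; // bit 4 set: "2" variant, do not wait for DMA completion
    };

    TasksetSyscall decodeTasksetSyscall(u32 syscallNum) {
        return { syscallNum & 0x0F, (syscallNum & 0x10) != 0 };
    }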
+
+/// Load an ELF
+s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments) {
+    if (elfAddr == 0 || (elfAddr & 0x0F) != 0) {
+        return CELL_SPURS_TASK_ERROR_INVAL;
+    }
+
+    vfsStreamMemory stream(elfAddr);
+    loader::handlers::elf32 loader;
+    auto rc = loader.init(stream);
+    if (rc != loader::handler::ok) {
+        return CELL_SPURS_TASK_ERROR_NOEXEC;
+    }
+
+    u32 _lowestLoadAddr = CELL_SPURS_TASK_BOTTOM;
+    for (auto & phdr : loader.m_phdrs) {
+        if (phdr.data_be.p_paddr >= CELL_SPURS_TASK_BOTTOM) {
+            break;
+        }
+
+        if (phdr.data_be.p_type == 1/*PT_LOAD*/) {
+            if (skipWriteableSegments == false || (phdr.data_be.p_flags & 2/*PF_W*/) == 0) {
+                if (phdr.data_be.p_vaddr < CELL_SPURS_TASK_TOP ||
+                    phdr.data_be.p_vaddr + phdr.data_be.p_memsz > CELL_SPURS_TASK_BOTTOM) {
+                    return CELL_SPURS_TASK_ERROR_FAULT;
+                }
+
+                _lowestLoadAddr = _lowestLoadAddr > phdr.data_be.p_vaddr ? phdr.data_be.p_vaddr : _lowestLoadAddr;
+            }
+        }
+    }
+
+    loader.load_data(spu.ls_offset, skipWriteableSegments);
+    *entryPoint = loader.m_ehdr.data_be.e_entry;
+    if (lowestLoadAddr) {
+        *lowestLoadAddr = _lowestLoadAddr;
+    }
+
+    return CELL_OK;
+}
diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp
index f3fb23b383..8def286a85 100644
--- a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp
+++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp
@@ -1099,3 +1099,86 @@ s32 sys_raw_spu_get_spu_cfg(u32 id, vm::ptr value)
 	*value = (u32)t->cfg.value;
 	return CELL_OK;
 }
+
+void sys_spu_thread_exit(SPUThread & spu, s32 status)
+{
+	// Cancel any pending status update requests
+	u128 r;
+	spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0));
+	while (spu.GetChannelCount(MFC_RdTagStat) != 1);
+	spu.ReadChannel(r, MFC_RdTagStat);
+
+	// Wait for all pending DMA operations to complete
+	spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF));
+	spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL));
+	spu.ReadChannel(r, MFC_RdTagStat);
+
+	spu.WriteChannel(SPU_WrOutMbox, u128::from32r(status));
+	spu.StopAndSignal(0x102);
+}
+
+void sys_spu_thread_group_exit(SPUThread & spu, s32 status)
+{
+	// Cancel any pending status update requests
+	u128 r;
+	spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0));
+	while (spu.GetChannelCount(MFC_RdTagStat) != 1);
+	spu.ReadChannel(r, MFC_RdTagStat);
+
+	// Wait for all pending DMA operations to complete
+	spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF));
+	spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL));
+	spu.ReadChannel(r, MFC_RdTagStat);
+
+	spu.WriteChannel(SPU_WrOutMbox, u128::from32r(status));
+	spu.StopAndSignal(0x101);
+}
+
+s32 sys_spu_thread_send_event(SPUThread & spu, u8 spup, u32 data0, u32 data1)
+{
+	if (spup > 0x3F)
+	{
+		return CELL_EINVAL;
+	}
+
+	if (spu.GetChannelCount(SPU_RdInMbox))
+	{
+		return CELL_EBUSY;
+	}
+
+	spu.WriteChannel(SPU_WrOutMbox, u128::from32r(data1));
+	spu.WriteChannel(SPU_WrOutIntrMbox, u128::from32r((spup << 24) | (data0 & 0x00FFFFFF)));
+
+	u128 r;
+	spu.ReadChannel(r, SPU_RdInMbox);
+	return r._u32[3];
+}
+
+s32 sys_spu_thread_switch_system_module(SPUThread & spu, u32 status)
+{
+	if (spu.GetChannelCount(SPU_RdInMbox))
+	{
+		return CELL_EBUSY;
+	}
+
+	// Cancel any pending status update requests
+	u128 r;
+	spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0));
+	while (spu.GetChannelCount(MFC_RdTagStat) != 1);
+	spu.ReadChannel(r, MFC_RdTagStat);
+
+	// Wait for all pending DMA operations to complete
+	spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF));
+	spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL));
+	spu.ReadChannel(r, MFC_RdTagStat);
+
+	do
+	{
+		spu.WriteChannel(SPU_WrOutMbox, u128::from32r(status));
+		spu.StopAndSignal(0x120);
+		spu.ReadChannel(r, SPU_RdInMbox);
+	}
+	while (r._u32[3] == CELL_EBUSY);
+
+	return r._u32[3];
+}
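The three exiting helpers above share one prologue: any previously requested MFC tag status update is cancelled (an immediate update is requested and its stale result drained from MFC_RdTagStat), then a full tag mask plus MFC_TAG_UPDATE_ALL blocks until every outstanding DMA has completed. A condensed sketch of that prologue, assuming the SPUThread channel interface used above (the original code writes the raw value 0, which is MFC_TAG_UPDATE_IMMEDIATE; the helper name is this editor's):

    // Drain any pending tag status update, then wait for all 32 tag groups to complete.
    void waitForAllDmaToComplete(SPUThread & spu)
    {
        u128 r;

        // Cancel a pending tag status update request and consume the stale result
        spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_IMMEDIATE)); // == 0
        while (spu.GetChannelCount(MFC_RdTagStat) != 1);
        spu.ReadChannel(r, MFC_RdTagStat);

        // Now block until every outstanding DMA on any tag has finished
        spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF));
        spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL));
        spu.ReadChannel(r, MFC_RdTagStat);
    }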
diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.h b/rpcs3/Emu/SysCalls/lv2/sys_spu.h
index e71c606bf7..e129455758 100644
--- a/rpcs3/Emu/SysCalls/lv2/sys_spu.h
+++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.h
@@ -204,3 +204,9 @@ s32 sys_raw_spu_get_int_stat(u32 id, u32 class_id, vm::ptr stat);
 s32 sys_raw_spu_read_puint_mb(u32 id, vm::ptr value);
 s32 sys_raw_spu_set_spu_cfg(u32 id, u32 value);
 s32 sys_raw_spu_get_spu_cfg(u32 id, vm::ptr value);
+
+// SPU Calls
+void sys_spu_thread_exit(SPUThread & spu, s32 status);
+void sys_spu_thread_group_exit(SPUThread & spu, s32 status);
+s32 sys_spu_thread_send_event(SPUThread & spu, u8 spup, u32 data0, u32 data1);
+s32 sys_spu_thread_switch_system_module(SPUThread & spu, u32 status);
diff --git a/rpcs3/Loader/ELF32.cpp b/rpcs3/Loader/ELF32.cpp
index 1d0ea7a16e..4eef027421 100644
--- a/rpcs3/Loader/ELF32.cpp
+++ b/rpcs3/Loader/ELF32.cpp
@@ -413,7 +413,7 @@ namespace loader
 		return ok;
 	}
 
-	handler::error_code elf32::load_data(u32 offset)
+	handler::error_code elf32::load_data(u32 offset, bool skip_writeable)
 	{
 		Elf_Machine machine = (Elf_Machine)(u16)(m_ehdr.is_le() ? m_ehdr.data_le.e_machine : m_ehdr.data_be.e_machine);
@@ -436,6 +436,11 @@
 				return loading_error;
 			}
 
+			if (skip_writeable == true && (phdr.data_be.p_flags & 2/*PF_W*/) != 0)
+			{
+				continue;
+			}
+
 			if (filesz)
 			{
 				m_stream->Seek(handler::get_stream_offset() + offset);
diff --git a/rpcs3/Loader/ELF32.h b/rpcs3/Loader/ELF32.h
index d3d37f543c..6a13b6f7cb 100644
--- a/rpcs3/Loader/ELF32.h
+++ b/rpcs3/Loader/ELF32.h
@@ -132,7 +132,7 @@ namespace loader
 		error_code init(vfsStream& stream) override;
 		error_code load() override;
-		error_code load_data(u32 offset);
+		error_code load_data(u32 offset, bool skip_writeable = false);
 
 		virtual ~elf32() = default;
 	};
diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj
index 5327e220e3..56bc415e11 100644
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -37,6 +37,7 @@
+
@@ -621,6 +622,7 @@
 _UNICODE;UNICODE;%(PreprocessorDefinitions)
 stdafx.h
 Async
+true
 false
@@ -638,6 +640,7 @@
 _UNICODE;UNICODE;LLVM_AVAILABLE;%(PreprocessorDefinitions)
 stdafx.h
 Async
+true
@@ -658,6 +661,7 @@
 _UNICODE;UNICODE;MSVC_CRT_MEMLEAK_DETECTION;%(PreprocessorDefinitions)
 stdafx.h
 Async
+true
@@ -675,6 +679,7 @@
 Use
 stdafx.h
 Async
+true
@@ -695,6 +700,7 @@
 stdafx.h
 Async
 LLVM_AVAILABLE;%(PreprocessorDefinitions)
+true
diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters
index ea11ba34b9..49c65e3094 100644
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -668,6 +668,9 @@
 Emu\Audio\XAudio2
+
+Emu\SysCalls\Modules
+
 Emu\CPU\ARMv7
diff --git a/rpcs3/stdafx.h b/rpcs3/stdafx.h
index 66792d0ea4..f43f0a0cd5 100644
--- a/rpcs3/stdafx.h
+++ b/rpcs3/stdafx.h
@@ -36,6 +36,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include "Utilities/GNU.h"