diff --git a/Utilities/BEType.h b/Utilities/BEType.h
index 4965eac336..8cf5e9a89c 100644
--- a/Utilities/BEType.h
+++ b/Utilities/BEType.h
@@ -1,5 +1,7 @@
 #pragma once
 
+#define IS_LE_MACHINE
+
 union _CRT_ALIGN(16) u128
 {
     u64 _u64[2];
@@ -136,16 +138,28 @@ union _CRT_ALIGN(16) u128
         }
     };
 
+    // Index 0 returns the MSB and index 127 returns the LSB
     bit_element operator [] (u32 index)
     {
         assert(index < 128);
-        return bit_element(data[index / 64], 1ull << (index % 64));
+
+#ifdef IS_LE_MACHINE
+        return bit_element(data[1 - (index >> 6)], 0x8000000000000000ull >> (index & 0x3F));
+#else
+        return bit_element(data[index >> 6], 0x8000000000000000ull >> (index & 0x3F));
+#endif
     }
 
+    // Index 0 returns the MSB and index 127 returns the LSB
     const bool operator [] (u32 index) const
     {
         assert(index < 128);
-        return (data[index / 64] & (1ull << (index % 64))) != 0;
+
+#ifdef IS_LE_MACHINE
+        return (data[1 - (index >> 6)] & (0x8000000000000000ull >> (index & 0x3F))) != 0;
+#else
+        return (data[index >> 6] & (0x8000000000000000ull >> (index & 0x3F))) != 0;
+#endif
     }
 } _bit;
@@ -509,8 +523,6 @@ struct be_storage_t
     typedef u128 type;
 };
 
-#define IS_LE_MACHINE
-
 template<typename T, typename T2 = T>
 class be_t
 {
diff --git a/rpcs3/Emu/Cell/MFC.h b/rpcs3/Emu/Cell/MFC.h
index a6c731d3da..0b669deb97 100644
--- a/rpcs3/Emu/Cell/MFC.h
+++ b/rpcs3/Emu/Cell/MFC.h
@@ -35,6 +35,14 @@ enum
     MFC_GETLLAR_SUCCESS = 4,
 };
 
+// MFC Write Tag Status Update Request Channel (ch23) operations
+enum
+{
+    MFC_TAG_UPDATE_IMMEDIATE = 0,
+    MFC_TAG_UPDATE_ANY = 1,
+    MFC_TAG_UPDATE_ALL = 2,
+};
+
 enum
 {
     MFC_SPU_TO_PPU_MAILBOX_STATUS_MASK = 0x000000FF,
diff --git a/rpcs3/Emu/Cell/SPUInterpreter.h b/rpcs3/Emu/Cell/SPUInterpreter.h
index 151eb4e436..7c56b4c9ac 100644
--- a/rpcs3/Emu/Cell/SPUInterpreter.h
+++ b/rpcs3/Emu/Cell/SPUInterpreter.h
@@ -1316,10 +1316,7 @@ private:
     void FSCRRD(u32 rt)
     {
-        CPU.GPR[rt]._u32[3] = CPU.FPSCR._u32[3];
-        CPU.GPR[rt]._u32[2] = CPU.FPSCR._u32[2];
-        CPU.GPR[rt]._u32[1] = CPU.FPSCR._u32[1];
-        CPU.GPR[rt]._u32[0] = CPU.FPSCR._u32[0];
+        CPU.FPSCR.Read(CPU.GPR[rt]);
     }
     void FESD(u32 rt, u32 ra)
     {
@@ -1373,10 +1370,7 @@ private:
     }
     void FSCRWR(u32 rt, u32 ra)
     {
-        CPU.FPSCR._u32[3] = CPU.GPR[ra]._u32[3] & 0x00000F07;
-        CPU.FPSCR._u32[2] = CPU.GPR[ra]._u32[2] & 0x00003F07;
-        CPU.FPSCR._u32[1] = CPU.GPR[ra]._u32[1] & 0x00003F07;
-        CPU.FPSCR._u32[0] = CPU.GPR[ra]._u32[0] & 0x00000F07;
+        CPU.FPSCR.Write(CPU.GPR[ra]);
     }
     void DFTSV(u32 rt, u32 ra, s32 i7)
     {
diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp
index e944fcd60d..c4cababc01 100644
--- a/rpcs3/Emu/Cell/SPUThread.cpp
+++ b/rpcs3/Emu/Cell/SPUThread.cpp
@@ -1003,7 +1003,14 @@ void SPUThread::StopAndSignal(u32 code)
 
     case 0x003:
     {
-        GPR[3]._u64[1] = m_code3_func(*this);
+        auto iter = m_addr_to_hle_function_map.find(PC);
+        assert(iter != m_addr_to_hle_function_map.end());
+
+        auto return_to_caller = iter->second(*this);
+        if (return_to_caller)
+        {
+            SetBranch(GPR[0]._u32[3] & 0x3fffc);
+        }
         break;
     }
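Note (not part of the patch): a minimal standalone sketch of the new MSB-first bit indexing that `u128::operator[]` adopts above. `test_bit_msb_first` is a hypothetical helper, not RPCS3 code; `data[2]` models the `u128::_u64` storage on a little-endian host, where `data[1]` holds the most significant 64 bits.

```cpp
#include <cassert>
#include <cstdint>

// Bit 0 is the MSB of the 128-bit value, bit 127 the LSB, matching the patch.
static bool test_bit_msb_first(const uint64_t data[2], uint32_t index)
{
    assert(index < 128);
    // index >> 6 selects the 64-bit word; 1 - (...) flips it for LE storage.
    // The mask walks down from the MSB within the selected word.
    return (data[1 - (index >> 6)] & (0x8000000000000000ull >> (index & 0x3F))) != 0;
}

int main()
{
    const uint64_t v[2] = { 0x0000000000000001ull, 0x8000000000000000ull };
    assert(test_bit_msb_first(v, 0));    // MSB of the high word
    assert(test_bit_msb_first(v, 127));  // LSB of the low word
    assert(!test_bit_msb_first(v, 1));   // adjacent bit is clear
}
```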
diff --git a/rpcs3/Emu/Cell/SPUThread.h b/rpcs3/Emu/Cell/SPUThread.h
index bab5f1aa2d..9c0baa5335 100644
--- a/rpcs3/Emu/Cell/SPUThread.h
+++ b/rpcs3/Emu/Cell/SPUThread.h
@@ -248,6 +248,24 @@ public:
     {
         _u32[1+slice] |= exceptions;
     }
+
+    // Write the FPSCR
+    void Write(u128 & r)
+    {
+        _u32[3] = r._u32[3] & 0x00000F07;
+        _u32[2] = r._u32[2] & 0x00003F07;
+        _u32[1] = r._u32[1] & 0x00003F07;
+        _u32[0] = r._u32[0] & 0x00000F07;
+    }
+
+    // Read the FPSCR
+    void Read(u128 & r)
+    {
+        r._u32[3] = _u32[3];
+        r._u32[2] = _u32[2];
+        r._u32[1] = _u32[1];
+        r._u32[0] = _u32[0];
+    }
 };
 
 union SPU_SNRConfig_hdr
@@ -287,6 +305,8 @@ public:
     u32 m_event_mask;
     u32 m_events;
 
+    std::unordered_map<u32, std::function<bool(SPUThread&)>> m_addr_to_hle_function_map;
+
     struct IntrTag
     {
         u32 enabled; // 1 == true
@@ -506,8 +526,35 @@ public:
     void WriteLS64 (const u32 lsa, const u64& data) const { vm::write64 (lsa + m_offset, data); }
     void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); }
 
+    void RegisterHleFunction(u32 addr, std::function<bool(SPUThread&)> function)
+    {
+        m_addr_to_hle_function_map[addr] = function;
+        WriteLS32(addr, 0x00000003); // STOP 3
+    }
+
+    void UnregisterHleFunction(u32 addr)
+    {
+        WriteLS32(addr, 0x00200000); // NOP
+        m_addr_to_hle_function_map.erase(addr);
+    }
+
+    void UnregisterHleFunctions(u32 start_addr, u32 end_addr)
+    {
+        for (auto iter = m_addr_to_hle_function_map.begin(); iter != m_addr_to_hle_function_map.end();)
+        {
+            if (iter->first >= start_addr && iter->first <= end_addr)
+            {
+                WriteLS32(iter->first, 0x00200000); // NOP
+                m_addr_to_hle_function_map.erase(iter++);
+            }
+            else
+            {
+                iter++;
+            }
+        }
+    }
+
     std::function<void(SPUThread& SPU)> m_custom_task;
-    std::function<u64(SPUThread& SPU)> m_code3_func;
 
 public:
     SPUThread(CPUThreadType type = CPU_THREAD_SPU);
@@ -606,7 +653,7 @@ public:
         for (auto &arg : values)
         {
             u32 arg_size = align(u32(arg.size() + 1), stack_align);
-            u32 arg_addr = Memory.MainMem.AllocAlign(arg_size, stack_align);
+            u32 arg_addr = (u32)Memory.MainMem.AllocAlign(arg_size, stack_align);
 
             std::strcpy(vm::get_ptr<char>(arg_addr), arg.c_str());
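Note (not part of the patch): a hedged usage sketch of the HLE hooks added above. `MyHleCall` and `InstallHook` are hypothetical; the member names come from this diff. `RegisterHleFunction` patches a `STOP 3` at the given LS address; when the interpreter hits it, `StopAndSignal(0x003)` looks the handler up by `PC`, and a `true` return branches back to the SPU-side caller through `$LR` (`GPR[0]`).

```cpp
bool MyHleCall(SPUThread& spu)
{
    spu.GPR[3]._u32[3] = 0; // hand a return value back to the SPU-side caller
    return true;            // branch to GPR[0] & 0x3fffc, like a real function return
}

void InstallHook(SPUThread& spu, u32 entry)
{
    spu.RegisterHleFunction(entry, MyHleCall); // LS now holds STOP 3 at 'entry'
    // ... later:
    spu.UnregisterHleFunction(entry);          // restores a NOP at 'entry'
}
```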
diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
index f6f07dcfc6..18eaca3722 100644
--- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
+++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.cpp
@@ -26,13 +26,17 @@ extern u32 libsre;
 extern u32 libsre_rtoc;
 #endif
 
+bool spursKernelEntry(SPUThread & spu);
+s64 cellSpursLookUpTasksetAddress(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, u32 id);
+s64 _cellSpursSendSignal(vm::ptr<CellSpursTaskset> taskset, u32 taskID);
+
 s64 spursCreateLv2EventQueue(vm::ptr<CellSpurs> spurs, u32& queue_id, vm::ptr<u8> port, s32 size, u64 name_u64)
 {
 #ifdef PRX_DEBUG_XXX
     vm::var<be_t<u32>> queue;
-    s32 res = cb_call<s32, vm::ptr<CellSpurs>, vm::ptr<u32>, vm::ptr<u8>, s32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc,
+    s32 res = cb_call<s32, vm::ptr<CellSpurs>, vm::ptr<be_t<u32>>, vm::ptr<u8>, s32, u32>(GetCurrentPPUThread(), libsre + 0xB14C, libsre_rtoc,
         spurs, queue, port, size, vm::read32(libsre_rtoc - 0x7E2C));
-    queue_id = queue;
+    queue_id = queue.value();
     return res;
 #endif
@@ -42,7 +46,7 @@ s64 spursCreateLv2EventQueue(vm::ptr<CellSpurs> spurs, u32& queue_id, vm::ptr<u8> port, s32 size, u64 name_u64)
 
     spurs->m.xCC = 0;
     spurs->m.xCD = 0;
-    spurs->m.xCE = 0;
+    spurs->m.sysSrvMsgUpdateTrace = 0;
     for (u32 i = 0; i < 8; i++)
     {
-        spurs->m.xC0[i] = -1;
+        spurs->m.sysSrvWorkload[i] = -1;
     }
 
     // default or system workload:
 #ifdef PRX_DEBUG
-    spurs->m.wklSysG.pm.set(be_t<u64>::make(vm::read32(libsre_rtoc - 0x7EA4)));
-    spurs->m.wklSysG.size = 0x2200;
+    spurs->m.wklInfoSysSrv.addr.set(be_t<u64>::make(vm::read32(libsre_rtoc - 0x7EA4)));
+    spurs->m.wklInfoSysSrv.size = 0x2200;
 #else
-    spurs->m.wklSysG.pm.set(be_t<u64>::make(0x100)); // wrong 64-bit address
+    spurs->m.wklInfoSysSrv.addr.set(be_t<u64>::make(SPURS_IMG_ADDR_SYS_SRV_WORKLOAD));
 #endif
-    spurs->m.wklSysG.data = 0;
-    spurs->m.wklSysG.copy.write_relaxed(0xff);
+    spurs->m.wklInfoSysSrv.arg = 0;
+    spurs->m.wklInfoSysSrv.uniqueId.write_relaxed(0xff);
     u32 sem;
     for (u32 i = 0; i < 0x10; i++)
     {
@@ -151,7 +155,8 @@ s64 spursInit(
         assert(!"spu_image_import() failed");
     }
 #else
-    spurs->m.spuImg.addr = Memory.Alloc(0x40000, 4096);
+    spurs->m.spuImg.addr = (u32)Memory.Alloc(0x40000, 4096);
+    spurs->m.spuImg.entry_point = isSecond ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR;
 #endif
 
     s32 tgt = SYS_SPU_THREAD_GROUP_TYPE_NORMAL;
@@ -175,322 +180,11 @@ s64 spursInit(
     name += "CellSpursKernel0";
     for (s32 num = 0; num < nSpus; num++, name[name.size() - 1]++)
     {
-        spurs->m.spus[num] = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, 0, 0, 0, 0, [spurs, num, isSecond](SPUThread& SPU)
-        {
-#ifdef PRX_DEBUG_XXX
-            SPU.GPR[3]._u32[3] = num;
-            SPU.GPR[4]._u64[1] = spurs.addr();
-            return SPU.FastCall(SPU.PC);
+        auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, num, spurs.addr(), 0, 0);
+#ifndef PRX_DEBUG_XXX
+        spu->RegisterHleFunction(spurs->m.spuImg.entry_point, spursKernelEntry);
 #endif
-
-            // code replacement:
-            {
-                const u32 addr = /*SPU.ReadLS32(0x1e0) +*/ 8; //SPU.ReadLS32(0x1e4);
-                SPU.WriteLS32(addr + 0, 3); // hack for cellSpursModulePollStatus
-                SPU.WriteLS32(addr + 4, 0x35000000); // bi $0
-                SPU.WriteLS32(0x1e4, addr);
-
-                SPU.WriteLS32(SPU.ReadLS32(0x1e0), 2); // hack for cellSpursModuleExit
-            }
-
-            if (!isSecond) SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // first kernel
-            {
-                LV2_LOCK(0); // TODO: lock-free implementation if possible
-
-                const u32 arg1 = SPU.GPR[3]._u32[3];
-                u32 var0 = SPU.ReadLS32(0x1d8);
-                u32 var1 = SPU.ReadLS32(0x1dc);
-                u128 wklA = vm::read128(spurs.addr() + 0x20);
-                u128 wklB = vm::read128(spurs.addr() + 0x30);
-                u128 savedA = SPU.ReadLS128(0x180);
-                u128 savedB = SPU.ReadLS128(0x190);
-                u128 vAA = u128::sub8(wklA, savedA);
-                u128 vBB = u128::sub8(wklB, savedB);
-                u128 vM1 = {}; if (var1 <= 15) vM1.u8r[var1] = 0xff;
-                u128 vAABB = (arg1 == 0) ? vAA : u128::add8(vAA, u128::andnot(vM1, vBB));
-
-                u32 vNUM = 0x20;
-                u64 vRES = 0x20ull << 32;
-                u128 vSET = {};
-
-                if (spurs->m.x72.read_relaxed() & (1 << num))
-                {
-                    SPU.WriteLS8(0x1eb, 0); // var4
-                    if (arg1 == 0 || var1 == 0x20)
-                    {
-                        spurs->m.x72._and_not(1 << num);
-                    }
-                }
-                else
-                {
-                    u128 wklReadyCount0 = vm::read128(spurs.addr() + 0x0);
-                    u128 wklReadyCount1 = vm::read128(spurs.addr() + 0x10);
-                    u128 savedC = SPU.ReadLS128(0x1A0);
-                    u128 savedD = SPU.ReadLS128(0x1B0);
-                    u128 vRC = u128::add8(u128::minu8(wklReadyCount0, u128::from8p(8)), u128::minu8(wklReadyCount1, u128::from8p(8)));
-                    u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed();
-                    u32 flagRecv = spurs->m.flagRecv.read_relaxed();
-                    u128 vFM = u128::fromV(g_imm_table.fsmb_table[(wklFlag == 0) && (flagRecv < 16) ? 0x8000 >> flagRecv : 0]);
-                    u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet1.read_relaxed()]);
-                    u128 vFMS1 = vFM | wklSet1;
-                    u128 vFMV1 = u128::fromV(g_imm_table.fsmb_table[(var1 < 16) ? 0x8000 >> var1 : 0]);
-                    u32 var5 = SPU.ReadLS32(0x1ec);
-                    u128 wklMinCnt = vm::read128(spurs.addr() + 0x40);
-                    u128 wklMaxCnt = vm::read128(spurs.addr() + 0x50);
-                    u128 vCC = u128::andnot(vFMS1, u128::eq8(wklReadyCount0, {}) | u128::leu8(vRC, vAABB)) |
-                        u128::leu8(wklMaxCnt, vAABB) |
-                        u128::eq8(savedC, {}) |
-                        u128::fromV(g_imm_table.fsmb_table[(~var5) >> 16]);
-                    u128 vCCH1 = u128::andnot(vCC,
-                        u128::from8p(0x80) & (vFMS1 | u128::gtu8(wklReadyCount0, vAABB)) |
-                        u128::from8p(0x7f) & savedC);
-                    u128 vCCL1 = u128::andnot(vCC,
-                        u128::from8p(0x80) & vFMV1 |
-                        u128::from8p(0x40) & u128::gtu8(vAABB, {}) & u128::gtu8(wklMinCnt, vAABB) |
-                        u128::from8p(0x3c) & u128::fromV(_mm_slli_epi32(u128::sub8(u128::from8p(8), vAABB).vi, 2)) |
-                        u128::from8p(0x02) & u128::eq8(savedD, u128::from8p((u8)var0)) |
-                        u128::from8p(0x01));
-                    u128 vSTAT =
-                        u128::from8p(0x01) & u128::gtu8(wklReadyCount0, vAABB) |
-                        u128::from8p(0x02) & wklSet1 |
-                        u128::from8p(0x04) & vFM;
-
-                    for (s32 i = 0, max = -1; i < 0x10; i++)
-                    {
-                        const s32 value = ((s32)vCCH1.u8r[i] << 8) | ((s32)vCCL1.u8r[i]);
-                        if (value > max && (vCC.u8r[i] & 1) == 0)
-                        {
-                            vNUM = i;
-                            max = value;
-                        }
-                    }
-
-                    if (vNUM < 0x10)
-                    {
-                        vRES = ((u64)vNUM << 32) | vSTAT.u8r[vNUM];
-                        vSET.u8r[vNUM] = 0x01;
-                    }
-
-                    SPU.WriteLS8(0x1eb, vNUM == 0x20);
-
-                    if (!arg1 || var1 == vNUM)
-                    {
-                        spurs->m.wklSet1._and_not(be_t<u16>::make((u16)(vNUM < 16 ? 0x8000 >> vNUM : 0)));
-                        if (vNUM == flagRecv && wklFlag == 0)
-                        {
-                            spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(-1));
-                        }
-                    }
-                }
-
-                if (arg1 == 0)
-                {
-                    vm::write128(spurs.addr() + 0x20, u128::add8(vAA, vSET)); // update wklA
-
-                    SPU.WriteLS128(0x180, vSET); // update savedA
-                    SPU.WriteLS32(0x1dc, vNUM); // update var1
-                }
-
-                if (arg1 == 1 && vNUM != var1)
-                {
-                    vm::write128(spurs.addr() + 0x30, u128::add8(vBB, vSET)); // update wklB
-
-                    SPU.WriteLS128(0x190, vSET); // update savedB
-                }
-                else
-                {
-                    vm::write128(spurs.addr() + 0x30, vBB); // update wklB
-
-                    SPU.WriteLS128(0x190, {}); // update savedB
-                }
-
-                return vRES;
-            };
-            else SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // second kernel
-            {
-                LV2_LOCK(0); // TODO: lock-free implementation if possible
-
-                const u32 arg1 = SPU.GPR[3]._u32[3];
-                u32 var0 = SPU.ReadLS32(0x1d8);
-                u32 var1 = SPU.ReadLS32(0x1dc);
-                u128 wklA = vm::read128(spurs.addr() + 0x20);
-                u128 wklB = vm::read128(spurs.addr() + 0x30);
-                u128 savedA = SPU.ReadLS128(0x180);
-                u128 savedB = SPU.ReadLS128(0x190);
-                u128 vAA = u128::sub8(wklA, savedA);
-                u128 vBB = u128::sub8(wklB, savedB);
-                u128 vM1 = {}; if (var1 <= 31) vM1.u8r[var1 & 0xf] = (var1 <= 15) ? 0xf : 0xf0;
-                u128 vAABB = (arg1 == 0) ? vAA : u128::add8(vAA, u128::andnot(vM1, vBB));
-
-                u32 vNUM = 0x20;
-                u64 vRES = 0x20ull << 32;
-                u128 vSET = {};
-
-                if (spurs->m.x72.read_relaxed() & (1 << num))
-                {
-                    SPU.WriteLS8(0x1eb, 0); // var4
-                    if (arg1 == 0 || var1 == 0x20)
-                    {
-                        spurs->m.x72._and_not(1 << num);
-                    }
-                }
-                else
-                {
-                    u128 wklReadyCount0 = vm::read128(spurs.addr() + 0x0);
-                    u128 wklReadyCount1 = vm::read128(spurs.addr() + 0x10);
-                    u128 savedC = SPU.ReadLS128(0x1A0);
-                    u128 wklMaxCnt = vm::read128(spurs.addr() + 0x50);
-                    u32 wklFlag = spurs->m.wklFlag.flag.read_relaxed();
-                    u32 flagRecv = spurs->m.flagRecv.read_relaxed();
-                    u128 wklSet1 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet1.read_relaxed()]);
-                    u128 wklSet2 = u128::fromV(g_imm_table.fsmb_table[spurs->m.wklSet2.read_relaxed()]);
-                    u128 vABL = vAABB & u128::from8p(0x0f);
-                    u128 vABH = u128::fromV(_mm_srli_epi32((vAABB & u128::from8p(0xf0)).vi, 4));
-                    u32 var5 = SPU.ReadLS32(0x1ec);
-                    u128 v5L = u128::fromV(g_imm_table.fsmb_table[var5 >> 16]);
-                    u128 v5H = u128::fromV(g_imm_table.fsmb_table[(u16)var5]);
-                    u128 vFML = u128::fromV(g_imm_table.fsmb_table[(wklFlag == 0) && (flagRecv < 16) ? 0x8000 >> flagRecv : 0]);
-                    u128 vFMH = u128::fromV(g_imm_table.fsmb_table[(u16)((wklFlag == 0) && (flagRecv < 32) ? 0x80000000 >> flagRecv : 0)]);
-                    u128 vCL = u128::fromV(_mm_slli_epi32((savedC & u128::from8p(0x0f)).vi, 4));
-                    u128 vCH = savedC & u128::from8p(0xf0);
-                    u128 vABRL = u128::gtu8(wklReadyCount0, vABL);
-                    u128 vABRH = u128::gtu8(wklReadyCount1, vABH);
-                    u128 vCCL = v5L & u128::gtu8(vCL, {}) & u128::gtu8(wklMaxCnt & u128::from8p(0x0f), vABL) & (wklSet1 | vFML | vABRL);
-                    u128 vCCH = v5H & u128::gtu8(vCH, {}) & u128::gtu8(u128::fromV(_mm_srli_epi32((wklMaxCnt & u128::from8p(0xf0)).vi, 4)), vABH) & (wklSet2 | vFMH | vABRH);
-                    u128 v1H = {}; if (var1 <= 31 && var1 > 15) v1H.u8r[var1 & 0xf] = 4;
-                    u128 v1L = {}; if (var1 <= 15) v1L.u8r[var1] = 4;
-                    u128 vCH1 = (v1H | vCH & u128::from8p(0xFB)) & vCCH;
-                    u128 vCL1 = (v1L | vCL & u128::from8p(0xFB)) & vCCL;
-                    u128 vSTATL = vABRL & u128::from8p(1) | wklSet1 & u128::from8p(2) | vFML & u128::from8p(4);
-                    u128 vSTATH = vABRH & u128::from8p(1) | wklSet2 & u128::from8p(2) | vFMH & u128::from8p(4);
-
-                    s32 max = -1;
-                    for (u32 i = 0; i < 0x10; i++)
-                    {
-                        const s32 value = vCL1.u8r[i];
-                        if (value > max && (vCCL.u8r[i] & 1))
-                        {
-                            vNUM = i;
-                            max = value;
-                        }
-                    }
-                    for (u32 i = 16; i < 0x20; i++)
-                    {
-                        const s32 value = vCH1.u8r[i];
-                        if (value > max && (vCCH.u8r[i] & 1))
-                        {
-                            vNUM = i;
-                            max = value;
-                        }
-                    }
-
-                    if (vNUM < 0x10)
-                    {
-                        vRES = ((u64)vNUM << 32) | vSTATL.u8r[vNUM];
-                        vSET.u8r[vNUM] = 0x01;
-                    }
-                    else if (vNUM < 0x20)
-                    {
-                        vRES = ((u64)vNUM << 32) | vSTATH.u8r[vNUM & 0xf];
-                        vSET.u8r[vNUM] = 0x10;
-                    }
-
-                    SPU.WriteLS8(0x1eb, vNUM == 0x20);
-
-                    if (!arg1 || var1 == vNUM)
-                    {
-                        spurs->m.wklSet1._and_not(be_t<u16>::make((u16)(vNUM < 16 ? 0x8000 >> vNUM : 0)));
-                        spurs->m.wklSet2._and_not(be_t<u16>::make((u16)(0x80000000 >> vNUM)));
-                        if (vNUM == flagRecv && wklFlag == 0)
-                        {
-                            spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(-1));
-                        }
-                    }
-                }
-
-                if (arg1 == 0)
-                {
-                    vm::write128(spurs.addr() + 0x20, u128::add8(vAA, vSET)); // update wklA
-
-                    SPU.WriteLS128(0x180, vSET); // update savedA
-                    SPU.WriteLS32(0x1dc, vNUM); // update var1
-                }
-
-                if (arg1 == 1 && vNUM != var1)
-                {
-                    vm::write128(spurs.addr() + 0x30, u128::add8(vBB, vSET)); // update wklB
-
-                    SPU.WriteLS128(0x190, vSET); // update savedB
-                }
-                else
-                {
-                    vm::write128(spurs.addr() + 0x30, vBB); // update wklB
-
-                    SPU.WriteLS128(0x190, {}); // update savedB
-                }
-
-                return vRES;
-            };
-            //SPU.m_code3_func = [spurs, num](SPUThread& SPU) -> u64 // test
-            //{
-            //    LV2_LOCK(0);
-            //    SPU.FastCall(0x290);
-            //    u64 vRES = SPU.GPR[3]._u64[1];
-            //    return vRES;
-            //};
-
-            SPU.WriteLS128(0x1c0, u128::from32r(0, spurs.addr(), num, 0x1f));
-
-            u32 wid = 0x20;
-            u32 stat = 0;
-            while (true)
-            {
-                if (Emu.IsStopped())
-                {
-                    cellSpurs->Warning("Spurs Kernel aborted");
-                    return;
-                }
-
-                // get current workload info:
-                auto& wkl = wid <= 15 ? spurs->m.wklG1[wid] : (wid <= 31 && isSecond ? spurs->m.wklG2[wid & 0xf] : spurs->m.wklSysG);
-
-                if (SPU.ReadLS64(0x1d0) != wkl.pm.addr())
-                {
-                    // load executable code:
-                    memcpy(vm::get_ptr<void>(SPU.ls_offset + 0xa00), wkl.pm.get_ptr(), wkl.size);
-                    SPU.WriteLS64(0x1d0, wkl.pm.addr());
-                    SPU.WriteLS32(0x1d8, wkl.copy.read_relaxed());
-                }
-
-                if (!isSecond) SPU.WriteLS16(0x1e8, 0);
-
-                // run workload:
-                SPU.GPR[1]._u32[3] = 0x3FFB0;
-                SPU.GPR[3]._u32[3] = 0x100;
-                SPU.GPR[4]._u64[1] = wkl.data;
-                SPU.GPR[5]._u32[3] = stat;
-                SPU.FastCall(0xa00);
-
-                // check status:
-                auto status = SPU.SPU.Status.GetValue();
-                if (status == SPU_STATUS_STOPPED_BY_STOP)
-                {
-                    return;
-                }
-                else
-                {
-                    assert(status == SPU_STATUS_RUNNING);
-                }
-
-                // get workload id:
-                SPU.GPR[3].clear();
-                assert(SPU.m_code3_func);
-                u64 res = SPU.m_code3_func(SPU);
-                stat = (u32)(res);
-                wid = (u32)(res >> 32);
-            }
-
-        })->GetId();
+        spurs->m.spus[num] = spu->GetId();
     }
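Note (not part of the patch): a hedged sketch of the entry contract implied by the loop above. `spursKernelEntry` itself is declared but not defined in this diff; `spursKernelEntrySketch` below is hypothetical. The new `spu_thread_initialize` call passes `num` and `spurs.addr()` as thread arguments, which the removed `PRX_DEBUG_XXX` code placed in `GPR[3]._u32[3]` and `GPR[4]._u64[1]`, so the registered handler can presumably read them the same way.

```cpp
bool spursKernelEntrySketch(SPUThread& spu)
{
    const u32 spuNum    = spu.GPR[3]._u32[3]; // 'num' argument
    const u64 spursAddr = spu.GPR[4]._u64[1]; // 'spurs.addr()' argument
    // ... the scheduling loop that replaces the old m_code3_func lambdas would go here ...
    (void)spuNum; (void)spursAddr;
    return false; // the kernel owns the thread; it never returns to an SPU-side caller
}
```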
 
     if (flags & SAF_SPU_PRINTF_ENABLED)
@@ -512,8 +206,8 @@ s64 spursInit(
         assert(!"lwcond_create() failed");
     }
 
-    spurs->m.flags1 = (flags & SAF_EXIT_IF_NO_WORK ? SF1_EXIT_IF_NO_WORK : 0) | (isSecond ? SF1_IS_SECOND : 0);
-    spurs->m.flagRecv.write_relaxed(0xff);
+    spurs->m.flags1 = (flags & SAF_EXIT_IF_NO_WORK ? SF1_EXIT_IF_NO_WORK : 0) | (isSecond ? SF1_32_WORKLOADS : 0);
+    spurs->m.wklFlagReceiver.write_relaxed(0xff);
     spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(-1));
     spurs->_u8[0xD64] = 0;
     spurs->_u8[0xD65] = 0;
@@ -521,7 +215,7 @@ s64 spursInit(
     spurs->m.ppuPriority = ppuPriority;
 
     u32 queue;
-    if (s32 res = spursCreateLv2EventQueue(spurs, queue, vm::ptr<u8>::make(spurs.addr() + 0xc9), 0x2a, *(u64*)"_spuPrv"))
+    if (s32 res = (s32)spursCreateLv2EventQueue(spurs, queue, vm::ptr<u8>::make(spurs.addr() + 0xc9), 0x2a, *(u64*)"_spuPrv"))
     {
         assert(!"spursCreateLv2EventQueue() failed");
     }
@@ -581,15 +275,15 @@ s64 spursInit(
             bool do_break = false;
             for (u32 i = 0; i < 16; i++)
             {
-                if (spurs->m.wklStat1[i].read_relaxed() == 2 &&
-                    spurs->m.wklG1[i].priority.data() != 0 &&
-                    spurs->m.wklMaxCnt[i].read_relaxed() & 0xf
+                if (spurs->m.wklState1[i].read_relaxed() == 2 &&
+                    *((u64 *)spurs->m.wklInfo1[i].priority) != 0 &&
+                    spurs->m.wklMaxContention[i].read_relaxed() & 0xf
                     )
                 {
-                    if (spurs->m.wklReadyCount[i].read_relaxed() ||
-                        spurs->m.wklSet1.read_relaxed() & (0x8000u >> i) ||
+                    if (spurs->m.wklReadyCount1[i].read_relaxed() ||
+                        spurs->m.wklSignal1.read_relaxed() & (0x8000u >> i) ||
                         (spurs->m.wklFlag.flag.read_relaxed() == 0 &&
-                        spurs->m.flagRecv.read_relaxed() == (u8)i
+                        spurs->m.wklFlagReceiver.read_relaxed() == (u8)i
                         ))
                     {
                         do_break = true;
@@ -597,17 +291,17 @@ s64 spursInit(
                     }
                 }
             }
-            if (spurs->m.flags1 & SF1_IS_SECOND) for (u32 i = 0; i < 16; i++)
+            if (spurs->m.flags1 & SF1_32_WORKLOADS) for (u32 i = 0; i < 16; i++)
             {
-                if (spurs->m.wklStat2[i].read_relaxed() == 2 &&
-                    spurs->m.wklG2[i].priority.data() != 0 &&
-                    spurs->m.wklMaxCnt[i].read_relaxed() & 0xf0
+                if (spurs->m.wklState2[i].read_relaxed() == 2 &&
+                    *((u64 *)spurs->m.wklInfo2[i].priority) != 0 &&
+                    spurs->m.wklMaxContention[i].read_relaxed() & 0xf0
                     )
                 {
-                    if (spurs->m.wklReadyCount[i + 0x10].read_relaxed() ||
-                        spurs->m.wklSet2.read_relaxed() & (0x8000u >> i) ||
+                    if (spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() ||
+                        spurs->m.wklSignal2.read_relaxed() & (0x8000u >> i) ||
                         (spurs->m.wklFlag.flag.read_relaxed() == 0 &&
-                        spurs->m.flagRecv.read_relaxed() == (u8)i + 0x10
+                        spurs->m.wklFlagReceiver.read_relaxed() == (u8)i + 0x10
                         ))
                     {
                         do_break = true;
@@ -687,7 +381,7 @@ s64 spursInit(
         }
     }
 
-    spurs->m.unk22 = 0;
+    spurs->m.traceBuffer.set(0); // can also use cellLibprof if available (omitted)
 
     // some unknown subroutine
@@ -1349,7 +1043,7 @@ s32 spursAddWorkload(
     }
 
     u32 wnum;
-    const u32 wmax = spurs->m.flags1 & SF1_IS_SECOND ? 0x20u : 0x10u; // TODO: check if can be changed
+    const u32 wmax = spurs->m.flags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u; // TODO: check if can be changed
     spurs->m.wklMskA.atomic_op([spurs, wmax, &wnum](be_t<u32>& value)
     {
         wnum = cntlz32(~(u32)value); // found empty position
@@ -1368,15 +1062,18 @@ s32 spursAddWorkload(
     u32 index = wnum & 0xf;
     if (wnum <= 15)
     {
-        assert((spurs->m.wklA[wnum] & 0xf) == 0);
-        assert((spurs->m.wklB[wnum] & 0xf) == 0);
-        spurs->m.wklStat1[wnum].write_relaxed(1);
-        spurs->m.wklD1[wnum] = 0;
-        spurs->m.wklE1[wnum] = 0;
-        spurs->m.wklG1[wnum].pm = pm;
-        spurs->m.wklG1[wnum].data = data;
-        spurs->m.wklG1[wnum].size = size;
-        spurs->m.wklG1[wnum].priority = *(be_t<u64>*)priorityTable;
+        assert((spurs->m.wklCurrentContention[wnum] & 0xf) == 0);
+        assert((spurs->m.wklPendingContention[wnum] & 0xf) == 0);
+        spurs->m.wklState1[wnum].write_relaxed(1);
+        spurs->m.wklStatus1[wnum] = 0;
+        spurs->m.wklEvent1[wnum] = 0;
+        spurs->m.wklInfo1[wnum].addr = pm;
+        spurs->m.wklInfo1[wnum].arg = data;
+        spurs->m.wklInfo1[wnum].size = size;
+        for (u32 i = 0; i < 8; i++)
+        {
+            spurs->m.wklInfo1[wnum].priority[i] = priorityTable[i];
+        }
         spurs->m.wklH1[wnum].nameClass = nameClass;
         spurs->m.wklH1[wnum].nameInstance = nameInstance;
         memset(spurs->m.wklF1[wnum].unk0, 0, 0x20); // clear struct preserving semaphore id
@@ -1385,25 +1082,29 @@ s32 spursAddWorkload(
         {
             spurs->m.wklF1[wnum].hook = hook;
             spurs->m.wklF1[wnum].hookArg = hookArg;
-            spurs->m.wklE1[wnum] |= 2;
+            spurs->m.wklEvent1[wnum] |= 2;
         }
-        if ((spurs->m.flags1 & SF1_IS_SECOND) == 0)
+        if ((spurs->m.flags1 & SF1_32_WORKLOADS) == 0)
         {
-            spurs->m.wklReadyCount[wnum + 16].write_relaxed(0);
-            spurs->m.wklMinCnt[wnum] = minContention > 8 ? 8 : minContention;
+            spurs->m.wklIdleSpuCountOrReadyCount2[wnum].write_relaxed(0);
+            spurs->m.wklMinContention[wnum] = minContention > 8 ? 8 : minContention;
         }
+        spurs->m.wklReadyCount1[wnum].write_relaxed(0);
     }
     else
     {
-        assert((spurs->m.wklA[index] & 0xf0) == 0);
-        assert((spurs->m.wklB[index] & 0xf0) == 0);
-        spurs->m.wklStat2[index].write_relaxed(1);
-        spurs->m.wklD2[index] = 0;
-        spurs->m.wklE2[index] = 0;
-        spurs->m.wklG2[index].pm = pm;
-        spurs->m.wklG2[index].data = data;
-        spurs->m.wklG2[index].size = size;
-        spurs->m.wklG2[index].priority = *(be_t<u64>*)priorityTable;
+        assert((spurs->m.wklCurrentContention[index] & 0xf0) == 0);
+        assert((spurs->m.wklPendingContention[index] & 0xf0) == 0);
+        spurs->m.wklState2[index].write_relaxed(1);
+        spurs->m.wklStatus2[index] = 0;
+        spurs->m.wklEvent2[index] = 0;
+        spurs->m.wklInfo2[index].addr = pm;
+        spurs->m.wklInfo2[index].arg = data;
+        spurs->m.wklInfo2[index].size = size;
+        for (u32 i = 0; i < 8; i++)
+        {
+            spurs->m.wklInfo2[index].priority[i] = priorityTable[i];
+        }
         spurs->m.wklH2[index].nameClass = nameClass;
         spurs->m.wklH2[index].nameInstance = nameInstance;
         memset(spurs->m.wklF2[index].unk0, 0, 0x20); // clear struct preserving semaphore id
@@ -1412,34 +1113,34 @@ s32 spursAddWorkload(
         {
             spurs->m.wklF2[index].hook = hook;
             spurs->m.wklF2[index].hookArg = hookArg;
-            spurs->m.wklE2[index] |= 2;
+            spurs->m.wklEvent2[index] |= 2;
         }
+        spurs->m.wklIdleSpuCountOrReadyCount2[wnum].write_relaxed(0);
     }
-    spurs->m.wklReadyCount[wnum].write_relaxed(0);
 
     if (wnum <= 15)
     {
-        spurs->m.wklMaxCnt[wnum].atomic_op([maxContention](u8& v)
+        spurs->m.wklMaxContention[wnum].atomic_op([maxContention](u8& v)
         {
             v &= ~0xf;
            v |= (maxContention > 8 ? 8 : maxContention);
         });
-        spurs->m.wklSet1._and_not({ be_t<u16>::make(0x8000 >> index) }); // clear bit in wklFlag1
+        spurs->m.wklSignal1._and_not({ be_t<u16>::make(0x8000 >> index) }); // clear bit in wklFlag1
     }
     else
    {
-        spurs->m.wklMaxCnt[index].atomic_op([maxContention](u8& v)
+        spurs->m.wklMaxContention[index].atomic_op([maxContention](u8& v)
        {
             v &= ~0xf0;
             v |= (maxContention > 8 ? 8 : maxContention) << 4;
         });
-        spurs->m.wklSet2._and_not({ be_t<u16>::make(0x8000 >> index) }); // clear bit in wklFlag2
+        spurs->m.wklSignal2._and_not({ be_t<u16>::make(0x8000 >> index) }); // clear bit in wklFlag2
     }
-    spurs->m.flagRecv.compare_and_swap(wnum, 0xff);
+    spurs->m.wklFlagReceiver.compare_and_swap(wnum, 0xff);
 
     u32 res_wkl;
-    CellSpurs::_sub_str3& wkl = wnum <= 15 ? spurs->m.wklG1[wnum] : spurs->m.wklG2[wnum & 0xf];
+    CellSpurs::WorkloadInfo& wkl = wnum <= 15 ? spurs->m.wklInfo1[wnum] : spurs->m.wklInfo2[wnum & 0xf];
     spurs->m.wklMskB.atomic_op_sync([spurs, &wkl, wnum, &res_wkl](be_t<u32>& v)
     {
         const u32 mask = v & ~(0x80000000u >> wnum);
@@ -1449,29 +1150,29 @@ s32 spursAddWorkload(
         {
             if (mask & m)
             {
-                CellSpurs::_sub_str3& current = i <= 15 ? spurs->m.wklG1[i] : spurs->m.wklG2[i & 0xf];
-                if (current.pm.addr() == wkl.pm.addr())
+                CellSpurs::WorkloadInfo& current = i <= 15 ? spurs->m.wklInfo1[i] : spurs->m.wklInfo2[i & 0xf];
+                if (current.addr.addr() == wkl.addr.addr())
                 {
                     // if a workload with identical policy module found
-                    res_wkl = current.copy.read_relaxed();
+                    res_wkl = current.uniqueId.read_relaxed();
                     break;
                 }
                 else
                 {
-                    k |= 0x80000000 >> current.copy.read_relaxed();
+                    k |= 0x80000000 >> current.uniqueId.read_relaxed();
                     res_wkl = cntlz32(~k);
                 }
             }
         }
 
-        wkl.copy.exchange((u8)res_wkl);
+        wkl.uniqueId.exchange((u8)res_wkl);
         v = mask | (0x80000000u >> wnum);
     });
     assert(res_wkl <= 31);
 
-    spurs->wklStat(wnum).exchange(2);
-    spurs->m.xBD.exchange(0xff);
-    spurs->m.x72.exchange(0xff);
+    spurs->wklState(wnum).exchange(2);
+    spurs->m.sysSrvMsgUpdateWorkload.exchange(0xff);
+    spurs->m.sysSrvMessage.exchange(0xff);
     return CELL_OK;
 }
@@ -1598,7 +1299,7 @@ s64 cellSpursWorkloadAttributeSetShutdownCompletionEventHook(vm::ptr<CellSpursWorkloadAttribute> attr, vm::ptr<CellSpursShutdownCompletionEventHook> hook, vm::ptr<void> arg)
 
-s64 cellSpursAddWorkloadWithAttribute(vm::ptr<CellSpurs> spurs, vm::ptr<u32> wid, vm::ptr<const CellSpursWorkloadAttribute> attr)
+s64 cellSpursAddWorkloadWithAttribute(vm::ptr<CellSpurs> spurs, const vm::ptr<u32> wid, vm::ptr<const CellSpursWorkloadAttribute> attr)
 {
     cellSpurs->Warning("%s(spurs_addr=0x%x, wid_addr=0x%x, attr_addr=0x%x)", __FUNCTION__, spurs.addr(), wid.addr(), attr.addr());
 #ifdef PRX_DEBUG_XXX
@@ -1681,7 +1382,7 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr<CellSpurs> spurs, u32 wid, u32 is_set)
     {
         return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN;
     }
-    if (wid >= (spurs->m.flags1 & SF1_IS_SECOND ? 0x20u : 0x10u))
+    if (wid >= (spurs->m.flags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u))
     {
         return CELL_SPURS_POLICY_MODULE_ERROR_INVAL;
     }
@@ -1697,14 +1398,14 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr<CellSpurs> spurs, u32 wid, u32 is_set)
     {
         if (is_set)
         {
-            if (spurs->m.flagRecv.read_relaxed() != 0xff)
+            if (spurs->m.wklFlagReceiver.read_relaxed() != 0xff)
             {
                 return CELL_SPURS_POLICY_MODULE_ERROR_BUSY;
             }
         }
         else
         {
-            if (spurs->m.flagRecv.read_relaxed() != wid)
+            if (spurs->m.wklFlagReceiver.read_relaxed() != wid)
            {
                 return CELL_SPURS_POLICY_MODULE_ERROR_PERM;
             }
@@ -1716,7 +1417,7 @@ s64 _cellSpursWorkloadFlagReceiver(vm::ptr<CellSpurs> spurs, u32 wid, u32 is_set)
         return res;
     }
 
-    spurs->m.flagRecv.atomic_op([wid, is_set](u8& FR)
+    spurs->m.wklFlagReceiver.atomic_op([wid, is_set](u8& FR)
     {
         if (is_set)
         {
@@ -1756,24 +1457,107 @@ s64 cellSpursGetWorkloadFlag(vm::ptr<CellSpurs> spurs, vm::ptr<u64> flag)
 
-s64 cellSpursSendWorkloadSignal()
+s64 cellSpursSendWorkloadSignal(vm::ptr<CellSpurs> spurs, u32 workloadId)
 {
+    cellSpurs->Warning("%s(spurs=0x%x, workloadId=0x%x)", __FUNCTION__, spurs.addr(), workloadId);
+
 #ifdef PRX_DEBUG
-    cellSpurs->Warning("%s()", __FUNCTION__);
     return GetCurrentPPUThread().FastCall2(libsre + 0xA658, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (spurs.addr() == 0)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER;
+    }
+
+    if (spurs.addr() % CellSpurs::align)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN;
+    }
+
+    if (workloadId >= CELL_SPURS_MAX_WORKLOAD2 || (workloadId >= CELL_SPURS_MAX_WORKLOAD && (spurs->m.flags1 & SF1_32_WORKLOADS) == 0))
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_INVAL;
+    }
+
+    if ((spurs->m.wklMskA.read_relaxed() & (0x80000000u >> workloadId)) == 0)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_SRCH;
+    }
+
+    if (spurs->m.exception)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
+    }
+
+    u8 state;
+    if (workloadId >= CELL_SPURS_MAX_WORKLOAD)
+    {
+        state = spurs->m.wklState2[workloadId & 0x0F].read_relaxed();
+    }
+    else
+    {
+        state = spurs->m.wklState1[workloadId].read_relaxed();
+    }
+
+    if (state != SPURS_WKL_STATE_RUNNABLE)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
+    }
+
+    if (workloadId >= CELL_SPURS_MAX_WORKLOAD)
+    {
+        spurs->m.wklSignal2 |= be_t<u16>::make(0x8000 >> (workloadId & 0x0F));
+    }
+    else
+    {
+        spurs->m.wklSignal1 |= be_t<u16>::make(0x8000 >> workloadId);
+    }
+
     return CELL_OK;
 #endif
 }
 
-s64 cellSpursGetWorkloadData()
+s64 cellSpursGetWorkloadData(vm::ptr<CellSpurs> spurs, vm::ptr<u64> data, u32 workloadId)
 {
+    cellSpurs->Warning("%s(spurs_addr=0x%x, data=0x%x, workloadId=%d)", __FUNCTION__, spurs.addr(), data.addr(), workloadId);
+
 #ifdef PRX_DEBUG
-    cellSpurs->Warning("%s()", __FUNCTION__);
     return GetCurrentPPUThread().FastCall2(libsre + 0xA78C, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (spurs.addr() == 0 || data.addr() == 0)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_NULL_POINTER;
+    }
+
+    if (spurs.addr() % CellSpurs::align)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN;
+    }
+
+    if (workloadId >= CELL_SPURS_MAX_WORKLOAD2 || (workloadId >= CELL_SPURS_MAX_WORKLOAD && (spurs->m.flags1 & SF1_32_WORKLOADS) == 0))
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_INVAL;
+    }
+
+    if ((spurs->m.wklMskA.read_relaxed() & (0x80000000u >> workloadId)) == 0)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_SRCH;
+    }
+
+    if (spurs->m.exception)
+    {
+        return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
+    }
+
+    if (workloadId >= CELL_SPURS_MAX_WORKLOAD)
+    {
+        *data = spurs->m.wklInfo2[workloadId & 0x0F].arg;
+    }
+    else
+    {
+        *data = spurs->m.wklInfo1[workloadId].arg;
+    }
+
     return CELL_OK;
 #endif
 }
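Note (not part of the patch): a minimal sketch of the signal-bit layout that `cellSpursSendWorkloadSignal` above uses. `signal_bit_for` is a hypothetical helper: workloads 0..15 map to `wklSignal1` and 16..31 to `wklSignal2`, one bit per workload, MSB first.

```cpp
#include <cstdint>
using u16 = std::uint16_t;
using u32 = std::uint32_t;

struct SignalBits { u16 sig1; u16 sig2; }; // models wklSignal1 / wklSignal2

SignalBits signal_bit_for(u32 wid)
{
    SignalBits s{};
    if (wid < 16)
        s.sig1 = static_cast<u16>(0x8000u >> wid);          // first workload area
    else
        s.sig2 = static_cast<u16>(0x8000u >> (wid & 0x0F)); // second workload area
    return s;
}
```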
@@ -1793,7 +1577,7 @@ s64 cellSpursReadyCountStore(vm::ptr<CellSpurs> spurs, u32 wid, u32 value)
     {
         return CELL_SPURS_POLICY_MODULE_ERROR_ALIGN;
     }
-    if (wid >= (spurs->m.flags1 & SF1_IS_SECOND ? 0x20u : 0x10u) || value > 0xff)
+    if (wid >= (spurs->m.flags1 & SF1_32_WORKLOADS ? 0x20u : 0x10u) || value > 0xff)
     {
         return CELL_SPURS_POLICY_MODULE_ERROR_INVAL;
     }
@@ -1801,12 +1585,19 @@ s64 cellSpursReadyCountStore(vm::ptr<CellSpurs> spurs, u32 wid, u32 value)
     {
         return CELL_SPURS_POLICY_MODULE_ERROR_SRCH;
     }
-    if (spurs->m.exception.data() || spurs->wklStat(wid).read_relaxed() != 2)
+    if (spurs->m.exception.data() || spurs->wklState(wid).read_relaxed() != 2)
     {
         return CELL_SPURS_POLICY_MODULE_ERROR_STAT;
     }
 
-    spurs->m.wklReadyCount[wid].exchange((u8)value);
+    if (wid < CELL_SPURS_MAX_WORKLOAD)
+    {
+        spurs->m.wklReadyCount1[wid].exchange((u8)value);
+    }
+    else
+    {
+        spurs->m.wklIdleSpuCountOrReadyCount2[wid].exchange((u8)value);
+    }
     return CELL_OK;
 }
@@ -1900,111 +1691,581 @@ s64 cellSpursUnsetExceptionEventHandler()
 
 s64 _cellSpursEventFlagInitialize(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, vm::ptr<CellSpursEventFlag> eventFlag, u32 flagClearMode, u32 flagDirection)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("_cellSpursEventFlagInitialize(spurs_addr=0x%x, taskset_addr=0x%x, eventFlag_addr=0x%x, flagClearMode=%d, flagDirection=%d)", spurs.addr(), taskset.addr(), eventFlag.addr(), flagClearMode, flagDirection);
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x1564C, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (taskset.addr() == 0 && spurs.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (spurs.addr() % CellSpurs::align || taskset.addr() % CellSpursTaskset::align || eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    if (taskset.addr() && taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD2)
+    {
+        return CELL_SPURS_TASK_ERROR_INVAL;
+    }
+
+    if (flagDirection > CELL_SPURS_EVENT_FLAG_LAST || flagClearMode > CELL_SPURS_EVENT_FLAG_CLEAR_LAST)
+    {
+        return CELL_SPURS_TASK_ERROR_INVAL;
+    }
+
+    memset(eventFlag.get_ptr(), 0, CellSpursEventFlag::size);
+    eventFlag->m.direction = flagDirection;
+    eventFlag->m.clearMode = flagClearMode;
+    eventFlag->m.spuPort = CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT;
+
+    if (taskset.addr())
+    {
+        eventFlag->m.addr = taskset.addr();
+    }
+    else
+    {
+        eventFlag->m.isIwl = 1;
+        eventFlag->m.addr = spurs.addr();
+    }
+
     return CELL_OK;
 #endif
 }
 
 s64 cellSpursEventFlagAttachLv2EventQueue(vm::ptr<CellSpursEventFlag> eventFlag)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagAttachLv2EventQueue(eventFlag_addr=0x%x)", eventFlag.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x157B8, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (!eventFlag)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_AGAIN;
+    }
+
+    if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_SPU2PPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        return CELL_SPURS_TASK_ERROR_PERM;
+    }
+
+    if (eventFlag->m.spuPort != CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT)
+    {
+        return CELL_SPURS_TASK_ERROR_STAT;
+    }
+
+    vm::ptr<CellSpurs> spurs;
+    if (eventFlag->m.isIwl == 1)
+    {
+        spurs.set((u32)eventFlag->m.addr);
+    }
+    else
+    {
+        auto taskset = vm::ptr<CellSpursTaskset>::make((u32)eventFlag->m.addr);
+        spurs.set((u32)taskset->m.spurs.addr());
+    }
+
+    u32 eventQueueId;
+    vm::var<u8> port;
+    auto rc = spursCreateLv2EventQueue(spurs, eventQueueId, port, 1, *((u64 *)"_spuEvF"));
+    if (rc != CELL_OK)
+    {
+        // Return rc if it's an error code from SPURS, otherwise convert the error code to a SPURS task error code
+        return (rc & 0x0FFF0000) == 0x00410000 ? rc : (0x80410900 | (rc & 0xFF));
+    }
+
+    if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        vm::var<be_t<u32>> eventPortId;
+        rc = sys_event_port_create(vm::ptr<u32>::make(eventPortId.addr()), SYS_EVENT_PORT_LOCAL, 0);
+        if (rc == CELL_OK)
+        {
+            rc = sys_event_port_connect_local(eventPortId.value(), eventQueueId);
+            if (rc == CELL_OK)
+            {
+                eventFlag->m.eventPortId = eventPortId;
+                goto success;
+            }
+
+            sys_event_port_destroy(eventPortId.value());
+        }
+
+        // TODO: Implement the following
+        // if (spursDetachLv2EventQueue(spurs, port, 1) == CELL_OK)
+        // {
+        //     sys_event_queue_destroy(eventQueueId, SYS_EVENT_QUEUE_DESTROY_FORCE);
+        // }
+
+        // Return rc if it's an error code from SPURS, otherwise convert the error code to a SPURS task error code
+        return (rc & 0x0FFF0000) == 0x00410000 ? rc : (0x80410900 | (rc & 0xFF));
+    }
+
+success:
+    eventFlag->m.eventQueueId = eventQueueId;
+    eventFlag->m.spuPort = port;
+
     return CELL_OK;
 #endif
 }
 
 s64 cellSpursEventFlagDetachLv2EventQueue(vm::ptr<CellSpursEventFlag> eventFlag)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagDetachLv2EventQueue(eventFlag_addr=0x%x)", eventFlag.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x15998, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (!eventFlag)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_AGAIN;
+    }
+
+    if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_SPU2PPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        return CELL_SPURS_TASK_ERROR_PERM;
+    }
+
+    if (eventFlag->m.spuPort == CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT)
+    {
+        return CELL_SPURS_TASK_ERROR_STAT;
+    }
+
+    if (eventFlag->m.ppuWaitMask || eventFlag->m.ppuPendingRecv)
+    {
+        return CELL_SPURS_TASK_ERROR_BUSY;
+    }
+
+    auto port = eventFlag->m.spuPort;
+    eventFlag->m.spuPort = CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT;
+
+    vm::ptr<CellSpurs> spurs;
+    if (eventFlag->m.isIwl == 1)
+    {
+        spurs.set((u32)eventFlag->m.addr);
+    }
+    else
+    {
+        auto taskset = vm::ptr<CellSpursTaskset>::make((u32)eventFlag->m.addr);
+        spurs.set((u32)taskset->m.spurs.addr());
+    }
+
+    if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        sys_event_port_disconnect(eventFlag->m.eventPortId);
+        sys_event_port_destroy(eventFlag->m.eventPortId);
+    }
+
+    s64 rc = CELL_OK;
+    // TODO: Implement the following
+    // auto rc = spursDetachLv2EventQueue(spurs, port, 1);
+    // if (rc == CELL_OK)
+    // {
+    //     rc = sys_event_queue_destroy(eventFlag->m.eventQueueId, SYS_EVENT_QUEUE_DESTROY_FORCE);
+    // }
+
+    if (rc != CELL_OK)
+    {
+        // Return rc if it's an error code from SPURS, otherwise convert the error code to a SPURS task error code
+        return (rc & 0x0FFF0000) == 0x00410000 ? rc : (0x80410900 | (rc & 0xFF));
+    }
+
     return CELL_OK;
 #endif
 }
 
+s64 _cellSpursEventFlagWait(vm::ptr<CellSpursEventFlag> eventFlag, vm::ptr<u16> mask, u32 mode, u32 block)
+{
+    if (eventFlag.addr() == 0 || mask.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    if (mode > CELL_SPURS_EVENT_FLAG_WAIT_MODE_LAST)
+    {
+        return CELL_SPURS_TASK_ERROR_INVAL;
+    }
+
+    if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_SPU2PPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        return CELL_SPURS_TASK_ERROR_PERM;
+    }
+
+    if (block && eventFlag->m.spuPort == CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT)
+    {
+        return CELL_SPURS_TASK_ERROR_STAT;
+    }
+
+    if (eventFlag->m.ppuWaitMask || eventFlag->m.ppuPendingRecv)
+    {
+        return CELL_SPURS_TASK_ERROR_BUSY;
+    }
+
+    u16 relevantEvents = eventFlag->m.events & *mask;
+    if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        // Make sure the wait mask and mode specified do not conflict with those of the already waiting tasks.
+        // Conflict scenarios:
+        // OR  vs OR  - A conflict never occurs
+        // OR  vs AND - A conflict occurs if the masks for the two tasks overlap
+        // AND vs AND - A conflict occurs if the masks for the two tasks are not the same
+
+        // Determine the set of all already waiting tasks whose wait mode/mask can possibly conflict with the specified wait mode/mask.
+        // This set is equal to 'set of all tasks waiting' - 'set of all tasks whose wait conditions have been met'.
+        // If the wait mode is OR, we prune the set of all tasks that are waiting in OR mode from the set since a conflict cannot occur
+        // with an already waiting task in OR mode.
+        u16 relevantWaitSlots = eventFlag->m.spuTaskUsedWaitSlots & ~eventFlag->m.spuTaskPendingRecv;
+        if (mode == CELL_SPURS_EVENT_FLAG_OR)
+        {
+            relevantWaitSlots &= eventFlag->m.spuTaskWaitMode;
+        }
+
+        int i = CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS - 1;
+        while (relevantWaitSlots)
+        {
+            if (relevantWaitSlots & 0x0001)
+            {
+                if (eventFlag->m.spuTaskWaitMask[i] & *mask && eventFlag->m.spuTaskWaitMask[i] != *mask)
+                {
+                    return CELL_SPURS_TASK_ERROR_AGAIN;
+                }
+            }
+
+            relevantWaitSlots >>= 1;
+            i--;
+        }
+    }
+
+    // There is no need to block if all bits required by the wait operation have already been set or
+    // if the wait mode is OR and at least one of the bits required by the wait operation has been set.
+    bool recv;
+    if ((*mask & ~relevantEvents) == 0 || (mode == CELL_SPURS_EVENT_FLAG_OR && relevantEvents))
+    {
+        // If the clear flag is AUTO then clear the bits consumed by this thread
+        if (eventFlag->m.clearMode == CELL_SPURS_EVENT_FLAG_CLEAR_AUTO)
+        {
+            eventFlag->m.events &= ~relevantEvents;
+        }
+
+        recv = false;
+    }
+    else
+    {
+        // If we reach here it means that the conditions for this thread have not been met.
+        // If this is a try wait operation then do not block but return an error code.
+        if (block == 0)
+        {
+            return CELL_SPURS_TASK_ERROR_BUSY;
+        }
+
+        eventFlag->m.ppuWaitSlotAndMode = 0;
+        if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
+        {
+            // Find an unused wait slot
+            int i = 0;
+            u16 spuTaskUsedWaitSlots = eventFlag->m.spuTaskUsedWaitSlots;
+            while (spuTaskUsedWaitSlots & 0x0001)
+            {
+                spuTaskUsedWaitSlots >>= 1;
+                i++;
+            }
+
+            if (i == CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS)
+            {
+                // Event flag has no empty wait slots
+                return CELL_SPURS_TASK_ERROR_BUSY;
+            }
+
+            // Mark the found wait slot as used by this thread
+            eventFlag->m.ppuWaitSlotAndMode = (CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS - 1 - i) << 4;
+        }
+
+        // Save the wait mask and mode for this thread
+        eventFlag->m.ppuWaitSlotAndMode |= mode;
+        eventFlag->m.ppuWaitMask = *mask;
+        recv = true;
+    }
+
+    u16 receivedEventFlag = relevantEvents; // events consumed without blocking
+    if (recv) {
+        // Block till something happens
+        vm::var<sys_event_data> data;
+        auto rc = sys_event_queue_receive(eventFlag->m.eventQueueId, data, 0);
+        if (rc != CELL_OK)
+        {
+            assert(0);
+        }
+
+        int i = 0;
+        if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY)
+        {
+            i = eventFlag->m.ppuWaitSlotAndMode >> 4;
+        }
+
+        receivedEventFlag = eventFlag->m.pendingRecvTaskEvents[i];
+        eventFlag->m.ppuPendingRecv = 0;
+    }
+
+    *mask = receivedEventFlag;
+    return CELL_OK;
+}
+
 s64 cellSpursEventFlagWait(vm::ptr<CellSpursEventFlag> eventFlag, vm::ptr<u16> mask, u32 mode)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagWait(eventFlag_addr=0x%x, mask_addr=0x%x, mode=%d)", eventFlag.addr(), mask.addr(), mode);
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x15E68, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
-    return CELL_OK;
+    return _cellSpursEventFlagWait(eventFlag, mask, mode, 1/*block*/);
 #endif
 }
 
 s64 cellSpursEventFlagClear(vm::ptr<CellSpursEventFlag> eventFlag, u16 bits)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagClear(eventFlag_addr=0x%x, bits=0x%x)", eventFlag.addr(), bits);
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x15E9C, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (eventFlag.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    eventFlag->m.events &= ~bits;
     return CELL_OK;
 #endif
 }
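Note (not part of the patch): a standalone sketch of the wait-mask conflict rule that `_cellSpursEventFlagWait` documents and checks above. `wait_conflicts` is a hypothetical helper that condenses the pairwise check the code performs (`waitMask[i] & *mask && waitMask[i] != *mask`, with OR-vs-OR waiters pruned beforehand).

```cpp
#include <cstdint>
using u16 = std::uint16_t;

// Two waiters on the same event flag conflict when:
//   OR  vs OR  - never
//   anything else - their masks overlap but are not identical
bool wait_conflicts(u16 maskA, bool orA, u16 maskB, bool orB)
{
    if (orA && orB)
    {
        return false; // OR vs OR never conflicts
    }
    return (maskA & maskB) != 0 && maskA != maskB;
}
```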
 
 s64 cellSpursEventFlagSet(vm::ptr<CellSpursEventFlag> eventFlag, u16 bits)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagSet(eventFlag_addr=0x%x, bits=0x%x)", eventFlag.addr(), bits);
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x15F04, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (eventFlag.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    if (eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_PPU2SPU && eventFlag->m.direction != CELL_SPURS_EVENT_FLAG_ANY2ANY)
+    {
+        return CELL_SPURS_TASK_ERROR_PERM;
+    }
+
+    u16 ppuEventFlag = 0;
+    bool send = false;
+    int ppuWaitSlot = 0;
+    u16 eventsToClear = 0;
+    if (eventFlag->m.direction == CELL_SPURS_EVENT_FLAG_ANY2ANY && eventFlag->m.ppuWaitMask)
+    {
+        u16 ppuRelevantEvents = (eventFlag->m.events | bits) & eventFlag->m.ppuWaitMask;
+
+        // Unblock the waiting PPU thread if either all the bits being waited by the thread have been set or
+        // if the wait mode of the thread is OR and at least one bit the thread is waiting on has been set
+        if ((eventFlag->m.ppuWaitMask & ~ppuRelevantEvents) == 0 ||
+            ((eventFlag->m.ppuWaitSlotAndMode & 0x0F) == CELL_SPURS_EVENT_FLAG_OR && ppuRelevantEvents != 0))
+        {
+            eventFlag->m.ppuPendingRecv = 1;
+            eventFlag->m.ppuWaitMask = 0;
+            ppuEventFlag = ppuRelevantEvents;
+            eventsToClear = ppuRelevantEvents;
+            ppuWaitSlot = eventFlag->m.ppuWaitSlotAndMode >> 4;
+            send = true;
+        }
+    }
+
+    int i = CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS - 1;
+    int j = 0;
+    u16 relevantWaitSlots = eventFlag->m.spuTaskUsedWaitSlots & ~eventFlag->m.spuTaskPendingRecv;
+    u16 spuTaskPendingRecv = 0;
+    u16 pendingRecvTaskEvents[16];
+    while (relevantWaitSlots)
+    {
+        if (relevantWaitSlots & 0x0001)
+        {
+            u16 spuTaskRelevantEvents = (eventFlag->m.events | bits) & eventFlag->m.spuTaskWaitMask[i];
+
+            // Unblock the waiting SPU task if either all the bits being waited by the task have been set or
+            // if the wait mode of the task is OR and at least one bit the task is waiting on has been set
+            if ((eventFlag->m.spuTaskWaitMask[i] & ~spuTaskRelevantEvents) == 0 ||
+                (((eventFlag->m.spuTaskWaitMode >> j) & 0x0001) == CELL_SPURS_EVENT_FLAG_OR && spuTaskRelevantEvents != 0))
+            {
+                eventsToClear |= spuTaskRelevantEvents;
+                spuTaskPendingRecv |= 1 << j;
+                pendingRecvTaskEvents[j] = spuTaskRelevantEvents;
+            }
+        }
+
+        relevantWaitSlots >>= 1;
+        i--;
+        j++;
+    }
+
+    eventFlag->m.events |= bits;
+    eventFlag->m.spuTaskPendingRecv |= spuTaskPendingRecv;
+
+    // If the clear flag is AUTO then clear the bits consumed by all tasks marked to be unblocked
+    if (eventFlag->m.clearMode == CELL_SPURS_EVENT_FLAG_CLEAR_AUTO)
+    {
+        eventFlag->m.events &= ~eventsToClear;
+    }
+
+    if (send)
+    {
+        // Signal the PPU thread to be woken up
+        eventFlag->m.pendingRecvTaskEvents[ppuWaitSlot] = ppuEventFlag;
+        if (sys_event_port_send(eventFlag->m.eventPortId, 0, 0, 0) != CELL_OK)
+        {
+            assert(0);
+        }
+    }
+
+    if (spuTaskPendingRecv)
+    {
+        // Signal each SPU task whose conditions have been met to be woken up
+        for (int i = 0; i < CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS; i++)
+        {
+            if (spuTaskPendingRecv & (0x8000 >> i))
+            {
+                eventFlag->m.pendingRecvTaskEvents[i] = pendingRecvTaskEvents[i];
+                vm::var<be_t<u32>> taskset;
+                if (eventFlag->m.isIwl)
+                {
+                    cellSpursLookUpTasksetAddress(vm::ptr<CellSpurs>::make((u32)eventFlag->m.addr),
+                        vm::ptr<CellSpursTaskset>::make(taskset.addr()),
+                        eventFlag->m.waitingTaskWklId[i]);
+                }
+                else
+                {
+                    taskset.value() = (u32)eventFlag->m.addr;
+                }
+
+                auto rc = _cellSpursSendSignal(vm::ptr<CellSpursTaskset>::make(taskset.addr()), eventFlag->m.waitingTaskId[i]);
+                if (rc == CELL_SPURS_TASK_ERROR_INVAL || rc == CELL_SPURS_TASK_ERROR_STAT)
+                {
+                    return CELL_SPURS_TASK_ERROR_FATAL;
+                }
+
+                if (rc != CELL_OK)
+                {
+                    assert(0);
+                }
+            }
+        }
+    }
+
     return CELL_OK;
 #endif
 }
 
 s64 cellSpursEventFlagTryWait(vm::ptr<CellSpursEventFlag> eventFlag, vm::ptr<u16> mask, u32 mode)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagTryWait(eventFlag_addr=0x%x, mask_addr=0x%x, mode=0x%x)", eventFlag.addr(), mask.addr(), mode);
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x15E70, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
-    return CELL_OK;
+    return _cellSpursEventFlagWait(eventFlag, mask, mode, 0/*block*/);
 #endif
 }
 
 s64 cellSpursEventFlagGetDirection(vm::ptr<CellSpursEventFlag> eventFlag, vm::ptr<u32> direction)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagGetDirection(eventFlag_addr=0x%x, direction_addr=0x%x)", eventFlag.addr(), direction.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x162C4, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (eventFlag.addr() == 0 || direction.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    *direction = eventFlag->m.direction;
     return CELL_OK;
 #endif
 }
 
 s64 cellSpursEventFlagGetClearMode(vm::ptr<CellSpursEventFlag> eventFlag, vm::ptr<u32> clear_mode)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagGetClearMode(eventFlag_addr=0x%x, clear_mode_addr=0x%x)", eventFlag.addr(), clear_mode.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x16310, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (eventFlag.addr() == 0 || clear_mode.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    *clear_mode = eventFlag->m.clearMode;
     return CELL_OK;
 #endif
 }
 
 s64 cellSpursEventFlagGetTasksetAddress(vm::ptr<CellSpursEventFlag> eventFlag, vm::ptr<u32> taskset)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursEventFlagGetTasksetAddress(eventFlag_addr=0x%x, taskset_addr=0x%x)", eventFlag.addr(), taskset.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x1635C, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (eventFlag.addr() == 0 || taskset.addr() == 0)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (eventFlag.addr() % CellSpursEventFlag::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    taskset.set(eventFlag->m.isIwl ? 0 : eventFlag->m.addr);
     return CELL_OK;
 #endif
 }
@@ -2306,36 +2567,107 @@ s64 cellSpursJobChainGetSpursAddress()
 #endif
 }
 
-s64 cellSpursCreateTasksetWithAttribute()
+s64 spursCreateTaskset(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, u64 args, vm::ptr<const u8> priority,
+    u32 max_contention, vm::ptr<const char> name, u32 size, s32 enable_clear_ls)
 {
-#ifdef PRX_DEBUG
-    cellSpurs->Warning("%s()", __FUNCTION__);
-    return GetCurrentPPUThread().FastCall2(libsre + 0x14BEC, libsre_rtoc);
-#else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (!spurs || !taskset)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (spurs.addr() % CellSpurs::align || taskset.addr() % CellSpursTaskset::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    memset(taskset.get_ptr(), 0, size);
+
+    taskset->m.spurs = spurs;
+    taskset->m.args = args;
+    taskset->m.enable_clear_ls = enable_clear_ls > 0 ? 1 : 0;
+    taskset->m.size = size;
+
+    vm::var<CellSpursWorkloadAttribute> wkl_attr;
+    _cellSpursWorkloadAttributeInitialize(wkl_attr, 1 /*revision*/, 0x33 /*sdk_version*/, vm::ptr<const void>::make(SPURS_IMG_ADDR_TASKSET_PM), 0x1E40 /*pm_size*/,
+        taskset.addr(), priority, 8 /*min_contention*/, max_contention);
+    // TODO: Check return code
+
+    cellSpursWorkloadAttributeSetName(wkl_attr, vm::ptr<const char>::make(0), name);
+    // TODO: Check return code
+
+    // TODO: cellSpursWorkloadAttributeSetShutdownCompletionEventHook(wkl_attr, hook, taskset);
+    // TODO: Check return code
+
+    vm::var<be_t<u32>> wid;
+    cellSpursAddWorkloadWithAttribute(spurs, vm::ptr<u32>::make(wid.addr()), vm::ptr<const CellSpursWorkloadAttribute>::make(wkl_attr.addr()));
+    // TODO: Check return code
+
+    taskset->m.wkl_flag_wait_task = 0x80;
+    taskset->m.wid = wid.value();
+    // TODO: cellSpursSetExceptionEventHandler(spurs, wid, hook, taskset);
+    // TODO: Check return code
+
     return CELL_OK;
-#endif
 }
 
-s64 cellSpursCreateTaskset(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, u64 args, vm::ptr<const u8> priority, u32 maxContention)
+s64 cellSpursCreateTasksetWithAttribute(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, vm::ptr<CellSpursTasksetAttribute> attr)
+{
+    cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, attr=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), attr.addr());
+
+#ifdef PRX_DEBUG
+    return GetCurrentPPUThread().FastCall2(libsre + 0x14BEC, libsre_rtoc);
+#endif
+
+    if (!attr)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (attr.addr() % CellSpursTasksetAttribute::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    if (attr->m.revision != CELL_SPURS_TASKSET_ATTRIBUTE_REVISION)
+    {
+        return CELL_SPURS_TASK_ERROR_INVAL;
+    }
+
+    auto rc = spursCreateTaskset(spurs, taskset, attr->m.args, vm::ptr<const u8>::make(attr.addr() + offsetof(CellSpursTasksetAttribute, m.priority)),
+        attr->m.max_contention, vm::ptr<const char>::make(attr->m.name.addr()), attr->m.taskset_size, attr->m.enable_clear_ls);
+
+    if (attr->m.taskset_size >= CellSpursTaskset2::size)
+    {
+        // TODO: Implement this
+    }
+
+    return rc;
+}
+
+s64 cellSpursCreateTaskset(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, u64 args, vm::ptr<const u8> priority, u32 maxContention)
 {
     cellSpurs->Warning("cellSpursCreateTaskset(spurs_addr=0x%x, taskset_addr=0x%x, args=0x%llx, priority_addr=0x%x, maxContention=%d)",
         spurs.addr(), taskset.addr(), args, priority.addr(), maxContention);
 
 #ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x14CB8, libsre_rtoc);
-#else
+#endif
+
+#if 0
     SPURSManagerTasksetAttribute *tattr = new SPURSManagerTasksetAttribute(args, priority, maxContention);
     taskset->taskset = new SPURSManagerTaskset(taskset.addr(), tattr);
 
     return CELL_OK;
 #endif
+
+    return spursCreateTaskset(spurs, taskset, args, priority, maxContention, vm::ptr<const char>::make(0), CellSpursTaskset::size, 0);
 }
 
 s64 cellSpursJoinTaskset(vm::ptr<CellSpursTaskset> taskset)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursJoinTaskset(taskset_addr=0x%x)", taskset.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x152F8, libsre_rtoc);
 #else
     UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2343,21 +2675,38 @@ s64 cellSpursJoinTaskset(vm::ptr<CellSpursTaskset> taskset)
 #endif
 }
 
-s64 cellSpursGetTasksetId(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> workloadId)
+s64 cellSpursGetTasksetId(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> wid)
 {
+    cellSpurs->Warning("cellSpursGetTasksetId(taskset_addr=0x%x, wid=0x%x)", taskset.addr(), wid.addr());
+
 #ifdef PRX_DEBUG
-    cellSpurs->Warning("cellSpursGetTasksetId(taskset_addr=0x%x, workloadId_addr=0x%x)", taskset.addr(), workloadId.addr());
     return GetCurrentPPUThread().FastCall2(libsre + 0x14EA0, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
+    if (!taskset || !wid)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (taskset.addr() % CellSpursTaskset::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD)
+    {
+        return CELL_SPURS_TASK_ERROR_INVAL;
+    }
+
+    *wid = taskset->m.wid;
     return CELL_OK;
 #endif
 }
 
 s64 cellSpursShutdownTaskset(vm::ptr<CellSpursTaskset> taskset)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursShutdownTaskset(taskset_addr=0x%x)", taskset.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x14868, libsre_rtoc);
 #else
     UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2365,34 +2714,236 @@ s64 cellSpursShutdownTaskset(vm::ptr<CellSpursTaskset> taskset)
 #endif
 }
 
-s64 cellSpursCreateTask(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> taskID, u32 elf_addr, u32 context_addr, u32 context_size, vm::ptr<CellSpursTaskLsPattern> lsPattern,
+u32 _cellSpursGetSdkVersion()
+{
+    static s32 sdk_version = -2;
+
+    if (sdk_version == -2)
+    {
+        vm::var<be_t<s32>> version;
+        sys_process_get_sdk_version(sys_process_getpid(), vm::ptr<s32>::make(version.addr()));
+        sdk_version = version.value();
+    }
+
+    return sdk_version;
+}
+
+s64 spursCreateTask(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> task_id, vm::ptr<u32> elf_addr, vm::ptr<u32> context_addr, u32 context_size, vm::ptr<CellSpursTaskLsPattern> ls_pattern, vm::ptr<CellSpursTaskArgument> arg)
+{
+    if (!taskset || !elf_addr)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (elf_addr.addr() % 16)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    auto sdk_version = _cellSpursGetSdkVersion();
+    if (sdk_version < 0x27FFFF)
+    {
+        if (context_addr.addr() % 16)
+        {
+            return CELL_SPURS_TASK_ERROR_ALIGN;
+        }
+    }
+    else
+    {
+        if (context_addr.addr() % 128)
+        {
+            return CELL_SPURS_TASK_ERROR_ALIGN;
+        }
+    }
+
+    u32 alloc_ls_blocks = 0;
+    if (context_addr.addr() != 0)
+    {
+        if (context_size < CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE)
+        {
+            return CELL_SPURS_TASK_ERROR_INVAL;
+        }
+
+        alloc_ls_blocks = context_size > 0x3D400 ? 0x7A : ((context_size - 0x400) >> 11);
+        if (ls_pattern.addr() != 0)
+        {
+            u32 ls_blocks = 0;
+            for (auto i = 0; i < 128; i++)
+            {
+                if (ls_pattern->_u128.value()._bit[i])
+                {
+                    ls_blocks++;
+                }
+            }
+
+            if (ls_blocks > alloc_ls_blocks)
+            {
+                return CELL_SPURS_TASK_ERROR_INVAL;
+            }
+
+            u128 _0 = u128::from32(0);
+            if ((ls_pattern->_u128.value() & u128::from32r(0xFC000000)) != _0)
+            {
+                // Prevent save/restore to SPURS management area
+                return CELL_SPURS_TASK_ERROR_INVAL;
+            }
+        }
+    }
+    else
+    {
+        alloc_ls_blocks = 0;
+    }
+
+    // TODO: Verify the ELF header is proper and all its load segments are at address >= 0x3000
+
+    u32 tmp_task_id;
+    for (tmp_task_id = 0; tmp_task_id < CELL_SPURS_MAX_TASK; tmp_task_id++)
+    {
+        if (!taskset->m.enabled.value()._bit[tmp_task_id])
+        {
+            auto enabled = taskset->m.enabled.value();
+            enabled._bit[tmp_task_id] = true;
+            taskset->m.enabled = enabled;
+            break;
+        }
+    }
+
+    if (tmp_task_id >= CELL_SPURS_MAX_TASK)
+    {
+        return CELL_SPURS_TASK_ERROR_AGAIN;
+    }
+
+    taskset->m.task_info[tmp_task_id].elf_addr.set(elf_addr.addr());
+    taskset->m.task_info[tmp_task_id].context_save_storage_and_alloc_ls_blocks = (context_addr.addr() | alloc_ls_blocks);
+    taskset->m.task_info[tmp_task_id].args = *arg;
+    if (ls_pattern.addr())
+    {
+        taskset->m.task_info[tmp_task_id].ls_pattern = *ls_pattern;
+    }
+
+    *task_id = tmp_task_id;
+    return CELL_OK;
+}
+
+s64 spursTaskStart(vm::ptr<CellSpursTaskset> taskset, u32 taskId)
+{
+    auto pendingReady = taskset->m.pending_ready.value();
+    pendingReady._bit[taskId] = true;
+    taskset->m.pending_ready = pendingReady;
+
+    cellSpursSendWorkloadSignal(vm::ptr<CellSpurs>::make((u32)taskset->m.spurs.addr()), taskset->m.wid);
+    auto rc = cellSpursWakeUp(GetCurrentPPUThread(), vm::ptr<CellSpurs>::make((u32)taskset->m.spurs.addr()));
+    if (rc != CELL_OK)
+    {
+        if (rc == CELL_SPURS_POLICY_MODULE_ERROR_STAT)
+        {
+            rc = CELL_SPURS_TASK_ERROR_STAT;
+        }
+        else
+        {
+            assert(0);
+        }
+    }
+
+    return rc;
+}
+
+s64 cellSpursCreateTask(vm::ptr<CellSpursTaskset> taskset, vm::ptr<u32> taskId, u32 elf_addr, u32 context_addr, u32 context_size, vm::ptr<CellSpursTaskLsPattern> lsPattern,
     vm::ptr<CellSpursTaskArgument> argument)
 {
-#ifdef PRX_DEBUG
     cellSpurs->Warning("cellSpursCreateTask(taskset_addr=0x%x, taskID_addr=0x%x, elf_addr_addr=0x%x, context_addr_addr=0x%x, context_size=%d, lsPattern_addr=0x%x, argument_addr=0x%x)",
-        taskset.addr(), taskID.addr(), elf_addr, context_addr, context_size, lsPattern.addr(), argument.addr());
+        taskset.addr(), taskId.addr(), elf_addr, context_addr, context_size, lsPattern.addr(), argument.addr());
+
+#ifdef PRX_DEBUG
     return GetCurrentPPUThread().FastCall2(libsre + 0x12414, libsre_rtoc);
 #else
-    UNIMPLEMENTED_FUNC(cellSpurs);
-    return CELL_OK;
+    if (!taskset)
+    {
+        return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+    }
+
+    if (taskset.addr() % CellSpursTaskset::align)
+    {
+        return CELL_SPURS_TASK_ERROR_ALIGN;
+    }
+
+    vm::var<u32> tmpTaskId;
+    auto rc = spursCreateTask(taskset, tmpTaskId, vm::ptr<u32>::make(elf_addr), vm::ptr<u32>::make(context_addr), context_size, lsPattern, argument);
+    if (rc != CELL_OK)
+    {
+        return rc;
+    }
+
+    rc = spursTaskStart(taskset, tmpTaskId);
+    if (rc != CELL_OK)
+    {
+        return rc;
+    }
+
+    *taskId = tmpTaskId;
+    return CELL_OK;
 #endif
 }
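Note (not part of the patch): a small sketch of the LS-pattern bookkeeping in `spursCreateTask` above. `alloc_ls_blocks_for` and `count_ls_blocks` are hypothetical helpers: each bit of the 128-bit pattern marks one 2KB local-storage block to save/restore (`(context_size - 0x400) >> 11`, i.e. a 0x400-byte reserved header then 2KB units, capped at 0x7A blocks), the popcount of the pattern must not exceed the allocation, and blocks covered by the `0xFC000000` mask check are rejected because they belong to the SPURS management area.

```cpp
#include <cstdint>
using u32 = std::uint32_t;
using u64 = std::uint64_t;

u32 alloc_ls_blocks_for(u32 context_size)
{
    // mirrors the computation in spursCreateTask
    return context_size > 0x3D400 ? 0x7A : ((context_size - 0x400) >> 11);
}

u32 count_ls_blocks(const u64 pattern[2]) // models the 128-bit ls_pattern
{
    u32 n = 0;
    for (int w = 0; w < 2; w++)
        for (u64 b = pattern[w]; b; b &= b - 1) // clear lowest set bit
            n++;
    return n;
}
```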
UNIMPLEMENTED_FUNC(cellSpurs); + if (!taskset) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (taskset.addr() % CellSpursTaskset::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (taskId >= CELL_SPURS_MAX_TASK || taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD2) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + auto _0 = be_t::make(u128::from32(0)); + auto disabled = taskset->m.enabled.value()._bit[taskId] ? false : true; + auto invalid = (taskset->m.ready & taskset->m.pending_ready) != _0 || (taskset->m.running & taskset->m.waiting) != _0 || disabled || + ((taskset->m.running | taskset->m.ready | taskset->m.pending_ready | taskset->m.waiting | taskset->m.signalled) & be_t::make(~taskset->m.enabled.value())) != _0; + + if (invalid) + { + return CELL_SPURS_TASK_ERROR_SRCH; + } + + auto shouldSignal = (taskset->m.waiting & be_t::make(~taskset->m.signalled.value()) & be_t::make(u128::fromBit(taskId))) != _0 ? true : false; + auto signalled = taskset->m.signalled.value(); + signalled._bit[taskId] = true; + taskset->m.signalled = signalled; + if (shouldSignal) + { + cellSpursSendWorkloadSignal(vm::ptr::make((u32)taskset->m.spurs.addr()), taskset->m.wid); + auto rc = cellSpursWakeUp(GetCurrentPPUThread(), vm::ptr::make((u32)taskset->m.spurs.addr())); + if (rc == CELL_SPURS_POLICY_MODULE_ERROR_STAT) + { + return CELL_SPURS_TASK_ERROR_STAT; + } + + if (rc != CELL_OK) + { + assert(0); + } + } + return CELL_OK; #endif } s64 cellSpursCreateTaskWithAttribute() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x12204, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2400,35 +2951,73 @@ s64 cellSpursCreateTaskWithAttribute() #endif } -s64 cellSpursTasksetAttributeSetName() +s64 cellSpursTasksetAttributeSetName(vm::ptr attr, vm::ptr name) { + cellSpurs->Warning("%s(attr=0x%x, name=0x%x)", __FUNCTION__, attr.addr(), name.addr()); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14210, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attr || !name) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % CellSpursTasksetAttribute::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + attr->m.name = name; return CELL_OK; #endif } -s64 cellSpursTasksetAttributeSetTasksetSize() +s64 cellSpursTasksetAttributeSetTasksetSize(vm::ptr attr, u32 size) { + cellSpurs->Warning("%s(attr=0x%x, size=0x%x)", __FUNCTION__, attr.addr(), size); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x14254, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attr) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % CellSpursTasksetAttribute::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + if (size != CellSpursTaskset::size && size != CellSpursTaskset2::size) + { + return CELL_SPURS_TASK_ERROR_INVAL; + } + + attr->m.taskset_size = size; return CELL_OK; #endif } -s64 cellSpursTasksetAttributeEnableClearLS() +s64 cellSpursTasksetAttributeEnableClearLS(vm::ptr attr, s32 enable) { + cellSpurs->Warning("%s(attr=0x%x, enable=%d)", __FUNCTION__, attr.addr(), enable); + #ifdef PRX_DEBUG - cellSpurs->Warning("%s()", __FUNCTION__); return GetCurrentPPUThread().FastCall2(libsre + 0x142AC, libsre_rtoc); #else - UNIMPLEMENTED_FUNC(cellSpurs); + if (!attr) + { + return CELL_SPURS_TASK_ERROR_NULL_POINTER; + } + + if (attr.addr() % 
CellSpursTasksetAttribute::align) + { + return CELL_SPURS_TASK_ERROR_ALIGN; + } + + attr->m.enable_clear_ls = enable ? 1 : 0; return CELL_OK; #endif } @@ -2440,27 +3029,28 @@ s64 _cellSpursTasksetAttribute2Initialize(vm::ptr at #ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1474C, libsre_rtoc); #else - attribute->revision = revision; - attribute->name_addr = 0; - attribute->argTaskset = 0; + memset(attribute.get_ptr(), 0, CellSpursTasksetAttribute2::size); + attribute->m.revision = revision; + attribute->m.name.set(0); + attribute->m.args = 0; for (s32 i = 0; i < 8; i++) { - attribute->priority[i] = 1; + attribute->m.priority[i] = 1; } - attribute->maxContention = 8; - attribute->enableClearLs = 0; - attribute->CellSpursTaskNameBuffer_addr = 0; - + attribute->m.max_contention = 8; + attribute->m.enable_clear_ls = 0; + attribute->m.task_name_buffer.set(0); return CELL_OK; #endif } s64 cellSpursTaskExitCodeGet() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1397C, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2470,8 +3060,9 @@ s64 cellSpursTaskExitCodeGet() s64 cellSpursTaskExitCodeInitialize() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x1352C, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2481,8 +3072,9 @@ s64 cellSpursTaskExitCodeInitialize() s64 cellSpursTaskExitCodeTryGet() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13974, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2492,8 +3084,9 @@ s64 cellSpursTaskExitCodeTryGet() s64 cellSpursTaskGetLoadableSegmentPattern() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13ED4, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2503,8 +3096,9 @@ s64 cellSpursTaskGetLoadableSegmentPattern() s64 cellSpursTaskGetReadOnlyAreaPattern() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13CFC, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2514,8 +3108,9 @@ s64 cellSpursTaskGetReadOnlyAreaPattern() s64 cellSpursTaskGenerateLsPattern() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x13B78, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2525,8 +3120,9 @@ s64 cellSpursTaskGenerateLsPattern() s64 _cellSpursTaskAttributeInitialize() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x10C30, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2536,8 +3132,9 @@ s64 _cellSpursTaskAttributeInitialize() s64 cellSpursTaskAttributeSetExitCodeContainer() { -#ifdef PRX_DEBUG cellSpurs->Warning("%s()", __FUNCTION__); + +#ifdef PRX_DEBUG return GetCurrentPPUThread().FastCall2(libsre + 0x10A98, libsre_rtoc); #else UNIMPLEMENTED_FUNC(cellSpurs); @@ -2558,12 +3155,7 @@ s64 _cellSpursTaskAttribute2Initialize(vm::ptr attribut for (s32 c = 0; c < 4; c++) { - attribute->lsPattern.u32[c] = 0; - } - - for (s32 i = 0; i < 2; i++) - { - attribute->lsPattern.u64[i] = 0; + attribute->lsPattern._u128 = u128::from64r(0); } attribute->name_addr = 0; @@ -2574,8 +3166,9 @@ s64 _cellSpursTaskAttribute2Initialize(vm::ptr attribut s64 
cellSpursTaskGetContextSaveAreaSize()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x1409C, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2583,21 +3176,44 @@ s64 cellSpursTaskGetContextSaveAreaSize()
 #endif
 }

-s64 cellSpursCreateTaskset2()
+s64 cellSpursCreateTaskset2(vm::ptr spurs, vm::ptr taskset, vm::ptr attr)
 {
+ cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, attr=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), attr.addr());
+
 #ifdef PRX_DEBUG
- cellSpurs->Warning("%s()", __FUNCTION__);
 return GetCurrentPPUThread().FastCall2(libsre + 0x15108, libsre_rtoc);
 #else
- UNIMPLEMENTED_FUNC(cellSpurs);
+ vm::var tmp_attr;
+
+ if (!attr)
+ {
+ attr.set(tmp_attr.addr());
+ _cellSpursTasksetAttribute2Initialize(attr, 0);
+ }
+
+ auto rc = spursCreateTaskset(spurs, vm::ptr::make(taskset.addr()), attr->m.args,
+ vm::ptr::make(attr.addr() + offsetof(CellSpursTasksetAttribute, m.priority)),
+ attr->m.max_contention, vm::ptr::make(attr->m.name.addr()), CellSpursTaskset2::size, (u8)attr->m.enable_clear_ls);
+ if (rc != CELL_OK)
+ {
+ return rc;
+ }
+
+ if (attr->m.task_name_buffer.addr() % CellSpursTaskNameBuffer::align)
+ {
+ return CELL_SPURS_TASK_ERROR_ALIGN;
+ }
+
+ // TODO: Implement rest of the function
 return CELL_OK;
 #endif
 }

 s64 cellSpursCreateTask2()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x11E54, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2607,8 +3223,9 @@ s64 cellSpursCreateTask2()
 #endif
 }

 s64 cellSpursJoinTask2()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x11378, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2618,8 +3235,9 @@ s64 cellSpursJoinTask2()
 #endif
 }

 s64 cellSpursTryJoinTask2()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x11748, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2629,8 +3247,9 @@ s64 cellSpursTryJoinTask2()
 #endif
 }

 s64 cellSpursDestroyTaskset2()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x14EE8, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2640,8 +3259,9 @@ s64 cellSpursDestroyTaskset2()
 #endif
 }

 s64 cellSpursCreateTask2WithBinInfo()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x120E0, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2649,54 +3269,124 @@ s64 cellSpursCreateTask2WithBinInfo()
 #endif
 }

-s64 cellSpursTasksetSetExceptionEventHandler()
+s64 cellSpursTasksetSetExceptionEventHandler(vm::ptr taskset, vm::ptr handler, vm::ptr arg)
 {
+ cellSpurs->Warning("%s(taskset=0x%x, handler=0x%x, arg=0x%x)", __FUNCTION__, taskset.addr(), handler.addr(), arg.addr());
+
 #ifdef PRX_DEBUG
- cellSpurs->Warning("%s()", __FUNCTION__);
 return GetCurrentPPUThread().FastCall2(libsre + 0x13124, libsre_rtoc);
 #else
- UNIMPLEMENTED_FUNC(cellSpurs);
+ if (!taskset || !handler)
+ {
+ return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+ }
+
+ if (taskset.addr() % CellSpursTaskset::align)
+ {
+ return CELL_SPURS_TASK_ERROR_ALIGN;
+ }
+
+ if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD)
+ {
+ return CELL_SPURS_TASK_ERROR_INVAL;
+ }
+
+ if (taskset->m.exception_handler != 0)
+ {
+ return CELL_SPURS_TASK_ERROR_BUSY;
+ }
+
+ taskset->m.exception_handler = handler;
+ taskset->m.exception_handler_arg = arg;
 return CELL_OK;
 #endif
 }

-s64 cellSpursTasksetUnsetExceptionEventHandler()
+s64 cellSpursTasksetUnsetExceptionEventHandler(vm::ptr taskset)
 {
+ cellSpurs->Warning("%s(taskset=0x%x)", __FUNCTION__, taskset.addr());
+
 #ifdef PRX_DEBUG
- cellSpurs->Warning("%s()", __FUNCTION__);
 return GetCurrentPPUThread().FastCall2(libsre + 0x13194, libsre_rtoc);
 #else
- UNIMPLEMENTED_FUNC(cellSpurs);
+ if (!taskset)
+ {
+ return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+ }
+
+ if (taskset.addr() % CellSpursTaskset::align)
+ {
+ return CELL_SPURS_TASK_ERROR_ALIGN;
+ }
+
+ if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD)
+ {
+ return CELL_SPURS_TASK_ERROR_INVAL;
+ }
+
+ taskset->m.exception_handler.set(0);
+ taskset->m.exception_handler_arg.set(0);
 return CELL_OK;
 #endif
 }

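+// Policy module error codes and task error codes appear to differ only in bit 8 (0x8041 08xx vs
+// 0x8041 09xx), so cellSpursLookUpTasksetAddress below converts errors returned by
+// cellSpursGetWorkloadData with rc ^ 0x100; e.g. a policy module SRCH error 0x80410805 would
+// become CELL_SPURS_TASK_ERROR_SRCH (0x80410905).
+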
-s64 cellSpursLookUpTasksetAddress()
+s64 cellSpursLookUpTasksetAddress(vm::ptr spurs, vm::ptr taskset, u32 id)
 {
+ cellSpurs->Warning("%s(spurs=0x%x, taskset=0x%x, id=0x%x)", __FUNCTION__, spurs.addr(), taskset.addr(), id);
+
 #ifdef PRX_DEBUG
- cellSpurs->Warning("%s()", __FUNCTION__);
 return GetCurrentPPUThread().FastCall2(libsre + 0x133AC, libsre_rtoc);
 #else
- UNIMPLEMENTED_FUNC(cellSpurs);
+ if (taskset.addr() == 0)
+ {
+ return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+ }
+
+ vm::var data;
+ auto rc = cellSpursGetWorkloadData(spurs, vm::ptr::make(data.addr()), id);
+ if (rc != CELL_OK)
+ {
+ // Convert policy module error code to a task error code
+ return rc ^ 0x100;
+ }
+
+ taskset.set((u32)data.value());
 return CELL_OK;
 #endif
 }

-s64 cellSpursTasksetGetSpursAddress()
+s64 cellSpursTasksetGetSpursAddress(vm::ptr taskset, vm::ptr spurs)
 {
+ cellSpurs->Warning("%s(taskset=0x%x, spurs=0x%x)", __FUNCTION__, taskset.addr(), spurs.addr());
+
 #ifdef PRX_DEBUG
- cellSpurs->Warning("%s()", __FUNCTION__);
 return GetCurrentPPUThread().FastCall2(libsre + 0x14408, libsre_rtoc);
 #else
- UNIMPLEMENTED_FUNC(cellSpurs);
+ if (!taskset || !spurs)
+ {
+ return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+ }
+
+ if (taskset.addr() % CellSpursTaskset::align)
+ {
+ return CELL_SPURS_TASK_ERROR_ALIGN;
+ }
+
+ if (taskset->m.wid >= CELL_SPURS_MAX_WORKLOAD)
+ {
+ return CELL_SPURS_TASK_ERROR_INVAL;
+ }
+
+ *spurs = (u32)taskset->m.spurs.addr();
 return CELL_OK;
 #endif
 }

 s64 cellSpursGetTasksetInfo()
 {
-#ifdef PRX_DEBUG
 cellSpurs->Warning("%s()", __FUNCTION__);
+
+#ifdef PRX_DEBUG
 return GetCurrentPPUThread().FastCall2(libsre + 0x1445C, libsre_rtoc);
 #else
 UNIMPLEMENTED_FUNC(cellSpurs);
@@ -2704,13 +3394,39 @@ s64 cellSpursGetTasksetInfo()
 #endif
 }

-s64 _cellSpursTasksetAttributeInitialize()
+s64 _cellSpursTasksetAttributeInitialize(vm::ptr attribute, u32 revision, u32 sdk_version, u64 args, vm::ptr priority, u32 max_contention)
 {
+ cellSpurs->Warning("%s(attribute=0x%x, revision=%d, sdk_version=%d, args=0x%llx, priority=0x%x, max_contention=%d)",
+ __FUNCTION__, attribute.addr(), revision, sdk_version, args, priority.addr(), max_contention);
+
 #ifdef PRX_DEBUG
- cellSpurs->Warning("%s()", __FUNCTION__);
 return GetCurrentPPUThread().FastCall2(libsre + 0x142FC, libsre_rtoc);
 #else
- UNIMPLEMENTED_FUNC(cellSpurs);
+ if (!attribute)
+ {
+ return CELL_SPURS_TASK_ERROR_NULL_POINTER;
+ }
+
+ if (attribute.addr() % CellSpursTasksetAttribute::align)
+ {
+ return CELL_SPURS_TASK_ERROR_ALIGN;
+ }
+
+ for (u32 i = 0; i < 8; i++)
+ {
+ if (priority[i] > 0xF)
+ {
+ return CELL_SPURS_TASK_ERROR_INVAL;
+ }
+ }
+
+ memset(attribute.get_ptr(), 0, CellSpursTasksetAttribute::size);
+ attribute->m.revision = revision;
+ attribute->m.sdk_version =
sdk_version; + attribute->m.args = args; + memcpy(attribute->m.priority, priority.get_ptr(), 8); + attribute->m.taskset_size = CellSpursTaskset::size; + attribute->m.max_contention = max_contention; return CELL_OK; #endif } @@ -2913,6 +3629,190 @@ s64 cellSpursSemaphoreGetTasksetAddress() #endif } +bool spursIsLibProfLoaded() +{ + return false; +} + +void spursTraceStatusUpdate(vm::ptr spurs) +{ + LV2_LOCK(0); + + if (spurs->m.xCC != 0) + { + spurs->m.xCD = 1; + spurs->m.sysSrvMsgUpdateTrace = (1 << spurs->m.nSpus) - 1; + spurs->m.sysSrvMessage.write_relaxed(0xFF); + sys_semaphore_wait((u32)spurs->m.semPrv, 0); + } +} + +s64 spursTraceInitialize(vm::ptr spurs, vm::ptr buffer, u32 size, u32 mode, u32 updateStatus) +{ + if (!spurs || !buffer) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align || buffer.addr() % CellSpursTraceInfo::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (size < CellSpursTraceInfo::size || mode & ~(CELL_SPURS_TRACE_MODE_FLAG_MASK)) + { + return CELL_SPURS_CORE_ERROR_INVAL; + } + + if (spurs->m.traceBuffer != 0) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.traceDataSize = size - CellSpursTraceInfo::size; + for (u32 i = 0; i < 8; i++) + { + buffer->spu_thread[i] = spurs->m.spus[i]; + buffer->count[i] = 0; + } + + buffer->spu_thread_grp = spurs->m.spuTG; + buffer->nspu = spurs->m.nSpus; + spurs->m.traceBuffer.set(buffer.addr() | (mode & CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER ? 1 : 0)); + spurs->m.traceMode = mode; + + u32 spuTraceDataCount = (u32)((spurs->m.traceDataSize / CellSpursTracePacket::size) / spurs->m.nSpus); + for (u32 i = 0, j = 8; i < 6; i++) + { + spurs->m.traceStartIndex[i] = j; + j += spuTraceDataCount; + } + + spurs->m.sysSrvTraceControl = 0; + if (updateStatus) + { + spursTraceStatusUpdate(spurs); + } + + return CELL_OK; +} + +s64 cellSpursTraceInitialize(vm::ptr spurs, vm::ptr buffer, u32 size, u32 mode) +{ + if (spursIsLibProfLoaded()) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + return spursTraceInitialize(spurs, buffer, size, mode, 1); +} + +s64 spursTraceStart(vm::ptr spurs, u32 updateStatus) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (!spurs->m.traceBuffer) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.sysSrvTraceControl = 1; + if (updateStatus) + { + spursTraceStatusUpdate(spurs); + } + + return CELL_OK; +} + +s64 cellSpursTraceStart(vm::ptr spurs) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + return spursTraceStart(spurs, spurs->m.traceMode & CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP); +} + +s64 spursTraceStop(vm::ptr spurs, u32 updateStatus) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (!spurs->m.traceBuffer) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.sysSrvTraceControl = 2; + if (updateStatus) + { + spursTraceStatusUpdate(spurs); + } + + return CELL_OK; +} + +s64 cellSpursTraceStop(vm::ptr spurs) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + return spursTraceStop(spurs, spurs->m.traceMode & CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP); +} + +s64 
cellSpursTraceFinalize(vm::ptr spurs) +{ + if (!spurs) + { + return CELL_SPURS_CORE_ERROR_NULL_POINTER; + } + + if (spurs.addr() % CellSpurs::align) + { + return CELL_SPURS_CORE_ERROR_ALIGN; + } + + if (!spurs->m.traceBuffer) + { + return CELL_SPURS_CORE_ERROR_STAT; + } + + spurs->m.sysSrvTraceControl = 0; + spurs->m.traceMode = 0; + spurs->m.traceBuffer.set(0); + spursTraceStatusUpdate(spurs); + return CELL_OK; +} + void cellSpurs_init(Module *pxThis) { cellSpurs = pxThis; @@ -2940,6 +3840,8 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, cellSpursEnableExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursSetGlobalExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursUnsetGlobalExceptionEventHandler); + REG_FUNC(cellSpurs, cellSpursSetExceptionEventHandler); + REG_FUNC(cellSpurs, cellSpursUnsetExceptionEventHandler); // Event flag REG_FUNC(cellSpurs, _cellSpursEventFlagInitialize); @@ -2985,8 +3887,6 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, cellSpursCreateTask2WithBinInfo); REG_FUNC(cellSpurs, cellSpursLookUpTasksetAddress); REG_FUNC(cellSpurs, cellSpursTasksetGetSpursAddress); - REG_FUNC(cellSpurs, cellSpursSetExceptionEventHandler); - REG_FUNC(cellSpurs, cellSpursUnsetExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursGetTasksetInfo); REG_FUNC(cellSpurs, cellSpursTasksetSetExceptionEventHandler); REG_FUNC(cellSpurs, cellSpursTasksetUnsetExceptionEventHandler); @@ -3070,5 +3970,9 @@ void cellSpurs_init(Module *pxThis) REG_FUNC(cellSpurs, _cellSpursSemaphoreInitialize); REG_FUNC(cellSpurs, cellSpursSemaphoreGetTasksetAddress); - // TODO: some trace funcs + // Trace + REG_FUNC(cellSpurs, cellSpursTraceInitialize); + REG_FUNC(cellSpurs, cellSpursTraceStart); + REG_FUNC(cellSpurs, cellSpursTraceStop); + REG_FUNC(cellSpurs, cellSpursTraceFinalize); } \ No newline at end of file diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h index 64be4a99cb..348c795653 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellSpurs.h +++ b/rpcs3/Emu/SysCalls/Modules/cellSpurs.h @@ -40,6 +40,7 @@ enum { CELL_SPURS_TASK_ERROR_AGAIN = 0x80410901, CELL_SPURS_TASK_ERROR_INVAL = 0x80410902, + CELL_SPURS_TASK_ERROR_NOSYS = 0x80410903, CELL_SPURS_TASK_ERROR_NOMEM = 0x80410904, CELL_SPURS_TASK_ERROR_SRCH = 0x80410905, CELL_SPURS_TASK_ERROR_NOEXEC = 0x80410907, @@ -91,6 +92,7 @@ enum SPURSKernelInterfaces CELL_SPURS_MAX_SPU = 8, CELL_SPURS_MAX_WORKLOAD = 16, CELL_SPURS_MAX_WORKLOAD2 = 32, + CELL_SPURS_SYS_SERVICE_WORKLOAD_ID = 32, CELL_SPURS_MAX_PRIORITY = 16, CELL_SPURS_NAME_MAX_LENGTH = 15, CELL_SPURS_SIZE = 4096, @@ -101,6 +103,12 @@ enum SPURSKernelInterfaces CELL_SPURS_INTERRUPT_VECTOR = 0x0, CELL_SPURS_LOCK_LINE = 0x80, CELL_SPURS_KERNEL_DMA_TAG_ID = 31, + CELL_SPURS_KERNEL1_ENTRY_ADDR = 0x818, + CELL_SPURS_KERNEL2_ENTRY_ADDR = 0x848, + CELL_SPURS_KERNEL1_EXIT_ADDR = 0x808, + CELL_SPURS_KERNEL2_EXIT_ADDR = 0x838, + CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR = 0x290, + CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR = 0x290, }; enum RangeofEventQueuePortNumbers @@ -110,31 +118,6 @@ enum RangeofEventQueuePortNumbers CELL_SPURS_DYNAMIC_PORT_RANGE_BOTTOM = 63, }; -enum SPURSTraceTypes -{ - CELL_SPURS_TRACE_TAG_LOAD = 0x2a, - CELL_SPURS_TRACE_TAG_MAP = 0x2b, - CELL_SPURS_TRACE_TAG_START = 0x2c, - CELL_SPURS_TRACE_TAG_STOP = 0x2d, - CELL_SPURS_TRACE_TAG_USER = 0x2e, - CELL_SPURS_TRACE_TAG_GUID = 0x2f, -}; - -// SPURS task defines. 
-enum TaskConstants -{ - CELL_SPURS_MAX_TASK = 128, - CELL_SPURS_TASK_TOP = 0x3000, - CELL_SPURS_TASK_BOTTOM = 0x40000, - CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, -}; - -class SPURSManager; -class SPURSManagerEventFlag; -class SPURSManagerTaskset; - -struct CellSpurs; - enum SpursAttrFlags : u32 { SAF_NONE = 0x0, @@ -156,11 +139,129 @@ enum SpursAttrFlags : u32 enum SpursFlags1 : u8 { SF1_NONE = 0x0, - - SF1_IS_SECOND = 0x40, + + SF1_32_WORKLOADS = 0x40, SF1_EXIT_IF_NO_WORK = 0x80, }; +enum SpursWorkloadConstants : u64 +{ + // Workload states + SPURS_WKL_STATE_NON_EXISTENT = 0, + SPURS_WKL_STATE_PREPARING = 1, + SPURS_WKL_STATE_RUNNABLE = 2, + SPURS_WKL_STATE_SHUTTING_DOWN = 3, + SPURS_WKL_STATE_REMOVABLE = 4, + SPURS_WKL_STATE_INVALID = 5, + + // GUID + SPURS_GUID_SYS_WKL = 0x1BB841BF38F89D33ull, + SPURS_GUID_TASKSET_PM = 0x836E915B2E654143ull, + + // Image addresses + SPURS_IMG_ADDR_SYS_SRV_WORKLOAD = 0x100, + SPURS_IMG_ADDR_TASKSET_PM = 0x200, +}; + +enum CellSpursModulePollStatus +{ + CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT = 1, + CELL_SPURS_MODULE_POLL_STATUS_SIGNAL = 2, + CELL_SPURS_MODULE_POLL_STATUS_FLAG = 4 +}; + +enum SpursTraceConstants +{ + // Trace tag types + CELL_SPURS_TRACE_TAG_KERNEL = 0x20, + CELL_SPURS_TRACE_TAG_SERVICE = 0x21, + CELL_SPURS_TRACE_TAG_TASK = 0x22, + CELL_SPURS_TRACE_TAG_JOB = 0x23, + CELL_SPURS_TRACE_TAG_OVIS = 0x24, + CELL_SPURS_TRACE_TAG_LOAD = 0x2a, + CELL_SPURS_TRACE_TAG_MAP = 0x2b, + CELL_SPURS_TRACE_TAG_START = 0x2c, + CELL_SPURS_TRACE_TAG_STOP = 0x2d, + CELL_SPURS_TRACE_TAG_USER = 0x2e, + CELL_SPURS_TRACE_TAG_GUID = 0x2f, + + // Service incident + CELL_SPURS_TRACE_SERVICE_INIT = 0x01, + CELL_SPURS_TRACE_SERVICE_WAIT = 0x02, + CELL_SPURS_TRACE_SERVICE_EXIT = 0x03, + + // Task incident + CELL_SPURS_TRACE_TASK_DISPATCH = 0x01, + CELL_SPURS_TRACE_TASK_YIELD = 0x03, + CELL_SPURS_TRACE_TASK_WAIT = 0x04, + CELL_SPURS_TRACE_TASK_EXIT = 0x05, + + // Trace mode flags + CELL_SPURS_TRACE_MODE_FLAG_WRAP_BUFFER = 0x1, + CELL_SPURS_TRACE_MODE_FLAG_SYNCHRONOUS_START_STOP = 0x2, + CELL_SPURS_TRACE_MODE_FLAG_MASK = 0x3, +}; + +// SPURS task constants +enum SpursTaskConstants +{ + CELL_SPURS_MAX_TASK = 128, + CELL_SPURS_TASK_TOP = 0x3000, + CELL_SPURS_TASK_BOTTOM = 0x40000, + CELL_SPURS_MAX_TASK_NAME_LENGTH = 32, + CELL_SPURS_TASK_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASKSET_ATTRIBUTE_REVISION = 1, + CELL_SPURS_TASK_EXECUTION_CONTEXT_SIZE = 1024, + CELL_SPURS_TASKSET_PM_ENTRY_ADDR = 0xA00, + CELL_SPURS_TASKSET_PM_SYSCALL_ADDR = 0xA70, + + // Task syscall numbers + CELL_SPURS_TASK_SYSCALL_EXIT = 0, + CELL_SPURS_TASK_SYSCALL_YIELD = 1, + CELL_SPURS_TASK_SYSCALL_WAIT_SIGNAL = 2, + CELL_SPURS_TASK_SYSCALL_POLL = 3, + CELL_SPURS_TASK_SYSCALL_RECV_WKL_FLAG = 4, + + // Task poll status + CELL_SPURS_TASK_POLL_FOUND_TASK = 1, + CELL_SPURS_TASK_POLL_FOUND_WORKLOAD = 2, +}; + +enum CellSpursEventFlagWaitMode +{ + CELL_SPURS_EVENT_FLAG_OR = 0, + CELL_SPURS_EVENT_FLAG_AND = 1, + CELL_SPURS_EVENT_FLAG_WAIT_MODE_LAST = CELL_SPURS_EVENT_FLAG_AND, +}; + +enum CellSpursEventFlagClearMode +{ + CELL_SPURS_EVENT_FLAG_CLEAR_AUTO = 0, + CELL_SPURS_EVENT_FLAG_CLEAR_MANUAL = 1, + CELL_SPURS_EVENT_FLAG_CLEAR_LAST = CELL_SPURS_EVENT_FLAG_CLEAR_MANUAL, +}; + +enum CellSpursEventFlagDirection +{ + CELL_SPURS_EVENT_FLAG_SPU2SPU, + CELL_SPURS_EVENT_FLAG_SPU2PPU, + CELL_SPURS_EVENT_FLAG_PPU2SPU, + CELL_SPURS_EVENT_FLAG_ANY2ANY, + CELL_SPURS_EVENT_FLAG_LAST = CELL_SPURS_EVENT_FLAG_ANY2ANY, +}; + +// Event flag constants +enum SpursEventFlagConstants +{ + CELL_SPURS_EVENT_FLAG_MAX_WAIT_SLOTS = 16, + 
CELL_SPURS_EVENT_FLAG_INVALID_SPU_PORT = 0xFF,
+};
+
+class SPURSManager;
+class SPURSManagerEventFlag;
+class SPURSManagerTaskset;
+struct CellSpurs;
+
 struct CellSpursAttribute
 {
 static const uint align = 8;
@@ -208,6 +309,72 @@ struct CellSpursWorkloadFlag

 typedef void(CellSpursShutdownCompletionEventHook)(vm::ptr, u32 wid, vm::ptr arg);

+struct CellSpursTraceInfo
+{
+ static const u32 size = 0x80;
+ static const u32 align = 16;
+
+ be_t spu_thread[8]; // 0x00
+ be_t count[8]; // 0x20
+ be_t spu_thread_grp; // 0x40
+ be_t nspu; // 0x44
+ //u8 padding[];
+};
+
+struct CellSpursTracePacket
+{
+ static const u32 size = 16;
+
+ struct
+ {
+ u8 tag;
+ u8 length;
+ u8 spu;
+ u8 workload;
+ be_t time;
+ } header;
+
+ union
+ {
+ struct
+ {
+ be_t incident;
+ be_t reserved;
+ } service;
+
+ struct
+ {
+ be_t ea;
+ be_t ls;
+ be_t size;
+ } load;
+
+ struct
+ {
+ be_t offset;
+ be_t ls;
+ be_t size;
+ } map;
+
+ struct
+ {
+ s8 module[4];
+ be_t level;
+ be_t ls;
+ } start;
+
+ struct
+ {
+ be_t incident;
+ be_t taskId;
+ } task;
+
+ be_t user;
+ be_t guid;
+ be_t stop;
+ } data;
+};
+
 // Core CellSpurs structures
 struct CellSpurs
 {
@@ -218,7 +385,7 @@ struct CellSpurs

 struct _sub_str1
 {
- u8 unk0[0x20];
+ u8 unk0[0x20]; // 0x00 - SPU exception handler, 0x08 - SPU exception handler args
 be_t sem; // 0x20
 u8 unk1[0x8];
 vm::bptr hook; // 0x30
@@ -228,28 +395,29 @@ struct CellSpurs

 static_assert(sizeof(_sub_str1) == 0x80, "Wrong _sub_str1 size");

- struct _sub_str2
+ struct _sub_str2 // Event port multiplexer
 {
- be_t unk0;
- be_t unk1;
- be_t unk2;
- be_t unk3;
+ be_t unk0; // 0x00 Outstanding requests
+ be_t unk1; // 0x04
+ be_t unk2; // 0x08
+ be_t unk3; // 0x0C
 be_t port; // 0x10
- u8 unk_[0x68];
+ u8 unk_[0x68]; // 0x18 - The first u64 seems to be the start of a linked list.
The linked list struct seems to be {u64 next; u64 data; u64 handler} }; static_assert(sizeof(_sub_str2) == 0x80, "Wrong _sub_str2 size"); - struct _sub_str3 + struct WorkloadInfo { - vm::bptr pm; // policy module - be_t data; // spu argument + vm::bptr addr; // Address of the executable + be_t arg; // spu argument be_t size; - atomic_t copy; - be_t priority; + atomic_t uniqueId; // The unique id is the same for all workloads with the same addr + u8 pad[3]; + u8 priority[8]; }; - static_assert(sizeof(_sub_str3) == 0x20, "Wrong _sub_str3 size"); + static_assert(sizeof(WorkloadInfo) == 0x20, "Wrong WorkloadInfo size"); struct _sub_str4 { @@ -268,61 +436,68 @@ struct CellSpurs // real data struct { - atomic_t wklReadyCount[0x20]; // 0x0 (index = wid) - u8 wklA[0x10]; // 0x20 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - u8 wklB[0x10]; // 0x30 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - u8 wklMinCnt[0x10]; // 0x40 (seems only for first 0..15 wids) - atomic_t wklMaxCnt[0x10]; // 0x50 (packed 4-bit data, index = wid % 16, internal index = wid / 16) - CellSpursWorkloadFlag wklFlag; // 0x60 - atomic_t wklSet1; // 0x70 (bitset for 0..15 wids) - atomic_t x72; // 0x72 - u8 x73; // 0x73 - u8 flags1; // 0x74 - u8 x75; // 0x75 - u8 nSpus; // 0x76 - atomic_t flagRecv; // 0x77 - atomic_t wklSet2; // 0x78 (bitset for 16..32 wids) - u8 x7A[6]; // 0x7A - atomic_t wklStat1[0x10]; // 0x80 - u8 wklD1[0x10]; // 0x90 - u8 wklE1[0x10]; // 0xA0 - atomic_t wklMskA; // 0xB0 - atomic_t wklMskB; // 0xB4 - u8 xB8[5]; // 0xB8 - atomic_t xBD; // 0xBD - u8 xBE[2]; // 0xBE - u8 xC0[8]; // 0xC0 - u8 xC8; // 0xC8 - u8 spuPort; // 0xC9 - u8 xCA; // 0xCA - u8 xCB; // 0xCB - u8 xCC; // 0xCC - u8 xCD; // 0xCD - u8 xCE; // 0xCE - u8 xCF; // 0xCF - atomic_t wklStat2[0x10]; // 0xD0 - u8 wklD2[0x10]; // 0xE0 - u8 wklE2[0x10]; // 0xF0 - _sub_str1 wklF1[0x10]; // 0x100 - be_t unk22; // 0x900 - u8 unknown7[0x980 - 0x908]; + atomic_t wklReadyCount1[0x10]; // 0x00 Number of SPUs requested by each workload (0..15 wids). + atomic_t wklIdleSpuCountOrReadyCount2[0x10]; // 0x10 SPURS1: Number of idle SPUs requested by each workload (0..15 wids). SPURS2: Number of SPUs requested by each workload (16..31 wids). + u8 wklCurrentContention[0x10]; // 0x20 Number of SPUs used by each workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. + u8 wklPendingContention[0x10]; // 0x30 Number of SPUs that are pending to context switch to the workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. + u8 wklMinContention[0x10]; // 0x40 Min SPUs required for each workload. SPURS1: index = wid. SPURS2: Unused. + atomic_t wklMaxContention[0x10]; // 0x50 Max SPUs that may be allocated to each workload. SPURS1: index = wid. SPURS2: packed 4-bit data, index = wid % 16, internal index = wid / 16. 
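+ // Worked example of the SPURS2 packed 4-bit layout used by the contention fields above:
+ // for wid 20 the value lives in byte 20 % 16 = 4, upper nibble (internal index 20 / 16 = 1),
+ // so it is read as wklCurrentContention[4] >> 4; wids 0..15 use the lower nibble (& 0x0F).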
+ CellSpursWorkloadFlag wklFlag; // 0x60
+ atomic_t wklSignal1; // 0x70 (bitset for 0..15 wids)
+ atomic_t sysSrvMessage; // 0x72
+ u8 spuIdling; // 0x73
+ u8 flags1; // 0x74 Type is SpursFlags1
+ u8 sysSrvTraceControl; // 0x75
+ u8 nSpus; // 0x76
+ atomic_t wklFlagReceiver; // 0x77
+ atomic_t wklSignal2; // 0x78 (bitset for 16..32 wids)
+ u8 x7A[6]; // 0x7A
+ atomic_t wklState1[0x10]; // 0x80 SPURS_WKL_STATE_*
+ u8 wklStatus1[0x10]; // 0x90
+ u8 wklEvent1[0x10]; // 0xA0
+ atomic_t wklMskA; // 0xB0 - System service - Available workloads (32*u1)
+ atomic_t wklMskB; // 0xB4 - System service - Available module id
+ u32 xB8; // 0xB8
+ u8 sysSrvExitBarrier; // 0xBC
+ atomic_t sysSrvMsgUpdateWorkload; // 0xBD
+ u8 xBE; // 0xBE
+ u8 sysSrvMsgTerminate; // 0xBF
+ u8 sysSrvWorkload[8]; // 0xC0
+ u8 sysSrvOnSpu; // 0xC8
+ u8 spuPort; // 0xC9
+ u8 xCA; // 0xCA
+ u8 xCB; // 0xCB
+ u8 xCC; // 0xCC
+ u8 xCD; // 0xCD
+ u8 sysSrvMsgUpdateTrace; // 0xCE
+ u8 xCF; // 0xCF
+ atomic_t wklState2[0x10]; // 0xD0 SPURS_WKL_STATE_*
+ u8 wklStatus2[0x10]; // 0xE0
+ u8 wklEvent2[0x10]; // 0xF0
+ _sub_str1 wklF1[0x10]; // 0x100
+ vm::bptr traceBuffer; // 0x900
+ be_t traceStartIndex[6]; // 0x908
+ u8 unknown7[0x948 - 0x920]; // 0x920
+ be_t traceDataSize; // 0x948
+ be_t traceMode; // 0x950
+ u8 unknown8[0x980 - 0x954]; // 0x954
 be_t semPrv; // 0x980
 be_t unk11; // 0x988
 be_t unk12; // 0x98C
 be_t unk13; // 0x990
 u8 unknown4[0xB00 - 0x998];
- _sub_str3 wklG1[0x10]; // 0xB00
- _sub_str3 wklSysG; // 0xD00
+ WorkloadInfo wklInfo1[0x10]; // 0xB00
+ WorkloadInfo wklInfoSysSrv; // 0xD00
 be_t ppu0; // 0xD20
 be_t ppu1; // 0xD28
- be_t spuTG; // 0xD30
+ be_t spuTG; // 0xD30 - SPU thread group
 be_t spus[8]; // 0xD34
 u8 unknown3[0xD5C - 0xD54];
- be_t queue; // 0xD5C
- be_t port; // 0xD60
- atomic_t xD64; // 0xD64
- atomic_t xD65; // 0xD65
- atomic_t xD66; // 0xD66
+ be_t queue; // 0xD5C - Event queue
+ be_t port; // 0xD60 - Event port
+ atomic_t xD64; // 0xD64 - SPURS handler dirty
+ atomic_t xD65; // 0xD65 - SPURS handler waiting
+ atomic_t xD66; // 0xD66 - SPURS handler exiting
 atomic_t enableEH; // 0xD68
 be_t exception; // 0xD6C
 sys_spu_image spuImg; // 0xD70
@@ -334,14 +509,14 @@ struct CellSpurs
 be_t unk5; // 0xD9C
 be_t revision; // 0xDA0
 be_t sdkVersion; // 0xDA4
- atomic_t spups; // 0xDA8
+ atomic_t spups; // 0xDA8 - SPU port bits
 sys_lwmutex_t mutex; // 0xDB0
 sys_lwcond_t cond; // 0xDC8
 u8 unknown9[0xE00 - 0xDD0];
 _sub_str4 wklH1[0x10]; // 0xE00
 _sub_str2 sub3; // 0xF00
- u8 unknown6[0x1000 - 0xF80];
- _sub_str3 wklG2[0x10]; // 0x1000
+ u8 unknown6[0x1000 - 0xF80]; // 0xF80 - Global SPU exception handler, 0xF88 - Global SPU exception handler args
+ WorkloadInfo wklInfo2[0x10]; // 0x1000
 _sub_str1 wklF2[0x10]; // 0x1200
 _sub_str4 wklH2[0x10]; // 0x1A00
 } m;
@@ -353,15 +528,15 @@ struct CellSpurs
 } c;
 };

- __forceinline atomic_t& wklStat(const u32 wid)
+ __forceinline atomic_t& wklState(const u32 wid)
 {
 if (wid & 0x10)
 {
- return m.wklStat2[wid & 0xf];
+ return m.wklState2[wid & 0xf];
 }
 else
 {
- return m.wklStat1[wid & 0xf];
+ return m.wklState1[wid & 0xf];
 }
 }

@@ -409,12 +584,104 @@ struct CellSpursWorkloadAttribute

 struct CellSpursEventFlag
 {
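+ // How the fields below appear to fit together: a waiter in slot k blocks until the bits of
+ // events selected by spuTaskWaitMask[k] satisfy its wait mode; e.g. with spuTaskWaitMask[k] = 0x0003,
+ // an OR-mode waiter wakes when either of the low two event bits is set, while an AND-mode waiter
+ // (its bit set in spuTaskWaitMode) needs both.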
- SPURSManagerEventFlag *eventFlag;
+ static const u32 align = 128;
+ static const u32 size = 128;
+
+ union
+ {
+ // Raw data
+ u8 _u8[size];
+
+ // Real data
+ struct _CellSpursEventFlag
+ {
+ be_t events; // 0x00 Event bits
+ be_t spuTaskPendingRecv; // 0x02 A bit is set to 1 when the conditions of the SPU task using the slot are met and back to 0 when the SPU task unblocks
+ be_t ppuWaitMask; // 0x04 Wait mask for blocked PPU thread
+ u8 ppuWaitSlotAndMode; // 0x06 Top 4 bits: Wait slot number of the blocked PPU thread, Bottom 4 bits: Wait mode of the blocked PPU thread
+ u8 ppuPendingRecv; // 0x07 Set to 1 when the blocked PPU thread's conditions are met and back to 0 when the PPU thread is unblocked
+ be_t spuTaskUsedWaitSlots; // 0x08 A bit is set to 1 if the wait slot corresponding to the bit is used by an SPU task and 0 otherwise
+ be_t spuTaskWaitMode; // 0x0A A bit is set to 1 if the wait mode for the SPU task corresponding to the bit is AND and 0 otherwise
+ u8 spuPort; // 0x0C
+ u8 isIwl; // 0x0D
+ u8 direction; // 0x0E
+ u8 clearMode; // 0x0F
+ be_t spuTaskWaitMask[16]; // 0x10 Wait mask for blocked SPU tasks
+ be_t pendingRecvTaskEvents[16]; // 0x30 The value of event flag when the wait condition for the thread/task was met
+ u8 waitingTaskId[16]; // 0x50 Task id of waiting SPU threads
+ u8 waitingTaskWklId[16]; // 0x60 Workload id of waiting SPU threads
+ be_t addr; // 0x70
+ be_t eventPortId; // 0x78
+ be_t eventQueueId; // 0x7C
+ } m;
+
+ static_assert(sizeof(_CellSpursEventFlag) == size, "Wrong _CellSpursEventFlag size");
+
+ SPURSManagerEventFlag *eventFlag;
+ };
+};
+
+union CellSpursTaskArgument
+{
+ be_t _u128;
+};
+
+union CellSpursTaskLsPattern
+{
+ be_t _u128;
 };

 struct CellSpursTaskset
 {
- SPURSManagerTaskset *taskset;
+ static const u32 align = 128;
+ static const u32 size = 6400;
+
+ struct TaskInfo
+ {
+ CellSpursTaskArgument args; // 0x00
+ vm::bptr elf_addr; // 0x10
+ be_t context_save_storage_and_alloc_ls_blocks; // 0x18 This is (context_save_storage_addr | allocated_ls_blocks)
+ CellSpursTaskLsPattern ls_pattern; // 0x20
+ };
+
+ static_assert(sizeof(TaskInfo) == 0x30, "Wrong TaskInfo size");
+
+ union
+ {
+ // Raw data
+ u8 _u8[size];
+
+ // Real data
+ struct _CellSpursTaskset
+ {
+ be_t running; // 0x00
+ be_t ready; // 0x10
+ be_t pending_ready; // 0x20
+ be_t enabled; // 0x30
+ be_t signalled; // 0x40
+ be_t waiting; // 0x50
+ vm::bptr spurs; // 0x60
+ be_t args; // 0x68
+ u8 enable_clear_ls; // 0x70
+ u8 x71; // 0x71
+ u8 wkl_flag_wait_task; // 0x72
+ u8 last_scheduled_task; // 0x73
+ be_t wid; // 0x74
+ be_t x78; // 0x78
+ TaskInfo task_info[128]; // 0x80
+ vm::bptr exception_handler; // 0x1880
+ vm::bptr exception_handler_arg; // 0x1888
+ be_t size; // 0x1890
+ u32 unk2; // 0x1894
+ u32 event_flag_id1; // 0x1898
+ u32 event_flag_id2; // 0x189C
+ u8 unk3[0x60]; // 0x18A0
+ } m;
+
+ static_assert(sizeof(_CellSpursTaskset) == size, "Wrong _CellSpursTaskset size");
+
+ SPURSManagerTaskset *taskset;
+ };
 };

 struct CellSpursInfo
@@ -446,63 +713,6 @@ struct CellSpursExceptionInfo
 be_t option;
 };

-struct CellSpursTraceInfo
-{
- be_t spu_thread[8];
- be_t count[8];
- be_t spu_thread_grp;
- be_t nspu;
- //u8 padding[];
-};
-
-struct CellTraceHeader
-{
- u8 tag;
- u8 length;
- u8 cpu;
- u8 thread;
- be_t time;
-};
-
-struct CellSpursTracePacket
-{
- struct header_struct
- {
- u8 tag;
- u8 length;
- u8 spu;
- u8 workload;
- be_t time;
- } header;
-
- struct data_struct
- {
- struct load_struct
- {
- be_t ea;
- be_t ls;
- be_t size;
- } load;
-
- struct map_struct
- {
- be_t offset;
- be_t ls;
- be_t size;
- } map;
-
- struct start_struct
- {
- s8 module[4];
- be_t level;
- be_t ls;
- } start;
-
- be_t user;
- be_t guid;
- } data;
-};
-
 // Exception handlers.
//typedef void (*CellSpursGlobalExceptionEventHandler)(vm::ptr spurs, vm::ptr info, // u32 id, vm::ptr arg); @@ -510,6 +720,13 @@ struct CellSpursTracePacket //typedef void (*CellSpursTasksetExceptionEventHandler)(vm::ptr spurs, vm::ptr taskset, // u32 idTask, vm::ptr info, vm::ptr arg); +struct CellSpursTaskNameBuffer +{ + static const u32 align = 16; + + char taskName[CELL_SPURS_MAX_TASK][CELL_SPURS_MAX_TASK_NAME_LENGTH]; +}; + struct CellSpursTasksetInfo { //CellSpursTaskInfo taskInfo[CELL_SPURS_MAX_TASK]; @@ -525,25 +742,104 @@ struct CellSpursTasksetInfo struct CellSpursTaskset2 { - be_t skip[10496]; + static const u32 align = 128; + static const u32 size = 10496; + + struct TaskInfo + { + CellSpursTaskArgument args; + vm::bptr elf_addr; + vm::bptr context_save_storage; // This is (context_save_storage_addr | allocated_ls_blocks) + CellSpursTaskLsPattern ls_pattern; + }; + + static_assert(sizeof(TaskInfo) == 0x30, "Wrong TaskInfo size"); + + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct _CellSpursTaskset2 + { + be_t running_set[4]; // 0x00 + be_t ready_set[4]; // 0x10 + be_t ready2_set[4]; // 0x20 - TODO: Find out what this is + be_t enabled_set[4]; // 0x30 + be_t signal_received_set[4]; // 0x40 + be_t waiting_set[4]; // 0x50 + vm::bptr spurs; // 0x60 + be_t args; // 0x68 + u8 enable_clear_ls; // 0x70 + u8 x71; // 0x71 + u8 x72; // 0x72 + u8 last_scheduled_task; // 0x73 + be_t wid; // 0x74 + be_t x78; // 0x78 + TaskInfo task_info[128]; // 0x80 + vm::bptr exception_handler; // 0x1880 + vm::bptr exception_handler_arg; // 0x1888 + be_t size; // 0x1890 + u32 unk2; // 0x1894 + u32 event_flag_id1; // 0x1898 + u32 event_flag_id2; // 0x189C + u8 unk3[0x1980 - 0x18A0]; // 0x18A0 + be_t task_exit_code[128]; // 0x1980 + u8 unk4[0x2900 - 0x2180]; // 0x2180 + } m; + + static_assert(sizeof(_CellSpursTaskset2) == size, "Wrong _CellSpursTaskset2 size"); + }; +}; + +struct CellSpursTasksetAttribute +{ + static const u32 align = 8; + static const u32 size = 512; + + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct + { + be_t revision; // 0x00 + be_t sdk_version; // 0x04 + be_t args; // 0x08 + u8 priority[8]; // 0x10 + be_t max_contention; // 0x18 + vm::bptr name; // 0x1C + be_t taskset_size; // 0x20 + be_t enable_clear_ls; // 0x24 + } m; + }; }; struct CellSpursTasksetAttribute2 { - be_t revision; - be_t name_addr; - be_t argTaskset; - u8 priority[8]; - be_t maxContention; - be_t enableClearLs; - be_t CellSpursTaskNameBuffer_addr; //??? *taskNameBuffer - //be_t __reserved__[]; -}; + static const u32 align = 8; + static const u32 size = 512; -// cellSpurs task structures. -struct CellSpursTaskNameBuffer -{ - char taskName[CELL_SPURS_MAX_TASK][CELL_SPURS_MAX_TASK_NAME_LENGTH]; + union + { + // Raw data + u8 _u8[size]; + + // Real data + struct + { + be_t revision; // 0x00 + vm::bptr name; // 0x04 + be_t args; // 0x08 + u8 priority[8]; // 0x10 + be_t max_contention; // 0x18 + be_t enable_clear_ls; // 0x1C + vm::bptr task_name_buffer; // 0x20 + } m; + }; }; struct CellSpursTraceTaskData @@ -552,21 +848,6 @@ struct CellSpursTraceTaskData be_t task; }; -typedef be_t be_u32; -typedef be_t be_u64; - -struct CellSpursTaskArgument -{ - be_u32 u32[4]; - be_u64 u64[2]; -}; - -struct CellSpursTaskLsPattern -{ - be_u32 u32[4]; - be_u64 u64[2]; -}; - struct CellSpursTaskAttribute2 { be_t revision; @@ -604,7 +885,77 @@ struct CellSpursTaskBinInfo CellSpursTaskLsPattern lsPattern; }; -class PPUThread; +// The SPURS kernel context. This resides at 0x100 of the LS. 
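+// Both the HLE kernel and the policy modules recover it from the local storage base, e.g.:
+//     auto ctxt = vm::get_ptr(spu.ls_offset + 0x100);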
+struct SpursKernelContext +{ + u8 tempArea[0x80]; // 0x100 + u8 wklLocContention[0x10]; // 0x180 + u8 wklLocPendingContention[0x10]; // 0x190 + u8 priority[0x10]; // 0x1A0 + u8 x1B0[0x10]; // 0x1B0 + vm::bptr spurs; // 0x1C0 + be_t spuNum; // 0x1C8 + be_t dmaTagId; // 0x1CC + vm::bptr wklCurrentAddr; // 0x1D0 + be_t wklCurrentUniqueId; // 0x1D8 + be_t wklCurrentId; // 0x1DC + be_t exitToKernelAddr; // 0x1E0 + be_t selectWorkloadAddr; // 0x1E4 + u8 moduleId[2]; // 0x1E8 + u8 sysSrvInitialised; // 0x1EA + u8 spuIdling; // 0x1EB + be_t wklRunnable1; // 0x1EC + be_t wklRunnable2; // 0x1EE + be_t x1F0; // 0x1F0 + be_t x1F4; // 0x1F4 + be_t x1F8; // 0x1F8 + be_t x1FC; // 0x1FC + be_t x200; // 0x200 + be_t x204; // 0x204 + be_t x208; // 0x208 + be_t x20C; // 0x20C + be_t traceBuffer; // 0x210 + be_t traceMsgCount; // 0x218 + be_t traceMaxCount; // 0x21C + u8 wklUniqueId[0x10]; // 0x220 + u8 x230[0x280 - 0x230]; // 0x230 + be_t guid[4]; // 0x280 +}; + +static_assert(sizeof(SpursKernelContext) == 0x190, "Incorrect size for SpursKernelContext"); + +// The SPURS taskset policy module context. This resides at 0x2700 of the LS. +struct SpursTasksetContext +{ + u8 tempAreaTaskset[0x80]; // 0x2700 + u8 tempAreaTaskInfo[0x30]; // 0x2780 + be_t x27B0; // 0x27B0 + vm::bptr taskset; // 0x27B8 + be_t kernelMgmtAddr; // 0x27C0 + be_t syscallAddr; // 0x27C4 + be_t x27C8; // 0x27C8 + be_t spuNum; // 0x27CC + be_t dmaTagId; // 0x27D0 + be_t taskId; // 0x27D4 + u8 x27D8[0x2840 - 0x27D8]; // 0x27D8 + u8 moduleId[16]; // 0x2840 + u8 stackArea[0x2C80 - 0x2850]; // 0x2850 + be_t savedContextLr; // 0x2C80 + be_t savedContextSp; // 0x2C90 + be_t savedContextR80ToR127[48]; // 0x2CA0 + be_t savedContextFpscr; // 0x2FA0 + be_t savedWriteTagGroupQueryMask; // 0x2FB0 + be_t savedSpuWriteEventMask; // 0x2FB4 + be_t tasksetMgmtAddr; // 0x2FB8 + be_t guidAddr; // 0x2FBC + be_t x2FC0; // 0x2FC0 + be_t x2FC8; // 0x2FC8 + be_t taskExitCode; // 0x2FD0 + be_t x2FD4; // 0x2FD4 + u8 x2FD8[0x3000 - 0x2FD8]; // 0x2FD8 +}; + +static_assert(sizeof(SpursTasksetContext) == 0x900, "Incorrect size for SpursTasksetContext"); s64 spursAttachLv2EventQueue(vm::ptr spurs, u32 queue, vm::ptr port, s32 isDynamic, bool wasCreated); s64 spursWakeUp(PPUThread& CPU, vm::ptr spurs); diff --git a/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp new file mode 100644 index 0000000000..d1dc487eeb --- /dev/null +++ b/rpcs3/Emu/SysCalls/Modules/cellSpursSpu.cpp @@ -0,0 +1,1677 @@ +#include "stdafx.h" +#include "Emu/Memory/Memory.h" +#include "Emu/System.h" +#include "Emu/Cell/SPUThread.h" +#include "Emu/SysCalls/Modules.h" +#include "Emu/SysCalls/lv2/sys_lwmutex.h" +#include "Emu/SysCalls/lv2/sys_lwcond.h" +#include "Emu/SysCalls/lv2/sys_spu.h" +#include "Emu/SysCalls/Modules/cellSpurs.h" +#include "Loader/ELF32.h" +#include "Emu/FS/vfsStreamMemory.h" + +// +// SPURS utility functions +// +void cellSpursModulePutTrace(CellSpursTracePacket * packet, u32 dmaTagId); +u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status); +void cellSpursModuleExit(SPUThread & spu); + +bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag); +u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask); +u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll = true); +void spursHalt(SPUThread & spu); + +// +// SPURS Kernel functions +// +bool spursKernel1SelectWorkload(SPUThread & spu); +bool spursKernel2SelectWorkload(SPUThread & spu); +void spursKernelDispatchWorkload(SPUThread & spu, u64 
widAndPollStatus); +bool spursKernelWorkloadExit(SPUThread & spu); +bool spursKernelEntry(SPUThread & spu); + +// +// SPURS System Service functions +// +bool spursSysServiceEntry(SPUThread & spu); +// TODO: Exit +void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceMain(SPUThread & spu, u32 pollStatus); +void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt); +// TODO: Deactivate workload +void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, SpursKernelContext * ctxt, u32 wklShutdownBitSet); +void spursSysServiceTraceSaveCount(SPUThread & spu, SpursKernelContext * ctxt); +void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 arg2, u32 arg3, u32 arg4); +// TODO: Deactivate trace +// TODO: System workload entry +void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelContext * ctxt); + +// +// SPURS Taskset Policy Module functions +// +bool spursTasksetEntry(SPUThread & spu); +bool spursTasksetSyscallEntry(SPUThread & spu); +void spursTasksetResumeTask(SPUThread & spu); +void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs); +s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting); +void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus); +bool spursTasksetPollStatus(SPUThread & spu); +void spursTasksetExit(SPUThread & spu); +void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args); +s32 spursTasketSaveTaskContext(SPUThread & spu); +void spursTasksetDispatch(SPUThread & spu); +s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args); +void spursTasksetInit(SPUThread & spu, u32 pollStatus); +s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments); + +extern Module *cellSpurs; + +////////////////////////////////////////////////////////////////////////////// +// SPURS utility functions +////////////////////////////////////////////////////////////////////////////// + +/// Output trace information +void cellSpursModulePutTrace(CellSpursTracePacket * packet, u32 dmaTagId) { + // TODO: Implement this +} + +/// Check for execution right requests +u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) { + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + + spu.GPR[3]._u32[3] = 1; + if (ctxt->spurs->m.flags1 & SF1_32_WORKLOADS) { + spursKernel2SelectWorkload(spu); + } else { + spursKernel1SelectWorkload(spu); + } + + auto result = spu.GPR[3]._u64[1]; + if (status) { + *status = (u32)result; + } + + u32 wklId = result >> 32; + return wklId == ctxt->wklCurrentId ? 
0 : 1; +} + +/// Exit current workload +void cellSpursModuleExit(SPUThread & spu) { + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + spu.SetBranch(ctxt->exitToKernelAddr); +} + +/// Execute a DMA operation +bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) { + spu.WriteChannel(MFC_LSA, u128::from32r(lsa)); + spu.WriteChannel(MFC_EAH, u128::from32r((u32)(ea >> 32))); + spu.WriteChannel(MFC_EAL, u128::from32r((u32)ea)); + spu.WriteChannel(MFC_Size, u128::from32r(size)); + spu.WriteChannel(MFC_TagID, u128::from32r(tag)); + spu.WriteChannel(MFC_Cmd, u128::from32r(cmd)); + + if (cmd == MFC_GETLLAR_CMD || cmd == MFC_PUTLLC_CMD || cmd == MFC_PUTLLUC_CMD) { + u128 rv; + + spu.ReadChannel(rv, MFC_RdAtomicStat); + auto success = rv._u32[3] ? true : false; + success = cmd == MFC_PUTLLC_CMD ? !success : success; + return success; + } + + return true; +} + +/// Get the status of DMA operations +u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask) { + u128 rv; + + spu.WriteChannel(MFC_WrTagMask, u128::from32r(tagMask)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_IMMEDIATE)); + spu.ReadChannel(rv, MFC_RdTagStat); + return rv._u32[3]; +} + +/// Wait for DMA operations to complete +u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll) { + u128 rv; + + spu.WriteChannel(MFC_WrTagMask, u128::from32r(tagMask)); + spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(waitForAll ? MFC_TAG_UPDATE_ALL : MFC_TAG_UPDATE_ANY)); + spu.ReadChannel(rv, MFC_RdTagStat); + return rv._u32[3]; +} + +/// Halt the SPU +void spursHalt(SPUThread & spu) { + spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_HALT); + spu.Stop(); +} + +////////////////////////////////////////////////////////////////////////////// +// SPURS kernel functions +////////////////////////////////////////////////////////////////////////////// + +/// Select a workload to run +bool spursKernel1SelectWorkload(SPUThread & spu) { + auto ctxt = vm::get_ptr(spu.ls_offset + 0x100); + + // The first and only argument to this function is a boolean that is set to false if the function + // is called by the SPURS kernel and set to true if called by cellSpursModulePollStatus. + // If the first argument is true then the shared data is not updated with the result. + const auto isPoll = spu.GPR[3]._u32[3]; + + u32 wklSelectedId; + u32 pollStatus; + + do { + // DMA and lock the first 0x80 bytes of spurs + spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/); + auto spurs = vm::get_ptr(spu.ls_offset + 0x100); + + // Calculate the contention (number of SPUs used) for each workload + u8 contention[CELL_SPURS_MAX_WORKLOAD]; + u8 pendingContention[CELL_SPURS_MAX_WORKLOAD]; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) { + contention[i] = spurs->m.wklCurrentContention[i] - ctxt->wklLocContention[i]; + + // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably + // to prevent unnecessary jumps to the kernel + if (isPoll) { + pendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i]; + if (i != ctxt->wklCurrentId) { + contention[i] += pendingContention[i]; + } + } + } + + wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + pollStatus = 0; + + // The system service has the highest priority. Select the system service if + // the system service message bit for this SPU is set. 
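+ // For example, with sysSrvMessage = 0x05, SPUs 0 and 2 would take the branch below on their
+ // next pass, since (0x05 & (1 << 0)) and (0x05 & (1 << 2)) are both non-zero.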
+ if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) {
+ ctxt->spuIdling = 0;
+ if (!isPoll || ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+ // Clear the message bit
+ spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << ctxt->spuNum));
+ }
+ } else {
+ // Calculate the scheduling weight for each workload
+ u16 maxWeight = 0;
+ for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+ u16 runnable = ctxt->wklRunnable1 & (0x8000 >> i);
+ u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
+ u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+ u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed();
+ u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
+ u8 requestCount = readyCount + idleSpuCount;
+
+ // For a workload to be considered for scheduling:
+ // 1. Its priority must not be 0
+ // 2. The number of SPUs used by it must be less than the max contention for that workload
+ // 3. The workload should be in runnable state
+ // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
+ //    OR the workload must be signalled
+ //    OR the workload flag is 0 and the workload is configured as the workload flag receiver
+ if (runnable && ctxt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) {
+ if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) {
+ // The scheduling weight of the workload is formed from the following parameters in decreasing order of priority:
+ // 1. Workload signal set or workload flag or ready count > contention
+ // 2. Priority of the workload on the SPU
+ // 3. Is the workload the last selected workload
+ // 4. Minimum contention of the workload
+ // 5. Number of SPUs that are being used by the workload (the fewer the SPUs, the higher the weight)
+ // 6. Is the workload executable the same as the currently loaded executable
+ // 7. The workload id (the lower the id, the higher the weight)
+ u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0;
+ weight |= (u16)(ctxt->priority[i] & 0x7F) << 8;
+ weight |= i == ctxt->wklCurrentId ? 0x80 : 0x00;
+ weight |= (contention[i] > 0 && spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00;
+ weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2;
+ weight |= ctxt->wklUniqueId[i] == ctxt->wklCurrentId ? 0x02 : 0x00;
+ weight |= 0x01;
+
+ // In case of a tie the lower numbered workload is chosen
+ if (weight > maxWeight) {
+ wklSelectedId = i;
+ maxWeight = weight;
+ pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
+ pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
+ pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
+ }
+ }
+ }
+ }
+
+ // Not sure what this does. Possibly mark the SPU as idle/in use.
+ ctxt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+
+ if (!isPoll || wklSelectedId == ctxt->wklCurrentId) {
+ // Clear workload signal for the selected workload
+ spurs->m.wklSignal1.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
+ spurs->m.wklSignal2.write_relaxed(be_t::make(spurs->m.wklSignal2.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+
+ // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
+ if (wklSelectedId == spurs->m.wklFlagReceiver.read_relaxed()) {
+ spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF));
+ }
+ }
+ }
+
+ if (!isPoll) {
+ // Called by kernel
+ // Increment the contention for the selected workload
+ if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+ contention[wklSelectedId]++;
+ }
+
+ for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+ spurs->m.wklCurrentContention[i] = contention[i];
+ spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i];
+ ctxt->wklLocContention[i] = 0;
+ ctxt->wklLocPendingContention[i] = 0;
+ }
+
+ if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+ ctxt->wklLocContention[wklSelectedId] = 1;
+ }
+
+ ctxt->wklCurrentId = wklSelectedId;
+ } else if (wklSelectedId != ctxt->wklCurrentId) {
+ // Not called by kernel but a context switch is required
+ // Increment the pending contention for the selected workload
+ if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+ pendingContention[wklSelectedId]++;
+ }
+
+ for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+ spurs->m.wklPendingContention[i] = pendingContention[i];
+ ctxt->wklLocPendingContention[i] = 0;
+ }
+
+ if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+ ctxt->wklLocPendingContention[wklSelectedId] = 1;
+ }
+ } else {
+ // Not called by kernel and no context switch is required
+ for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+ spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i];
+ ctxt->wklLocPendingContention[i] = 0;
+ }
+ }
+ } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+ u64 result = (u64)wklSelectedId << 32;
+ result |= pollStatus;
+ spu.GPR[3]._u64[1] = result;
+ return true;
+}
+
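+// The 16-bit weight packed above breaks down as: bit 15 - signal/flag/ready-count urgency,
+// bits 8..14 - priority of the workload on this SPU, bit 7 - last selected workload,
+// bit 6 - minimum contention not yet satisfied, bits 2..5 - (8 - SPUs in use),
+// bit 1 - same executable as the one currently loaded, bit 0 - always set so that any
+// eligible workload outranks the idle default.
+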
contention[i] & 0x0F : contention[i] >> 4; + + // If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably + // to prevent unnecessary jumps to the kernel + if (isPoll) { + pendingContention[i] = spurs->m.wklPendingContention[i & 0x0F] - ctxt->wklLocPendingContention[i & 0x0F]; + pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4; + if (i != ctxt->wklCurrentId) { + contention[i] += pendingContention[i]; + } + } + } + + wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID; + pollStatus = 0; + + // The system service has the highest priority. Select the system service if + // the system service message bit for this SPU is set. + if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) { + // Not sure what this does. Possibly Mark the SPU as in use. + ctxt->spuIdling = 0; + if (!isPoll || ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) { + // Clear the message bit + spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << ctxt->spuNum)); + } + } else { + // Caclulate the scheduling weight for each workload + u8 maxWeight = 0; + for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) { + auto j = i & 0x0F; + u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j); + u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4; + u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4; + u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j); + u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0; + u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed(); + + // For a workload to be considered for scheduling: + // 1. Its priority must be greater than 0 + // 2. The number of SPUs used by it must be less than the max contention for that workload + // 3. The workload should be in runnable state + // 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount) + // OR the workload must be signalled + // OR the workload flag is 0 and the workload is configured as the wokload receiver + if (runnable && priority > 0 && maxContention > contention[i]) { + if (wklFlag || wklSignal || readyCount > contention[i]) { + // The scheduling weight of the workload is equal to the priority of the workload for the SPU. + // The current workload is given a sligtly higher weight presumably to reduce the number of context switches. + // In case of a tie the lower numbered workload is chosen. + u8 weight = priority << 4; + if (ctxt->wklCurrentId == i) { + weight |= 0x04; + } + + if (weight > maxWeight) { + wklSelectedId = i; + maxWeight = weight; + pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0; + pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0; + pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0; + } + } + } + } + + // Not sure what this does. Possibly mark the SPU as idle/in use. + ctxt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 
+
+            // Not sure what this does. Possibly mark the SPU as idle/in use.
+            ctxt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
+
+            if (!isPoll || wklSelectedId == ctxt->wklCurrentId) {
+                // Clear workload signal for the selected workload
+                spurs->m.wklSignal1.write_relaxed(be_t::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
+                spurs->m.wklSignal2.write_relaxed(be_t::make(spurs->m.wklSignal2.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
+
+                // If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
+                if (wklSelectedId == spurs->m.wklFlagReceiver.read_relaxed()) {
+                    spurs->m.wklFlag.flag.write_relaxed(be_t::make(0xFFFFFFFF));
+                }
+            }
+        }
+
+        if (!isPoll) {
+            // Called by kernel
+            // Increment the contention for the selected workload
+            if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+                contention[wklSelectedId]++;
+            }
+
+            for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
+                spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4);
+                spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i];
+                ctxt->wklLocContention[i] = 0;
+                ctxt->wklLocPendingContention[i] = 0;
+            }
+
+            ctxt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
+            ctxt->wklCurrentId = wklSelectedId;
+        } else if (wklSelectedId != ctxt->wklCurrentId) {
+            // Not called by kernel but a context switch is required
+            // Increment the pending contention for the selected workload
+            if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+                pendingContention[wklSelectedId]++;
+            }
+
+            for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
+                spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4);
+                ctxt->wklLocPendingContention[i] = 0;
+            }
+
+            ctxt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
+        } else {
+            // Not called by kernel and no context switch is required
+            for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+                spurs->m.wklPendingContention[i] = spurs->m.wklPendingContention[i] - ctxt->wklLocPendingContention[i];
+                ctxt->wklLocPendingContention[i] = 0;
+            }
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    u64 result = (u64)wklSelectedId << 32;
+    result |= pollStatus;
+    spu.GPR[3]._u64[1] = result;
+    return true;
+}
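In the 32-workload (kernel2) layout the per-workload counters are nibble-packed: byte i of wklCurrentContention holds workload i in its low nibble and workload i + 16 in its high nibble, which is why the code above writes contention[i] | (contention[i + 0x10] << 4) and reads back with & 0x0F or >> 4. A standalone sketch of that encoding (typedefs are this editor's stand-ins for the emulator's aliases):

    #include <cstdint>

    typedef uint8_t u8;

    // Pack the contention of workload i (low) and workload i+16 (high) into one byte.
    u8 packContention(u8 low, u8 high) {
        return (low & 0x0F) | (high << 4);
    }

    // Extract the contention of a workload id in 0..31 from the packed array.
    u8 contentionOf(const u8 * wklCurrentContention, unsigned wid) {
        u8 b = wklCurrentContention[wid & 0x0F];
        return wid < 16 ? b & 0x0F : b >> 4; // low nibble: 0-15, high nibble: 16-31
    }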
+
+/// SPURS kernel dispatch workload
+void spursKernelDispatchWorkload(SPUThread & spu, u64 widAndPollStatus) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x100);
+    auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
+
+    auto pollStatus = (u32)widAndPollStatus;
+    auto wid = (u32)(widAndPollStatus >> 32);
+
+    // DMA in the workload info for the selected workload
+    auto wklInfoOffset = wid < CELL_SPURS_MAX_WORKLOAD ? offsetof(CellSpurs, m.wklInfo1[wid]) :
+                         wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? offsetof(CellSpurs, m.wklInfo2[wid & 0xf]) :
+                         offsetof(CellSpurs, m.wklInfoSysSrv);
+    spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + wklInfoOffset, 0x3FFE0/*LSA*/, 0x20/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID);
+    spursDmaWaitForCompletion(spu, 0x80000000);
+
+    // Load the workload to LS
+    auto wklInfo = vm::get_ptr(spu.ls_offset + 0x3FFE0);
+    if (ctxt->wklCurrentAddr != wklInfo->addr) {
+        switch (wklInfo->addr.addr()) {
+        case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD:
+            spu.RegisterHleFunction(0xA00, spursSysServiceEntry);
+            break;
+        case SPURS_IMG_ADDR_TASKSET_PM:
+            spu.RegisterHleFunction(0xA00, spursTasksetEntry);
+            break;
+        default:
+            spursDma(spu, MFC_GET_CMD, wklInfo->addr.addr(), 0xA00/*LSA*/, wklInfo->size, CELL_SPURS_KERNEL_DMA_TAG_ID);
+            spursDmaWaitForCompletion(spu, 0x80000000);
+            break;
+        }
+
+        ctxt->wklCurrentAddr = wklInfo->addr;
+        ctxt->wklCurrentUniqueId = wklInfo->uniqueId.read_relaxed();
+    }
+
+    if (!isKernel2) {
+        ctxt->moduleId[0] = 0;
+        ctxt->moduleId[1] = 0;
+    }
+
+    // Run workload
+    spu.GPR[0]._u32[3] = ctxt->exitToKernelAddr;
+    spu.GPR[1]._u32[3] = 0x3FFB0;
+    spu.GPR[3]._u32[3] = 0x100;
+    spu.GPR[4]._u64[1] = wklInfo->arg;
+    spu.GPR[5]._u32[3] = pollStatus;
+    spu.SetBranch(0xA00);
+}
+
+/// SPURS kernel workload exit
+bool spursKernelWorkloadExit(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x100);
+    auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
+
+    // Select next workload to run
+    spu.GPR[3].clear();
+    if (isKernel2) {
+        spursKernel2SelectWorkload(spu);
+    } else {
+        spursKernel1SelectWorkload(spu);
+    }
+
+    spursKernelDispatchWorkload(spu, spu.GPR[3]._u64[1]);
+    return false;
+}
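The "Run workload" block above fixes the ABI between the kernel and a policy module: the module is entered at LS address 0xA00 with its state in a handful of registers, and it returns to the kernel by branching to the exit address it was given in GPR[0] (which is registered as the HLE function spursKernelWorkloadExit). A sketch that records that convention as a plain struct; the struct and field names are illustrative, not from the source:

    #include <cstdint>

    typedef uint32_t u32;
    typedef uint64_t u64;

    // Register state handed to a policy module when it is entered at LS 0xA00.
    struct PolicyModuleEntryState {
        u32 returnAddr;  // GPR[0]: exit-to-kernel address, branch here to return
        u32 stackPtr;    // GPR[1]: top of stack, 0x3FFB0
        u32 contextAddr; // GPR[3]: LS address of the kernel context, 0x100
        u64 moduleArg;   // GPR[4]: the workload's wklInfo->arg
        u32 pollStatus;  // GPR[5]: poll status produced by workload selection
    };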
+
+/// SPURS kernel entry point
+bool spursKernelEntry(SPUThread & spu) {
+    if (Emu.IsStopped()) {
+        return false;
+    }
+
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x100);
+    memset(ctxt, 0, sizeof(SpursKernelContext));
+
+    // Save arguments
+    ctxt->spuNum = spu.GPR[3]._u32[3];
+    ctxt->spurs.set(spu.GPR[4]._u64[1]);
+
+    auto isKernel2 = ctxt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
+
+    // Initialise the SPURS context to its initial values
+    ctxt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID;
+    ctxt->wklCurrentUniqueId = 0x20;
+    ctxt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
+    ctxt->exitToKernelAddr = isKernel2 ? CELL_SPURS_KERNEL2_EXIT_ADDR : CELL_SPURS_KERNEL1_EXIT_ADDR;
+    ctxt->selectWorkloadAddr = isKernel2 ? CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR : CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR;
+    if (!isKernel2) {
+        ctxt->x1F0 = 0xF0020000;
+        ctxt->x200 = 0x20000;
+        ctxt->guid[0] = 0x423A3A02;
+        ctxt->guid[1] = 0x43F43A82;
+        ctxt->guid[2] = 0x43F26502;
+        ctxt->guid[3] = 0x420EB382;
+    } else {
+        ctxt->guid[0] = 0x43A08402;
+        ctxt->guid[1] = 0x43FB0A82;
+        ctxt->guid[2] = 0x435E9302;
+        ctxt->guid[3] = 0x43A3C982;
+    }
+
+    // Register SPURS kernel HLE functions
+    spu.UnregisterHleFunctions(0, 0x40000/*LS_BOTTOM*/);
+    spu.RegisterHleFunction(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelEntry);
+    spu.RegisterHleFunction(ctxt->exitToKernelAddr, spursKernelWorkloadExit);
+    spu.RegisterHleFunction(ctxt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload);
+
+    // Start the system service
+    spursKernelDispatchWorkload(spu, ((u64)CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) << 32);
+    return false;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// SPURS system workload functions
+//////////////////////////////////////////////////////////////////////////////
+
+/// Entry point of the system service
+bool spursSysServiceEntry(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]);
+    auto arg = spu.GPR[4]._u64[1];
+    auto pollStatus = spu.GPR[5]._u32[3];
+
+    if (ctxt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
+        spursSysServiceMain(spu, pollStatus);
+    } else {
+        // TODO: If we reach here it means the current workload was preempted to start the
+        // system workload. Need to implement this.
+    }
+
+    cellSpursModuleExit(spu);
+    return false;
+}
+
+/// Wait for an external event or exit the SPURS thread group if no workloads can be scheduled
+void spursSysServiceIdleHandler(SPUThread & spu, SpursKernelContext * ctxt) {
+    // Monitor only lock line reservation lost events
+    spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR));
+
+    bool shouldExit;
+    while (true) {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x100);
+
+        // Find the number of SPUs that are idling in this SPURS instance
+        u32 nIdlingSpus = 0;
+        for (u32 i = 0; i < 8; i++) {
+            if (spurs->m.spuIdling & (1 << i)) {
+                nIdlingSpus++;
+            }
+        }
+
+        bool allSpusIdle = nIdlingSpus == spurs->m.nSpus ? true : false;
+        bool exitIfNoWork = spurs->m.flags1 & SF1_EXIT_IF_NO_WORK ? true : false;
+        shouldExit = allSpusIdle && exitIfNoWork;
+
+        // Check if any workloads can be scheduled
+        bool foundReadyWorkload = false;
+        if (spurs->m.sysSrvMessage.read_relaxed() & (1 << ctxt->spuNum)) {
+            foundReadyWorkload = true;
+        } else {
+            if (spurs->m.flags1 & SF1_32_WORKLOADS) {
+                for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
+                    u32 j = i & 0x0F;
+                    u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->wklRunnable1 & (0x8000 >> j) : ctxt->wklRunnable2 & (0x8000 >> j);
+                    u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? ctxt->priority[j] & 0x0F : ctxt->priority[j] >> 4;
+                    u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4;
+                    u8 contention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklCurrentContention[j] & 0x0F : spurs->m.wklCurrentContention[j] >> 4;
+                    u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j);
+                    u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+                    u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();
+
+                    if (runnable && priority > 0 && maxContention > contention) {
+                        if (wklFlag || wklSignal || readyCount > contention) {
+                            foundReadyWorkload = true;
+                            break;
+                        }
+                    }
+                }
+            } else {
+                for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+                    u16 runnable = ctxt->wklRunnable1 & (0x8000 >> i);
+                    u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
+                    u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
+                    u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed();
+                    u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
+                    u8 requestCount = readyCount + idleSpuCount;
+
+                    if (runnable && ctxt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > spurs->m.wklCurrentContention[i]) {
+                        if (wklFlag || wklSignal || (readyCount != 0 && requestCount > spurs->m.wklCurrentContention[i])) {
+                            foundReadyWorkload = true;
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+
+        bool spuIdling = spurs->m.spuIdling & (1 << ctxt->spuNum) ? true : false;
+        if (foundReadyWorkload && shouldExit == false) {
+            spurs->m.spuIdling &= ~(1 << ctxt->spuNum);
+        } else {
+            spurs->m.spuIdling |= 1 << ctxt->spuNum;
+        }
+
+        // If all SPUs are idling and the exit_if_no_work flag is set then the SPU thread group must exit. Otherwise wait for external events.
+        if (spuIdling && shouldExit == false && foundReadyWorkload == false) {
+            // The system service blocks by making a reservation and waiting on the lock line reservation lost event.
+            u128 r;
+            spu.ReadChannel(r, SPU_RdEventStat);
+            spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR));
+        }
+
+        auto dmaSuccess = spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        if (dmaSuccess && (shouldExit || foundReadyWorkload)) {
+            break;
+        }
+    }
+
+    if (shouldExit) {
+        // TODO: exit spu thread group
+    }
+}
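The idle handler above sleeps without polling by combining a lock-line reservation with the SPU event channels: GETLLAR takes a reservation on the shared CellSpurs line, and reading SPU_RdEventStat then blocks until another processor writes that line and the reservation is lost. A condensed sketch of just that wait pattern, assuming the SPUThread channel interface and spursDma helper used throughout this file:

    // Block until the 128-byte line at effective address ea is written by someone else.
    void waitForLockLineUpdate(SPUThread & spu, u64 ea) {
        // Listen only for lock-line reservation-lost events
        spu.WriteChannel(SPU_WrEventMask, u128::from32r(SPU_EVENT_LR));

        // Take the reservation
        spursDma(spu, MFC_GETLLAR_CMD, ea, 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);

        u128 r;
        spu.ReadChannel(r, SPU_RdEventStat);                           // blocks until the line changes
        spu.WriteChannel(SPU_WrEventAck, u128::from32r(SPU_EVENT_LR)); // acknowledge the event
    }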
+
+/// Main function for the system service
+void spursSysServiceMain(SPUThread & spu, u32 pollStatus) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x100);
+
+    if (ctxt->spurs.addr() % CellSpurs::align) {
+        spursHalt(spu);
+        return;
+    }
+
+    // Initialise the system service if this is the first time it's being started on this SPU
+    if (ctxt->sysSrvInitialised == 0) {
+        ctxt->sysSrvInitialised = 1;
+
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+
+        do {
+            spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+            auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+            // Halt if already initialised
+            if (spurs->m.sysSrvOnSpu & (1 << ctxt->spuNum)) {
+                spursHalt(spu);
+                return;
+            }
+
+            spurs->m.sysSrvOnSpu |= 1 << ctxt->spuNum;
+        } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+        ctxt->traceBuffer = 0;
+        ctxt->traceMsgCount = -1;
+        spursSysServiceTraceUpdate(spu, ctxt, 1, 1, 0);
+        spursSysServiceCleanupAfterSystemWorkload(spu, ctxt);
+
+        // Trace - SERVICE: INIT
+        CellSpursTracePacket pkt;
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+        pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_INIT;
+        cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+    }
+
+    // Trace - START: Module='SYS '
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = CELL_SPURS_TRACE_TAG_START;
+    memcpy(pkt.data.start.module, "SYS ", 4);
+    pkt.data.start.level = 1; // Policy module
+    pkt.data.start.ls = 0xA00 >> 2;
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+    while (true) {
+        // Process requests for the system service
+        spursSysServiceProcessRequests(spu, ctxt);
+
+poll:
+        if (cellSpursModulePollStatus(spu, nullptr)) {
+            // Trace - SERVICE: EXIT
+            CellSpursTracePacket pkt;
+            memset(&pkt, 0, sizeof(pkt));
+            pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+            pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_EXIT;
+            cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+            // Trace - STOP: GUID
+            memset(&pkt, 0, sizeof(pkt));
+            pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP;
+            pkt.data.stop = SPURS_GUID_SYS_WKL;
+            cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+            spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+            break;
+        }
+
+        // If we reach here it means that either there are more system service messages to be processed
+        // or there are no workloads that can be scheduled.
+
+        // If the SPU is not idling then process the remaining system service messages
+        if (ctxt->spuIdling == 0) {
+            continue;
+        }
+
+        // If we reach here it means that the SPU is idling
+
+        // Trace - SERVICE: WAIT
+        CellSpursTracePacket pkt;
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_SERVICE;
+        pkt.data.service.incident = CELL_SPURS_TRACE_SERVICE_WAIT;
+        cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+        spursSysServiceIdleHandler(spu, ctxt);
+        goto poll;
+    }
+}
+
+/// Process any requests
+void spursSysServiceProcessRequests(SPUThread & spu, SpursKernelContext * ctxt) {
+    bool updateTrace = false;
+    bool updateWorkload = false;
+    bool terminate = false;
+
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+        // Terminate request
+        if (spurs->m.sysSrvMsgTerminate & (1 << ctxt->spuNum)) {
+            spurs->m.sysSrvOnSpu &= ~(1 << ctxt->spuNum);
+            terminate = true;
+        }
+
+        // Update workload message
+        if (spurs->m.sysSrvMsgUpdateWorkload.read_relaxed() & (1 << ctxt->spuNum)) {
+            spurs->m.sysSrvMsgUpdateWorkload &= ~(1 << ctxt->spuNum);
+            updateWorkload = true;
+        }
+
+        // Update trace message
+        if (spurs->m.sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) {
+            updateTrace = true;
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    // Process update workload message
+    if (updateWorkload) {
+        spursSysServiceActivateWorkload(spu, ctxt);
+    }
+
+    // Process update trace message
+    if (updateTrace) {
+        spursSysServiceTraceUpdate(spu, ctxt, 1, 0, 0);
+    }
+
+    // Process terminate request
+    if (terminate) {
+        // TODO: Rest of the terminate processing
+    }
+}
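Every shared-state update in this module follows the same optimistic pattern visible above: GETLLAR a 128-byte line of the CellSpurs structure into LS, mutate the local copy, then PUTLLC it back and retry the whole block if the conditional store fails because another SPU or the PPU touched the line in between. A sketch with the mutation factored out; the template and the lambda parameter are this editor's generalisation, not an interface from the source:

    // Optimistic read-modify-write of a 128-byte line, GETLLAR/PUTLLC style.
    template<typename F>
    void atomicUpdate(SPUThread & spu, u64 ea, u32 lsa, F && mutate) {
        do {
            // Load the line into LS and take a reservation on it
            spursDma(spu, MFC_GETLLAR_CMD, ea, lsa, 0x80/*size*/, 0/*tag*/);

            // Edit the copy in LS (caller reads/writes spu.ls_offset + lsa)
            mutate();
        } while (spursDma(spu, MFC_PUTLLC_CMD, ea, lsa, 0x80/*size*/, 0/*tag*/) == false); // retry if reservation was lost
    }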
+
+/// Activate a workload
+void spursSysServiceActivateWorkload(SPUThread & spu, SpursKernelContext * ctxt) {
+    auto spurs = vm::get_ptr(spu.ls_offset + 0x100);
+    spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo1), 0x30000/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID);
+    if (spurs->m.flags1 & SF1_32_WORKLOADS) {
+        spursDma(spu, MFC_GET_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklInfo2), 0x30200/*LSA*/, 0x200/*size*/, CELL_SPURS_KERNEL_DMA_TAG_ID);
+    }
+
+    u32 wklShutdownBitSet = 0;
+    ctxt->wklRunnable1 = 0;
+    ctxt->wklRunnable2 = 0;
+    for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+        auto wklInfo1 = vm::get_ptr(spu.ls_offset + 0x30000);
+
+        // Copy the priority of the workload for this SPU and its unique id to the LS
+        ctxt->priority[i] = wklInfo1[i].priority[ctxt->spuNum] == 0 ? 0 : 0x10 - wklInfo1[i].priority[ctxt->spuNum];
+        ctxt->wklUniqueId[i] = wklInfo1[i].uniqueId.read_relaxed();
+
+        if (spurs->m.flags1 & SF1_32_WORKLOADS) {
+            auto wklInfo2 = vm::get_ptr(spu.ls_offset + 0x30200);
+
+            // Copy the priority of the workload for this SPU to the LS
+            if (wklInfo2[i].priority[ctxt->spuNum]) {
+                ctxt->priority[i] |= (0x10 - wklInfo2[i].priority[ctxt->spuNum]) << 4;
+            }
+        }
+    }
+
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+        for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+            // Update workload status and runnable flag based on the workload state
+            auto wklStatus = spurs->m.wklStatus1[i];
+            if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) {
+                spurs->m.wklStatus1[i] |= 1 << ctxt->spuNum;
+                ctxt->wklRunnable1 |= 0x8000 >> i;
+            } else {
+                spurs->m.wklStatus1[i] &= ~(1 << ctxt->spuNum);
+            }
+
+            // If the workload is shutting down and if this is the last SPU from which it is being removed then
+            // add it to the shutdown bit set
+            if (spurs->m.wklState1[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) {
+                if (((wklStatus & (1 << ctxt->spuNum)) != 0) && (spurs->m.wklStatus1[i] == 0)) {
+                    spurs->m.wklState1[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE);
+                    wklShutdownBitSet |= 0x80000000u >> i;
+                }
+            }
+
+            if (spurs->m.flags1 & SF1_32_WORKLOADS) {
+                // Update workload status and runnable flag based on the workload state
+                wklStatus = spurs->m.wklStatus2[i];
+                if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_RUNNABLE) {
+                    spurs->m.wklStatus2[i] |= 1 << ctxt->spuNum;
+                    ctxt->wklRunnable2 |= 0x8000 >> i;
+                } else {
+                    spurs->m.wklStatus2[i] &= ~(1 << ctxt->spuNum);
+                }
+
+                // If the workload is shutting down and if this is the last SPU from which it is being removed then
+                // add it to the shutdown bit set
+                if (spurs->m.wklState2[i].read_relaxed() == SPURS_WKL_STATE_SHUTTING_DOWN) {
+                    if (((wklStatus & (1 << ctxt->spuNum)) != 0) && (spurs->m.wklStatus2[i] == 0)) {
+                        spurs->m.wklState2[i].write_relaxed(SPURS_WKL_STATE_REMOVABLE);
+                        wklShutdownBitSet |= 0x8000 >> i;
+                    }
+                }
+            }
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    if (wklShutdownBitSet) {
+        spursSysServiceUpdateShutdownCompletionEvents(spu, ctxt, wklShutdownBitSet);
+    }
+}
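The per-SPU priority table built above is stored inverted: a workload priority p in 1..15 becomes the nibble 0x10 - p (and priority 0 stays 0, meaning the workload never runs on this SPU), so that the numerically stronger priorities produce the larger scheduling weights computed by the select-workload loops (weight = priority << 4, plus 0x04 for the current workload). For the second workload set the converted nibble is packed into the high half of the same byte. A small sketch of that conversion, with names chosen by this editor:

    #include <cstdint>

    typedef uint8_t u8;

    // Convert a CellSpurs per-SPU priority into the weight nibble the kernel stores.
    u8 priorityToWeightNibble(u8 p) {
        return p == 0 ? 0 : (u8)(0x10 - p); // 0 -> never scheduled, otherwise stronger priority -> larger nibble
    }

    // Pack the nibbles for workload i (set 1) and workload i+16 (set 2) into one byte.
    u8 packPriorityByte(u8 set1Priority, u8 set2Priority) {
        return priorityToWeightNibble(set1Priority) | (priorityToWeightNibble(set2Priority) << 4);
    }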
+
+/// Update shutdown completion events
+void spursSysServiceUpdateShutdownCompletionEvents(SPUThread & spu, SpursKernelContext * ctxt, u32 wklShutdownBitSet) {
+    // Mark the workloads in wklShutdownBitSet as completed and also generate a bit set of the completed
+    // workloads that have a shutdown completion hook registered
+    u32 wklNotifyBitSet;
+    u8 spuPort;
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+        wklNotifyBitSet = 0;
+        spuPort = spurs->m.spuPort;
+        for (u32 i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
+            if (wklShutdownBitSet & (0x80000000u >> i)) {
+                spurs->m.wklEvent1[i] |= 0x01;
+                if (spurs->m.wklEvent1[i] & 0x02 || spurs->m.wklEvent1[i] & 0x10) {
+                    wklNotifyBitSet |= 0x80000000u >> i;
+                }
+            }
+
+            if (wklShutdownBitSet & (0x8000 >> i)) {
+                spurs->m.wklEvent2[i] |= 0x01;
+                if (spurs->m.wklEvent2[i] & 0x02 || spurs->m.wklEvent2[i] & 0x10) {
+                    wklNotifyBitSet |= 0x8000 >> i;
+                }
+            }
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    if (wklNotifyBitSet) {
+        // TODO: sys_spu_thread_send_event(spuPort, 0, wklNotifyBitSet);
+    }
+}
+
+/// Update the trace count for this SPU
+void spursSysServiceTraceSaveCount(SPUThread & spu, SpursKernelContext * ctxt) {
+    if (ctxt->traceBuffer) {
+        auto traceInfo = vm::ptr::make((u32)(ctxt->traceBuffer - (ctxt->spurs->m.traceStartIndex[ctxt->spuNum] << 4)));
+        traceInfo->count[ctxt->spuNum] = ctxt->traceMsgCount;
+    }
+}
+
+/// Update trace control
+void spursSysServiceTraceUpdate(SPUThread & spu, SpursKernelContext * ctxt, u32 arg2, u32 arg3, u32 arg4) {
+    bool notify;
+
+    u8 sysSrvMsgUpdateTrace;
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+        sysSrvMsgUpdateTrace = spurs->m.sysSrvMsgUpdateTrace;
+        spurs->m.sysSrvMsgUpdateTrace &= ~(1 << ctxt->spuNum);
+        spurs->m.xCC &= ~(1 << ctxt->spuNum);
+        spurs->m.xCC |= arg2 << ctxt->spuNum;
+
+        notify = false;
+        if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) && (spurs->m.sysSrvMsgUpdateTrace == 0) && (spurs->m.xCD != 0)) {
+            spurs->m.xCD = 0;
+            notify = true;
+        }
+
+        if (arg4 && spurs->m.xCD != 0) {
+            spurs->m.xCD = 0;
+            notify = true;
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    // Get trace parameters from CellSpurs and store them in the LS
+    if (((sysSrvMsgUpdateTrace & (1 << ctxt->spuNum)) != 0) || (arg3 != 0)) {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.traceBuffer), 0x80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x80 - offsetof(CellSpurs, m.traceBuffer));
+
+        if (ctxt->traceMsgCount != 0xFF || spurs->m.traceBuffer.addr() == 0) {
+            spursSysServiceTraceSaveCount(spu, ctxt);
+        } else {
+            spursDma(spu, MFC_GET_CMD, spurs->m.traceBuffer.addr() & 0xFFFFFFFC, 0x2C00/*LSA*/, 0x80/*size*/, ctxt->dmaTagId);
+            auto traceBuffer = vm::get_ptr(spu.ls_offset + 0x2C00);
+            ctxt->traceMsgCount = traceBuffer->count[ctxt->spuNum];
+        }
+
+        ctxt->traceBuffer = spurs->m.traceBuffer.addr() + (spurs->m.traceStartIndex[ctxt->spuNum] << 4);
+        ctxt->traceMaxCount = spurs->m.traceStartIndex[1] - spurs->m.traceStartIndex[0];
+        if (ctxt->traceBuffer == 0) {
+            ctxt->traceMsgCount = 0;
+        }
+    }
+
+    if (notify) {
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+        sys_spu_thread_send_event(spu, spurs->m.spuPort, 2, 0);
+    }
+}
+
+/// Restore state after executing the system workload
+void spursSysServiceCleanupAfterSystemWorkload(SPUThread & spu, SpursKernelContext * ctxt) {
+    u8 wklId;
+
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x2D80 - offsetof(CellSpurs, m.wklState1));
+
+        if (spurs->m.sysSrvWorkload[ctxt->spuNum] == 0xFF) {
+            return;
+        }
+
+        wklId = spurs->m.sysSrvWorkload[ctxt->spuNum];
+        spurs->m.sysSrvWorkload[ctxt->spuNum] = 0xFF;
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr() + offsetof(CellSpurs, m.wklState1), 0x2D80/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    spursSysServiceActivateWorkload(spu, ctxt);
+
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x100);
+
+        if (wklId >= CELL_SPURS_MAX_WORKLOAD) {
+            spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x10;
+            spurs->m.wklReadyCount1[wklId & 0x0F].write_relaxed(spurs->m.wklReadyCount1[wklId & 0x0F].read_relaxed() - 1);
+        } else {
+            spurs->m.wklCurrentContention[wklId & 0x0F] -= 0x01;
+            spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].write_relaxed(spurs->m.wklIdleSpuCountOrReadyCount2[wklId & 0x0F].read_relaxed() - 1);
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    // Set the current workload id to the id of the pre-empted workload since cellSpursModulePutTrace
+    // uses the current workload id to determine the workload to which the trace belongs
+    auto wklIdSaved = ctxt->wklCurrentId;
+    ctxt->wklCurrentId = wklId;
+
+    // Trace - STOP: GUID
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = CELL_SPURS_TRACE_TAG_STOP;
+    pkt.data.stop = SPURS_GUID_SYS_WKL;
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+    ctxt->wklCurrentId = wklIdSaved;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// SPURS taskset policy module functions
+//////////////////////////////////////////////////////////////////////////////
+
+enum SpursTasksetRequest {
+    SPURS_TASKSET_REQUEST_POLL_SIGNAL = -1,
+    SPURS_TASKSET_REQUEST_DESTROY_TASK = 0,
+    SPURS_TASKSET_REQUEST_YIELD_TASK = 1,
+    SPURS_TASKSET_REQUEST_WAIT_SIGNAL = 2,
+    SPURS_TASKSET_REQUEST_POLL = 3,
+    SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG = 4,
+    SPURS_TASKSET_REQUEST_SELECT_TASK = 5,
+    SPURS_TASKSET_REQUEST_RECV_WKL_FLAG = 6,
+};
+
+/// Taskset PM entry point
+bool spursTasksetEntry(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+    auto kernelCtxt = vm::get_ptr(spu.ls_offset + spu.GPR[3]._u32[3]);
+
+    auto arg = spu.GPR[4]._u64[1];
+    auto pollStatus = spu.GPR[5]._u32[3];
+
+    // Initialise memory and save args
+    memset(ctxt, 0, sizeof(*ctxt));
+    ctxt->taskset.set(arg);
+    memcpy(ctxt->moduleId, "SPURSTASK MODULE", sizeof(ctxt->moduleId));
+    ctxt->kernelMgmtAddr = spu.GPR[3]._u32[3];
+    ctxt->syscallAddr = CELL_SPURS_TASKSET_PM_SYSCALL_ADDR;
+    ctxt->spuNum = kernelCtxt->spuNum;
+    ctxt->dmaTagId = kernelCtxt->dmaTagId;
+    ctxt->taskId = 0xFFFFFFFF;
+
+    // Register SPURS taskset policy module HLE functions
+    spu.UnregisterHleFunctions(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, 0x40000/*LS_BOTTOM*/);
+    spu.RegisterHleFunction(CELL_SPURS_TASKSET_PM_ENTRY_ADDR, spursTasksetEntry);
+    spu.RegisterHleFunction(ctxt->syscallAddr, spursTasksetSyscallEntry);
+
+    // Initialise the taskset policy module
+    spursTasksetInit(spu, pollStatus);
+
+    // Dispatch
+    spursTasksetDispatch(spu);
+    return false;
+}
+
+/// Entry point into the Taskset PM for task syscalls
+bool spursTasksetSyscallEntry(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    // Save task context
+    ctxt->savedContextLr = spu.GPR[0];
+    ctxt->savedContextSp = spu.GPR[1];
+    for (auto i = 0; i < 48; i++) {
+        ctxt->savedContextR80ToR127[i] = spu.GPR[80 + i];
+    }
+
+    // Handle the syscall
+    spu.GPR[3]._u32[3] = spursTasksetProcessSyscall(spu, spu.GPR[3]._u32[3], spu.GPR[4]._u32[3]);
+
+    // Resume the previously executing task if the syscall did not cause a context switch
+    if (spu.m_is_branch == false) {
+        spursTasksetResumeTask(spu);
+    }
+
+    return false;
+}
+
+/// Resume a task
+void spursTasksetResumeTask(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    // Restore task context
+    spu.GPR[0] = ctxt->savedContextLr;
+    spu.GPR[1] = ctxt->savedContextSp;
+    for (auto i = 0; i < 48; i++) {
+        spu.GPR[80 + i] = ctxt->savedContextR80ToR127[i];
+    }
+
+    spu.SetBranch(spu.GPR[0]._u32[3]);
+}
+
+/// Start a task
+void spursTasksetStartTask(SPUThread & spu, CellSpursTaskArgument & taskArgs) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+    auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    spu.GPR[2].clear();
+    spu.GPR[3] = taskArgs._u128;
+    spu.GPR[4]._u64[1] = taskset->m.args;
+    spu.GPR[4]._u64[0] = taskset->m.spurs.addr();
+    for (auto i = 5; i < 128; i++) {
+        spu.GPR[i].clear();
+    }
+
+    spu.SetBranch(ctxt->savedContextLr.value()._u32[3]);
+}
+
+/// Process a request and update the state of the taskset
+s32 spursTasksetProcessRequest(SPUThread & spu, s32 request, u32 * taskId, u32 * isWaiting) {
+    auto kernelCtxt = vm::get_ptr(spu.ls_offset + 0x100);
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    s32 rc = CELL_OK;
+    s32 numNewlyReadyTasks;
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, ctxt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
+
+        // Verify taskset state is valid
+        auto _0 = be_t::make(u128::from32(0));
+        if ((taskset->m.waiting & taskset->m.running) != _0 || (taskset->m.ready & taskset->m.pending_ready) != _0 ||
+            ((taskset->m.running | taskset->m.ready | taskset->m.pending_ready | taskset->m.signalled | taskset->m.waiting) & be_t::make(~taskset->m.enabled.value())) != _0) {
+            spursHalt(spu);
+            return CELL_OK;
+        }
+
+        // Find the number of tasks that have become ready since the last iteration
+        auto newlyReadyTasks = (taskset->m.signalled | taskset->m.pending_ready).value() & ~taskset->m.ready.value();
+        numNewlyReadyTasks = 0;
+        for (auto i = 0; i < 128; i++) {
+            if (newlyReadyTasks._bit[i]) {
+                numNewlyReadyTasks++;
+            }
+        }
+
+        u128 readyButNotRunning;
+        u8 selectedTaskId;
+        auto running = taskset->m.running.value();
+        auto waiting = taskset->m.waiting.value();
+        auto enabled = taskset->m.enabled.value();
+        auto signalled = (taskset->m.signalled & (taskset->m.ready | taskset->m.pending_ready)).value();
+        auto ready = (taskset->m.signalled | taskset->m.ready | taskset->m.pending_ready).value();
+
+        switch (request) {
+        case SPURS_TASKSET_REQUEST_POLL_SIGNAL:
+            rc = signalled._bit[ctxt->taskId] ? 1 : 0;
+            signalled._bit[ctxt->taskId] = false;
+            break;
+        case SPURS_TASKSET_REQUEST_DESTROY_TASK:
+            numNewlyReadyTasks--;
+            running._bit[ctxt->taskId] = false;
+            enabled._bit[ctxt->taskId] = false;
+            signalled._bit[ctxt->taskId] = false;
+            ready._bit[ctxt->taskId] = false;
+            break;
+        case SPURS_TASKSET_REQUEST_YIELD_TASK:
+            running._bit[ctxt->taskId] = false;
+            waiting._bit[ctxt->taskId] = true;
+            break;
+        case SPURS_TASKSET_REQUEST_WAIT_SIGNAL:
+            if (signalled._bit[ctxt->taskId] == false) {
+                numNewlyReadyTasks--;
+                running._bit[ctxt->taskId] = false;
+                waiting._bit[ctxt->taskId] = true;
+                signalled._bit[ctxt->taskId] = false;
+                ready._bit[ctxt->taskId] = false;
+            }
+            break;
+        case SPURS_TASKSET_REQUEST_POLL:
+            readyButNotRunning = ready & ~running;
+            if (taskset->m.wkl_flag_wait_task < CELL_SPURS_MAX_TASK) {
+                readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->m.wkl_flag_wait_task));
+            }
+
+            rc = readyButNotRunning != _0 ? 1 : 0;
+            break;
+        case SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG:
+            if (taskset->m.wkl_flag_wait_task == 0x81) {
+                // A workload flag is already pending so consume it
+                taskset->m.wkl_flag_wait_task = 0x80;
+                rc = 0;
+            } else if (taskset->m.wkl_flag_wait_task == 0x80) {
+                // No tasks are waiting for the workload flag. Mark this task as waiting for the workload flag.
+                taskset->m.wkl_flag_wait_task = ctxt->taskId;
+                running._bit[ctxt->taskId] = false;
+                waiting._bit[ctxt->taskId] = true;
+                rc = 1;
+                numNewlyReadyTasks--;
+            } else {
+                // Another task is already waiting for the workload flag
+                rc = CELL_SPURS_TASK_ERROR_BUSY;
+            }
+            break;
+        case SPURS_TASKSET_REQUEST_SELECT_TASK:
+            readyButNotRunning = ready & ~running;
+            if (taskset->m.wkl_flag_wait_task < CELL_SPURS_MAX_TASK) {
+                readyButNotRunning = readyButNotRunning & ~(u128::fromBit(taskset->m.wkl_flag_wait_task));
+            }
+
+            // Select a task from the readyButNotRunning set to run. Start from the task after the last scheduled task to ensure fairness.
+            for (selectedTaskId = taskset->m.last_scheduled_task + 1; selectedTaskId < 128; selectedTaskId++) {
+                if (readyButNotRunning._bit[selectedTaskId]) {
+                    break;
+                }
+            }
+
+            if (selectedTaskId == 128) {
+                for (selectedTaskId = 0; selectedTaskId < taskset->m.last_scheduled_task + 1; selectedTaskId++) {
+                    if (readyButNotRunning._bit[selectedTaskId]) {
+                        break;
+                    }
+                }
+
+                if (selectedTaskId == taskset->m.last_scheduled_task + 1) {
+                    selectedTaskId = CELL_SPURS_MAX_TASK;
+                }
+            }
+
+            *taskId = selectedTaskId;
+            *isWaiting = waiting._bit[selectedTaskId < CELL_SPURS_MAX_TASK ? selectedTaskId : 0] ? 1 : 0;
+            if (selectedTaskId != CELL_SPURS_MAX_TASK) {
+                taskset->m.last_scheduled_task = selectedTaskId;
+                running._bit[selectedTaskId] = true;
+                waiting._bit[selectedTaskId] = false;
+            }
+            break;
+        case SPURS_TASKSET_REQUEST_RECV_WKL_FLAG:
+            if (taskset->m.wkl_flag_wait_task < CELL_SPURS_MAX_TASK) {
+                // There is a task waiting for the workload flag
+                taskset->m.wkl_flag_wait_task = 0x80;
+                rc = 1;
+                numNewlyReadyTasks++;
+            } else {
+                // No tasks are waiting for the workload flag
+                taskset->m.wkl_flag_wait_task = 0x81;
+                rc = 0;
+            }
+            break;
+        default:
+            spursHalt(spu);
+            return CELL_OK;
+        }
+
+        taskset->m.pending_ready = _0;
+        taskset->m.running = running;
+        taskset->m.waiting = waiting;
+        taskset->m.enabled = enabled;
+        taskset->m.signalled = signalled;
+        taskset->m.ready = ready;
+    } while (spursDma(spu, MFC_PUTLLC_CMD, ctxt->taskset.addr(), 0x2700/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    // Increment the ready count of the workload by the number of tasks that have become ready
+    do {
+        spursDma(spu, MFC_GETLLAR_CMD, kernelCtxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
+        auto spurs = vm::get_ptr(spu.ls_offset + 0x100);
+
+        s32 readyCount = kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[kernelCtxt->wklCurrentId].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].read_relaxed();
+        readyCount += numNewlyReadyTasks;
+        readyCount = readyCount < 0 ? 0 : readyCount > 0xFF ? 0xFF : readyCount;
+
+        if (kernelCtxt->wklCurrentId < CELL_SPURS_MAX_WORKLOAD) {
+            spurs->m.wklReadyCount1[kernelCtxt->wklCurrentId].write_relaxed(readyCount);
+        } else {
+            spurs->m.wklIdleSpuCountOrReadyCount2[kernelCtxt->wklCurrentId & 0x0F].write_relaxed(readyCount);
+        }
+    } while (spursDma(spu, MFC_PUTLLC_CMD, kernelCtxt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
+
+    return rc;
+}
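Each taskset tracks its 128 tasks as five 128-bit sets, and the validity check at the top of spursTasksetProcessRequest enforces three invariants: a task is never waiting and running at once, never ready and pending-ready at once, and never in any set while disabled. A standalone sketch of the same invariants using a pair of 64-bit halves per set (the emulator's u128 type is assumed above; the helper names are this editor's):

    #include <cstdint>

    typedef uint64_t u64;

    struct TaskSet128 { u64 hi, lo; };

    static TaskSet128 operator&(TaskSet128 a, TaskSet128 b) { return { a.hi & b.hi, a.lo & b.lo }; }
    static TaskSet128 operator|(TaskSet128 a, TaskSet128 b) { return { a.hi | b.hi, a.lo | b.lo }; }
    static TaskSet128 operator~(TaskSet128 a) { return { ~a.hi, ~a.lo }; }
    static bool any(TaskSet128 a) { return (a.hi | a.lo) != 0; }

    bool tasksetStateIsValid(TaskSet128 running, TaskSet128 ready, TaskSet128 pendingReady,
                             TaskSet128 waiting, TaskSet128 signalled, TaskSet128 enabled) {
        if (any(waiting & running)) return false;      // waiting and running are exclusive
        if (any(ready & pendingReady)) return false;   // ready and pending-ready are exclusive
        if (any((running | ready | pendingReady | signalled | waiting) & ~enabled)) return false; // no state for disabled tasks
        return true;
    }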
+
+/// Process pollStatus received from the SPURS kernel
+void spursTasksetProcessPollStatus(SPUThread & spu, u32 pollStatus) {
+    if (pollStatus & CELL_SPURS_MODULE_POLL_STATUS_FLAG) {
+        spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_RECV_WKL_FLAG, nullptr, nullptr);
+    }
+}
+
+/// Check execution rights
+bool spursTasksetPollStatus(SPUThread & spu) {
+    u32 pollStatus;
+
+    if (cellSpursModulePollStatus(spu, &pollStatus)) {
+        return true;
+    }
+
+    spursTasksetProcessPollStatus(spu, pollStatus);
+    return false;
+}
+
+/// Exit the Taskset PM
+void spursTasksetExit(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    // Trace - STOP
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = 0x54; // It's not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_STOP
+    pkt.data.stop = SPURS_GUID_TASKSET_PM;
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+    // Not sure why this check exists. Perhaps to check for memory corruption.
+    if (memcmp(ctxt->moduleId, "SPURSTASK MODULE", 16) != 0) {
+        spursHalt(spu);
+    }
+
+    cellSpursModuleExit(spu);
+}
+
+/// Invoked when a task exits
+void spursTasksetOnTaskExit(SPUThread & spu, u64 addr, u32 taskId, s32 exitCode, u64 args) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    spursDma(spu, MFC_GET_CMD, addr & 0xFFFFFF80, 0x10000/*LSA*/, (addr & 0x7F) << 11/*size*/, 0);
+    spursDmaWaitForCompletion(spu, 1);
+
+    spu.GPR[3]._u64[1] = ctxt->taskset.addr();
+    spu.GPR[4]._u32[3] = taskId;
+    spu.GPR[5]._u32[3] = exitCode;
+    spu.GPR[6]._u64[1] = args;
+    spu.FastCall(0x10000);
+}
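The exit-handler address handed to spursTasksetOnTaskExit is packed: the upper bits are the 128-byte-aligned effective address of the handler image, and the low 7 bits encode the image size in 2 KB units, which is exactly how the function splits it before the DMA above. A small sketch of the decoding (typedefs and names are this editor's):

    #include <cstdint>

    typedef uint32_t u32;
    typedef uint64_t u64;

    // Split the packed exit-handler word into its address and size components.
    void decodeExitHandler(u64 packed, u64 & ea, u32 & size) {
        ea   = packed & 0xFFFFFF80ull;     // 128-byte aligned address of the handler image
        size = (u32)(packed & 0x7F) << 11; // low 7 bits: image size in 2 KB blocks
    }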
+
+/// Save the context of a task
+s32 spursTasketSaveTaskContext(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+    auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780);
+
+    spursDmaWaitForCompletion(spu, 0xFFFFFFFF);
+
+    if (taskInfo->context_save_storage_and_alloc_ls_blocks == 0) {
+        return CELL_SPURS_TASK_ERROR_STAT;
+    }
+
+    u32 allocLsBlocks = taskInfo->context_save_storage_and_alloc_ls_blocks & 0x7F;
+    u32 lsBlocks = 0;
+    for (auto i = 0; i < 128; i++) {
+        if (taskInfo->ls_pattern._u128.value()._bit[i]) {
+            lsBlocks++;
+        }
+    }
+
+    if (lsBlocks > allocLsBlocks) {
+        return CELL_SPURS_TASK_ERROR_STAT;
+    }
+
+    // Make sure the stack area is specified in the ls pattern
+    for (auto i = (ctxt->savedContextSp.value()._u32[3]) >> 11; i < 128; i++) {
+        if (taskInfo->ls_pattern._u128.value()._bit[i] == false) {
+            return CELL_SPURS_TASK_ERROR_STAT;
+        }
+    }
+
+    // Get the processor context
+    u128 r;
+    spu.FPSCR.Read(r);
+    ctxt->savedContextFpscr = r;
+    spu.ReadChannel(r, SPU_RdEventMask);
+    ctxt->savedSpuWriteEventMask = r._u32[3];
+    spu.ReadChannel(r, MFC_RdTagMask);
+    ctxt->savedWriteTagGroupQueryMask = r._u32[3];
+
+    // Store the processor context
+    u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull;
+    spursDma(spu, MFC_PUT_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId);
+
+    // Save LS context
+    for (auto i = 6; i < 128; i++) {
+        if (taskInfo->ls_pattern._u128.value()._bit[i]) {
+            // TODO: Combine DMA requests for consecutive blocks into a single request
+            spursDma(spu, MFC_PUT_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId);
+        }
+    }
+
+    spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+    return CELL_OK;
+}
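ls_pattern is a 128-bit mask in which bit i stands for the i-th 2 KB block of the 256 KB local store; the save and restore loops skip the first six blocks (the region below CELL_SPURS_TASK_TOP) and lay the selected blocks out contiguously after a register-context header in the save storage. A sketch that sizes the save area for a given mask, assuming the layout visible above (0x380 bytes of register context written at offset 0, LS blocks starting at offset 0x400); the helper names are this editor's:

    #include <cstdint>

    typedef uint64_t u64;

    // Count how many 2 KB blocks a 128-bit ls_pattern selects.
    unsigned countLsBlocks(u64 patternHi, u64 patternLo) {
        unsigned n = 0;
        for (int i = 0; i < 64; i++) {
            n += (patternHi >> i) & 1;
            n += (patternLo >> i) & 1;
        }
        return n;
    }

    // Bytes of context-save storage needed: header area plus one 2 KB slot per set bit.
    u64 contextSaveSize(u64 patternHi, u64 patternLo) {
        return 0x400 + (u64)countLsBlocks(patternHi, patternLo) * 0x800;
    }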
+
+/// Taskset dispatcher
+void spursTasksetDispatch(SPUThread & spu) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+    auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    u32 taskId;
+    u32 isWaiting;
+    spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_SELECT_TASK, &taskId, &isWaiting);
+    if (taskId >= CELL_SPURS_MAX_TASK) {
+        spursTasksetExit(spu);
+        return;
+    }
+
+    ctxt->taskId = taskId;
+
+    // DMA in the task info for the selected task
+    spursDma(spu, MFC_GET_CMD, ctxt->taskset.addr() + offsetof(CellSpursTaskset, m.task_info[taskId]), 0x2780/*LSA*/, sizeof(CellSpursTaskset::TaskInfo), ctxt->dmaTagId);
+    spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+    auto taskInfo = vm::get_ptr(spu.ls_offset + 0x2780);
+    auto elfAddr = taskInfo->elf_addr.addr().value();
+    taskInfo->elf_addr.set(taskInfo->elf_addr.addr() & 0xFFFFFFFFFFFFFFF8ull);
+
+    // Trace - Task: Incident=dispatch
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK;
+    pkt.data.task.incident = CELL_SPURS_TRACE_TASK_DISPATCH;
+    pkt.data.task.taskId = taskId;
+    cellSpursModulePutTrace(&pkt, CELL_SPURS_KERNEL_DMA_TAG_ID);
+
+    if (isWaiting == 0) {
+        // If we reach here it means that the task is being started and not being resumed
+        memset(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP), 0, CELL_SPURS_TASK_BOTTOM - CELL_SPURS_TASK_TOP);
+        ctxt->guidAddr = CELL_SPURS_TASK_TOP;
+
+        u32 entryPoint;
+        u32 lowestLoadAddr;
+        if (spursTasksetLoadElf(spu, &entryPoint, &lowestLoadAddr, taskInfo->elf_addr.addr(), false) != CELL_OK) {
+            spursHalt(spu);
+            return;
+        }
+
+        spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+
+        ctxt->savedContextLr = u128::from32r(entryPoint);
+        ctxt->guidAddr = lowestLoadAddr;
+        ctxt->tasksetMgmtAddr = 0x2700;
+        ctxt->x2FC0 = 0;
+        ctxt->taskExitCode = isWaiting;
+        ctxt->x2FD4 = elfAddr & 5; // TODO: Figure this out
+
+        if ((elfAddr & 5) == 1) {
+            spursDma(spu, MFC_GET_CMD, ctxt->taskset.addr() + offsetof(CellSpursTaskset2, m.task_exit_code[taskId]), 0x2FC0/*LSA*/, 0x10/*size*/, ctxt->dmaTagId);
+        }
+
+        // Trace - GUID
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_GUID;
+        pkt.data.guid = 0; // TODO: Put GUID of taskId here
+        cellSpursModulePutTrace(&pkt, 0x1F);
+
+        if (elfAddr & 2) { // TODO: Figure this out
+            spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_STOP);
+            spu.Stop();
+            return;
+        }
+
+        spursTasksetStartTask(spu, taskInfo->args);
+    } else {
+        if (taskset->m.enable_clear_ls) {
+            memset(vm::get_ptr(spu.ls_offset + CELL_SPURS_TASK_TOP), 0, CELL_SPURS_TASK_BOTTOM - CELL_SPURS_TASK_TOP);
+        }
+
+        // If the entire LS is saved then there is no need to load the ELF as it will be saved in the context save area as well
+        if (taskInfo->ls_pattern._u128.value() != u128::from64r(0x03FFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull)) {
+            // Load the ELF
+            u32 entryPoint;
+            if (spursTasksetLoadElf(spu, &entryPoint, nullptr, taskInfo->elf_addr.addr(), true) != CELL_OK) {
+                spursHalt(spu);
+                return;
+            }
+        }
+
+        // Load saved context from main memory to LS
+        u64 contextSaveStorage = taskInfo->context_save_storage_and_alloc_ls_blocks & 0xFFFFFFFFFFFFFF80ull;
+        spursDma(spu, MFC_GET_CMD, contextSaveStorage, 0x2C80/*LSA*/, 0x380/*size*/, ctxt->dmaTagId);
+        for (auto i = 6; i < 128; i++) {
+            if (taskInfo->ls_pattern._u128.value()._bit[i]) {
+                // TODO: Combine DMA requests for consecutive blocks into a single request
+                spursDma(spu, MFC_GET_CMD, contextSaveStorage + 0x400 + ((i - 6) << 11), CELL_SPURS_TASK_TOP + ((i - 6) << 11), 0x800/*size*/, ctxt->dmaTagId);
+            }
+        }
+
+        spursDmaWaitForCompletion(spu, 1 << ctxt->dmaTagId);
+
+        // Restore saved registers
+        spu.FPSCR.Write(ctxt->savedContextFpscr.value());
+        spu.WriteChannel(MFC_WrTagMask, u128::from32r(ctxt->savedWriteTagGroupQueryMask));
+        spu.WriteChannel(SPU_WrEventMask, u128::from32r(ctxt->savedSpuWriteEventMask));
+
+        // Trace - GUID
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_GUID;
+        pkt.data.guid = 0; // TODO: Put GUID of taskId here
+        cellSpursModulePutTrace(&pkt, 0x1F);
+
+        if (elfAddr & 2) { // TODO: Figure this out
+            spu.SPU.Status.SetValue(SPU_STATUS_STOPPED_BY_STOP);
+            spu.Stop();
+            return;
+        }
+
+        spu.GPR[3].clear();
+        spursTasksetResumeTask(spu);
+    }
+}
+
+/// Process a syscall request
+s32 spursTasksetProcessSyscall(SPUThread & spu, u32 syscallNum, u32 args) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+    auto taskset = vm::get_ptr(spu.ls_offset + 0x2700);
+
+    // If the 0x10 bit is set in syscallNum then it's the 2nd version of the
+    // syscall (e.g. cellSpursYield2 instead of cellSpursYield) and so don't wait
+    // for DMA completion
+    if ((syscallNum & 0x10) == 0) {
+        spursDmaWaitForCompletion(spu, 0xFFFFFFFF);
+    }
+
+    s32 rc = 0;
+    u32 incident = 0;
+    switch (syscallNum & 0x0F) {
+    case CELL_SPURS_TASK_SYSCALL_EXIT:
+        if (ctxt->x2FD4 == 4 || (ctxt->x2FC0 & 0xFFFFFFFF) != 0) { // TODO: Figure this out
+            if (ctxt->x2FD4 != 4) {
+                spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_DESTROY_TASK, nullptr, nullptr);
+            }
+
+            auto addr = ctxt->x2FD4 == 4 ? taskset->m.x78 : ctxt->x2FC0;
+            auto args = ctxt->x2FD4 == 4 ? 0 : ctxt->x2FC8;
+            spursTasksetOnTaskExit(spu, addr, ctxt->taskId, ctxt->taskExitCode, args);
+        }
+
+        incident = CELL_SPURS_TRACE_TASK_EXIT;
+        break;
+    case CELL_SPURS_TASK_SYSCALL_YIELD:
+        if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_POLL, nullptr, nullptr)) {
+            // If we reach here then it means that either another task can be scheduled or another workload can be scheduled
+            // Save the context of the current task
+            rc = spursTasketSaveTaskContext(spu);
+            if (rc == CELL_OK) {
+                spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_YIELD_TASK, nullptr, nullptr);
+                incident = CELL_SPURS_TRACE_TASK_YIELD;
+            }
+        }
+        break;
+    case CELL_SPURS_TASK_SYSCALL_WAIT_SIGNAL:
+        if (spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_POLL_SIGNAL, nullptr, nullptr) == 0) {
+            rc = spursTasketSaveTaskContext(spu);
+            if (rc == CELL_OK) {
+                if (spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_WAIT_SIGNAL, nullptr, nullptr) == 0) {
+                    incident = CELL_SPURS_TRACE_TASK_WAIT;
+                }
+            }
+        }
+        break;
+    case CELL_SPURS_TASK_SYSCALL_POLL:
+        rc = spursTasksetPollStatus(spu) ? CELL_SPURS_TASK_POLL_FOUND_WORKLOAD : 0;
+        rc |= spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_POLL, nullptr, nullptr) ? CELL_SPURS_TASK_POLL_FOUND_TASK : 0;
+        break;
+    case CELL_SPURS_TASK_SYSCALL_RECV_WKL_FLAG:
+        if (args == 0) { // TODO: Figure this out
+            spursHalt(spu);
+        }
+
+        if (spursTasksetPollStatus(spu) || spursTasksetProcessRequest(spu, SPURS_TASKSET_REQUEST_WAIT_WKL_FLAG, nullptr, nullptr) != 1) {
+            rc = spursTasketSaveTaskContext(spu);
+            if (rc == CELL_OK) {
+                incident = CELL_SPURS_TRACE_TASK_WAIT;
+            }
+        }
+        break;
+    default:
+        rc = CELL_SPURS_TASK_ERROR_NOSYS;
+        break;
+    }
+
+    if (incident) {
+        // Trace - TASK
+        CellSpursTracePacket pkt;
+        memset(&pkt, 0, sizeof(pkt));
+        pkt.header.tag = CELL_SPURS_TRACE_TAG_TASK;
+        pkt.data.task.incident = incident;
+        pkt.data.task.taskId = ctxt->taskId;
+        cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+        // Clear the GUID of the task
+        memset(vm::get_ptr(spu.ls_offset + ctxt->guidAddr), 0, 0x10);
+
+        if (spursTasksetPollStatus(spu)) {
+            spursTasksetExit(spu);
+        } else {
+            spursTasksetDispatch(spu);
+        }
+    }
+
+    return rc;
+}
+
+/// Initialise the Taskset PM
+void spursTasksetInit(SPUThread & spu, u32 pollStatus) {
+    auto ctxt = vm::get_ptr(spu.ls_offset + 0x2700);
+    auto kernelCtxt = vm::get_ptr(spu.ls_offset + 0x100);
+
+    kernelCtxt->moduleId[0] = 'T';
+    kernelCtxt->moduleId[1] = 'K';
+
+    // Trace - START: Module='TKST'
+    CellSpursTracePacket pkt;
+    memset(&pkt, 0, sizeof(pkt));
+    pkt.header.tag = 0x52; // It's not clear what this tag means exactly but it seems similar to CELL_SPURS_TRACE_TAG_START
+    memcpy(pkt.data.start.module, "TKST", 4);
+    pkt.data.start.level = 2;
+    pkt.data.start.ls = 0xA00 >> 2;
+    cellSpursModulePutTrace(&pkt, ctxt->dmaTagId);
+
+    spursTasksetProcessPollStatus(spu, pollStatus);
+}
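Taskset syscall numbers are decoded in two parts by spursTasksetProcessSyscall above: bits 0-3 select the operation and bit 4 distinguishes the second-generation API variants (cellSpursYield2 and friends), which skip the wait for outstanding DMA. A small sketch of the decoding; the struct and names are this editor's illustration, not an interface from the source:

    #include <cstdint>

    typedef uint32_t u32;

    struct TasksetSyscall {
        u32  op;          // low nibble: EXIT, YIELD, WAIT_SIGNAL, POLL, RECV_WKL_FLAG, ...
        bool skipDmaWait; // bit 4 set: "2" variant, do not wait for DMA completion
    };

    TasksetSyscall decodeTasksetSyscall(u32 syscallNum) {
        return { syscallNum & 0x0F, (syscallNum & 0x10) != 0 };
    }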
+
+/// Load an ELF
+s32 spursTasksetLoadElf(SPUThread & spu, u32 * entryPoint, u32 * lowestLoadAddr, u64 elfAddr, bool skipWriteableSegments) {
+    if (elfAddr == 0 || (elfAddr & 0x0F) != 0) {
+        return CELL_SPURS_TASK_ERROR_INVAL;
+    }
+
+    vfsStreamMemory stream(elfAddr);
+    loader::handlers::elf32 loader;
+    auto rc = loader.init(stream);
+    if (rc != loader::handler::ok) {
+        return CELL_SPURS_TASK_ERROR_NOEXEC;
+    }
+
+    u32 _lowestLoadAddr = CELL_SPURS_TASK_BOTTOM;
+    for (auto & phdr : loader.m_phdrs) {
+        if (phdr.data_be.p_paddr >= CELL_SPURS_TASK_BOTTOM) {
+            break;
+        }
+
+        if (phdr.data_be.p_type == 1/*PT_LOAD*/) {
+            if (skipWriteableSegments == false || (phdr.data_be.p_flags & 2/*PF_W*/) == 0) {
+                if (phdr.data_be.p_vaddr < CELL_SPURS_TASK_TOP ||
+                    phdr.data_be.p_vaddr + phdr.data_be.p_memsz > CELL_SPURS_TASK_BOTTOM) {
+                    return CELL_SPURS_TASK_ERROR_FAULT;
+                }
+
+                _lowestLoadAddr = _lowestLoadAddr > phdr.data_be.p_vaddr ? phdr.data_be.p_vaddr : _lowestLoadAddr;
+            }
+        }
+    }
+
+    loader.load_data(spu.ls_offset, skipWriteableSegments);
+    *entryPoint = loader.m_ehdr.data_be.e_entry;
+    if (lowestLoadAddr) {
+        *lowestLoadAddr = _lowestLoadAddr;
+    }
+
+    return CELL_OK;
+}
diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp
index f3fb23b383..8def286a85 100644
--- a/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp
+++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.cpp
@@ -1099,3 +1099,86 @@ s32 sys_raw_spu_get_spu_cfg(u32 id, vm::ptr value)
 	*value = (u32)t->cfg.value;
 	return CELL_OK;
 }
+
+void sys_spu_thread_exit(SPUThread & spu, s32 status)
+{
+	// Cancel any pending status update requests
+	u128 r;
+	spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0));
+	while (spu.GetChannelCount(MFC_RdTagStat) != 1);
+	spu.ReadChannel(r, MFC_RdTagStat);
+
+	// Wait for all pending DMA operations to complete
+	spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF));
+	spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL));
+	spu.ReadChannel(r, MFC_RdTagStat);
+
+	spu.WriteChannel(SPU_WrOutMbox, u128::from32r(status));
+	spu.StopAndSignal(0x102);
+}
+
+void sys_spu_thread_group_exit(SPUThread & spu, s32 status)
+{
+	// Cancel any pending status update requests
+	u128 r;
+	spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0));
+	while (spu.GetChannelCount(MFC_RdTagStat) != 1);
+	spu.ReadChannel(r, MFC_RdTagStat);
+
+	// Wait for all pending DMA operations to complete
+	spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF));
+	spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL));
+	spu.ReadChannel(r, MFC_RdTagStat);
+
+	spu.WriteChannel(SPU_WrOutMbox, u128::from32r(status));
+	spu.StopAndSignal(0x101);
+}
+
+s32 sys_spu_thread_send_event(SPUThread & spu, u8 spup, u32 data0, u32 data1)
+{
+	if (spup > 0x3F)
+	{
+		return CELL_EINVAL;
+	}
+
+	if (spu.GetChannelCount(SPU_RdInMbox))
+	{
+		return CELL_EBUSY;
+	}
+
+	spu.WriteChannel(SPU_WrOutMbox, u128::from32r(data1));
+	spu.WriteChannel(SPU_WrOutIntrMbox, u128::from32r((spup << 24) | (data0 & 0x00FFFFFF)));
+
+	u128 r;
+	spu.ReadChannel(r, SPU_RdInMbox);
+	return r._u32[3];
+}
+
+s32 sys_spu_thread_switch_system_module(SPUThread & spu, u32 status)
+{
+	if (spu.GetChannelCount(SPU_RdInMbox))
+	{
+		return CELL_EBUSY;
+	}
+
+	// Cancel any pending status update requests
+	u128 r;
+	spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(0));
+	while (spu.GetChannelCount(MFC_RdTagStat) != 1);
+	spu.ReadChannel(r, MFC_RdTagStat);
+
+	// Wait for all pending DMA operations to complete
+	spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF));
+	spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL));
+	spu.ReadChannel(r, MFC_RdTagStat);
+
+	do
+	{
+		spu.WriteChannel(SPU_WrOutMbox, u128::from32r(status));
+		spu.StopAndSignal(0x120);
+		spu.ReadChannel(r, SPU_RdInMbox);
+	}
+	while (r._u32[3] == CELL_EBUSY);
+
+	return r._u32[3];
+}
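The three exiting helpers above share one prologue: any previously requested MFC tag status update is cancelled (an immediate update is requested and its stale result drained from MFC_RdTagStat), then a full tag mask plus MFC_TAG_UPDATE_ALL blocks until every outstanding DMA has completed. A condensed sketch of that prologue, assuming the SPUThread channel interface used above (the original code writes the raw value 0, which is MFC_TAG_UPDATE_IMMEDIATE; the helper name is this editor's):

    // Drain any pending tag status update, then wait for all 32 tag groups to complete.
    void waitForAllDmaToComplete(SPUThread & spu)
    {
        u128 r;

        // Cancel a pending tag status update request and consume the stale result
        spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_IMMEDIATE)); // == 0
        while (spu.GetChannelCount(MFC_RdTagStat) != 1);
        spu.ReadChannel(r, MFC_RdTagStat);

        // Now block until every outstanding DMA on any tag has finished
        spu.WriteChannel(MFC_WrTagMask, u128::from32r(0xFFFFFFFF));
        spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_ALL));
        spu.ReadChannel(r, MFC_RdTagStat);
    }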
diff --git a/rpcs3/Emu/SysCalls/lv2/sys_spu.h b/rpcs3/Emu/SysCalls/lv2/sys_spu.h
index e71c606bf7..e129455758 100644
--- a/rpcs3/Emu/SysCalls/lv2/sys_spu.h
+++ b/rpcs3/Emu/SysCalls/lv2/sys_spu.h
@@ -204,3 +204,9 @@ s32 sys_raw_spu_get_int_stat(u32 id, u32 class_id, vm::ptr stat);
 s32 sys_raw_spu_read_puint_mb(u32 id, vm::ptr value);
 s32 sys_raw_spu_set_spu_cfg(u32 id, u32 value);
 s32 sys_raw_spu_get_spu_cfg(u32 id, vm::ptr value);
+
+// SPU Calls
+void sys_spu_thread_exit(SPUThread & spu, s32 status);
+void sys_spu_thread_group_exit(SPUThread & spu, s32 status);
+s32 sys_spu_thread_send_event(SPUThread & spu, u8 spup, u32 data0, u32 data1);
+s32 sys_spu_thread_switch_system_module(SPUThread & spu, u32 status);
diff --git a/rpcs3/Loader/ELF32.cpp b/rpcs3/Loader/ELF32.cpp
index 1d0ea7a16e..4eef027421 100644
--- a/rpcs3/Loader/ELF32.cpp
+++ b/rpcs3/Loader/ELF32.cpp
@@ -413,7 +413,7 @@ namespace loader
 		return ok;
 	}
 
-	handler::error_code elf32::load_data(u32 offset)
+	handler::error_code elf32::load_data(u32 offset, bool skip_writeable)
 	{
 		Elf_Machine machine = (Elf_Machine)(u16)(m_ehdr.is_le() ? m_ehdr.data_le.e_machine : m_ehdr.data_be.e_machine);
@@ -436,6 +436,11 @@
 				return loading_error;
 			}
 
+			if (skip_writeable == true && (phdr.data_be.p_flags & 2/*PF_W*/) != 0)
+			{
+				continue;
+			}
+
 			if (filesz)
 			{
 				m_stream->Seek(handler::get_stream_offset() + offset);
diff --git a/rpcs3/Loader/ELF32.h b/rpcs3/Loader/ELF32.h
index d3d37f543c..6a13b6f7cb 100644
--- a/rpcs3/Loader/ELF32.h
+++ b/rpcs3/Loader/ELF32.h
@@ -132,7 +132,7 @@ namespace loader
 		error_code init(vfsStream& stream) override;
 		error_code load() override;
-		error_code load_data(u32 offset);
+		error_code load_data(u32 offset, bool skip_writeable = false);
 
 		virtual ~elf32() = default;
 	};
diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj
index 5327e220e3..56bc415e11 100644
--- a/rpcs3/emucore.vcxproj
+++ b/rpcs3/emucore.vcxproj
@@ -37,6 +37,7 @@
+
@@ -621,6 +622,7 @@
 _UNICODE;UNICODE;%(PreprocessorDefinitions)
 stdafx.h
 Async
+true
 false
@@ -638,6 +640,7 @@
 _UNICODE;UNICODE;LLVM_AVAILABLE;%(PreprocessorDefinitions)
 stdafx.h
 Async
+true
@@ -658,6 +661,7 @@
 _UNICODE;UNICODE;MSVC_CRT_MEMLEAK_DETECTION;%(PreprocessorDefinitions)
 stdafx.h
 Async
+true
@@ -675,6 +679,7 @@
 Use
 stdafx.h
 Async
+true
@@ -695,6 +700,7 @@
 stdafx.h
 Async
 LLVM_AVAILABLE;%(PreprocessorDefinitions)
+true
diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters
index ea11ba34b9..49c65e3094 100644
--- a/rpcs3/emucore.vcxproj.filters
+++ b/rpcs3/emucore.vcxproj.filters
@@ -668,6 +668,9 @@
 Emu\Audio\XAudio2
+
+Emu\SysCalls\Modules
+
 Emu\CPU\ARMv7
diff --git a/rpcs3/stdafx.h b/rpcs3/stdafx.h
index 66792d0ea4..f43f0a0cd5 100644
--- a/rpcs3/stdafx.h
+++ b/rpcs3/stdafx.h
@@ -36,6 +36,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include "Utilities/GNU.h"