mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-08 16:01:42 +12:00
SPURS: Update kernel to use lock line reservations
This commit is contained in:
parent
a7728c9067
commit
62e2d8d9a7
7 changed files with 414 additions and 297 deletions
|
@ -35,6 +35,14 @@ enum
|
||||||
MFC_GETLLAR_SUCCESS = 4,
|
MFC_GETLLAR_SUCCESS = 4,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// MFC Write Tag Status Update Request Channel (ch23) operations.
// One of these values is written to MFC_WrTagUpdate before MFC_RdTagStat is read.
enum
{
	// Request the tag status without waiting
	MFC_TAG_UPDATE_IMMEDIATE = 0,

	// Request the tag status once any of the selected tag groups has completed
	MFC_TAG_UPDATE_ANY = 1,

	// Request the tag status once all of the selected tag groups have completed
	MFC_TAG_UPDATE_ALL = 2,
};
|
||||||
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
MFC_SPU_TO_PPU_MAILBOX_STATUS_MASK = 0x000000FF,
|
MFC_SPU_TO_PPU_MAILBOX_STATUS_MASK = 0x000000FF,
|
||||||
|
|
|
@ -1060,7 +1060,14 @@ void SPUThread::StopAndSignal(u32 code)
|
||||||
|
|
||||||
case 0x003:
|
case 0x003:
|
||||||
{
|
{
|
||||||
m_code3_func(*this);
|
auto iter = m_addr_to_hle_function_map.find(PC);
|
||||||
|
assert(iter != m_addr_to_hle_function_map.end());
|
||||||
|
|
||||||
|
auto return_to_caller = iter->second(*this);
|
||||||
|
if (return_to_caller)
|
||||||
|
{
|
||||||
|
SetBranch(GPR[0]._u32[3] & 0x3fffc);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -290,6 +290,8 @@ public:
|
||||||
u32 m_event_mask;
|
u32 m_event_mask;
|
||||||
u32 m_events;
|
u32 m_events;
|
||||||
|
|
||||||
|
std::unordered_map<u32, std::function<bool(SPUThread& SPU)>> m_addr_to_hle_function_map;
|
||||||
|
|
||||||
struct IntrTag
|
struct IntrTag
|
||||||
{
|
{
|
||||||
u32 enabled; // 1 == true
|
u32 enabled; // 1 == true
|
||||||
|
@ -509,8 +511,35 @@ public:
|
||||||
void WriteLS64 (const u32 lsa, const u64& data) const { vm::write64 (lsa + m_offset, data); }
|
void WriteLS64 (const u32 lsa, const u64& data) const { vm::write64 (lsa + m_offset, data); }
|
||||||
void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); }
|
void WriteLS128(const u32 lsa, const u128& data) const { vm::write128(lsa + m_offset, data); }
|
||||||
|
|
||||||
|
/// Install an HLE handler at a local storage address.
///
/// The handler is stored in m_addr_to_hle_function_map under `addr` (replacing any handler
/// already stored for that address) and the word at `addr` is overwritten with a STOP 3
/// instruction. StopAndSignal's case 0x003 looks the handler up by PC and, when the handler
/// returns true, branches to the address held in GPR[0].
///
/// @param addr     Local storage address to patch
/// @param function Handler to invoke when the STOP 3 at `addr` is executed
void RegisterHleFuncion(u32 addr, std::function<bool(SPUThread & SPU)> function)
{
	const u32 stopInsn = 0x00000003; // STOP 3

	m_addr_to_hle_function_map[addr] = function;
	WriteLS32(addr, stopInsn);
}
|
||||||
|
|
||||||
|
/// Remove the HLE handler installed at a local storage address.
///
/// The word at `addr` is overwritten with a NOP and the entry for `addr` (if any) is erased
/// from m_addr_to_hle_function_map. The NOP is written whether or not a handler was registered.
///
/// @param addr Local storage address that was passed to RegisterHleFuncion
void UnregisterHleFunction(u32 addr)
{
	const u32 nopInsn = 0x00200000; // NOP

	WriteLS32(addr, nopInsn);
	m_addr_to_hle_function_map.erase(addr);
}
|
||||||
|
|
||||||
|
/// Remove every HLE handler whose address lies in [start_addr, end_addr] (both ends inclusive).
///
/// Each matching address is overwritten with a NOP in local storage and its entry is erased
/// from m_addr_to_hle_function_map. Handlers outside the range are left untouched.
///
/// @param start_addr First local storage address of the range
/// @param end_addr   Last local storage address of the range (inclusive)
void UnregisterHleFunctions(u32 start_addr, u32 end_addr)
{
	auto entry = m_addr_to_hle_function_map.begin();

	while (entry != m_addr_to_hle_function_map.end())
	{
		const bool outsideRange = entry->first < start_addr || entry->first > end_addr;
		if (outsideRange)
		{
			++entry;
			continue;
		}

		WriteLS32(entry->first, 0x00200000); // NOP

		// erase() hands back the iterator following the removed element
		entry = m_addr_to_hle_function_map.erase(entry);
	}
}
|
||||||
|
|
||||||
std::function<void(SPUThread& SPU)> m_custom_task;
|
std::function<void(SPUThread& SPU)> m_custom_task;
|
||||||
std::function<void(SPUThread& SPU)> m_code3_func;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SPUThread(CPUThreadType type = CPU_THREAD_SPU);
|
SPUThread(CPUThreadType type = CPU_THREAD_SPU);
|
||||||
|
|
|
@ -26,7 +26,7 @@ extern u32 libsre;
|
||||||
extern u32 libsre_rtoc;
|
extern u32 libsre_rtoc;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void spursKernelMain(SPUThread & spu);
|
bool spursKernelMain(SPUThread & spu);
|
||||||
s64 cellSpursLookUpTasksetAddress(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, u32 id);
|
s64 cellSpursLookUpTasksetAddress(vm::ptr<CellSpurs> spurs, vm::ptr<CellSpursTaskset> taskset, u32 id);
|
||||||
s64 _cellSpursSendSignal(vm::ptr<CellSpursTaskset> taskset, u32 taskID);
|
s64 _cellSpursSendSignal(vm::ptr<CellSpursTaskset> taskset, u32 taskID);
|
||||||
|
|
||||||
|
@ -155,7 +155,8 @@ s64 spursInit(
|
||||||
assert(!"spu_image_import() failed");
|
assert(!"spu_image_import() failed");
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
spurs->m.spuImg.addr = (u32)Memory.Alloc(0x40000, 4096);
|
spurs->m.spuImg.addr = (u32)Memory.Alloc(0x40000, 4096);
|
||||||
|
spurs->m.spuImg.entry_point = isSecond ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
s32 tgt = SYS_SPU_THREAD_GROUP_TYPE_NORMAL;
|
s32 tgt = SYS_SPU_THREAD_GROUP_TYPE_NORMAL;
|
||||||
|
@ -179,17 +180,11 @@ s64 spursInit(
|
||||||
name += "CellSpursKernel0";
|
name += "CellSpursKernel0";
|
||||||
for (s32 num = 0; num < nSpus; num++, name[name.size() - 1]++)
|
for (s32 num = 0; num < nSpus; num++, name[name.size() - 1]++)
|
||||||
{
|
{
|
||||||
spurs->m.spus[num] = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, 0, 0, 0, 0, [spurs, num](SPUThread& SPU)
|
auto spu = spu_thread_initialize(tg, num, spurs->m.spuImg, name, SYS_SPU_THREAD_OPTION_DEC_SYNC_TB_ENABLE, num, spurs.addr(), 0, 0);
|
||||||
{
|
#ifndef PRX_DEBUG_XXX
|
||||||
SPU.GPR[3]._u32[3] = num;
|
spu->RegisterHleFuncion(spurs->m.spuImg.entry_point, spursKernelMain);
|
||||||
SPU.GPR[4]._u64[1] = spurs.addr();
|
|
||||||
|
|
||||||
#ifdef PRX_DEBUG_XXX
|
|
||||||
return SPU.FastCall(SPU.PC);
|
|
||||||
#endif
|
#endif
|
||||||
|
spurs->m.spus[num] = spu->GetId();
|
||||||
spursKernelMain(SPU);
|
|
||||||
})->GetId();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & SAF_SPU_PRINTF_ENABLED)
|
if (flags & SAF_SPU_PRINTF_ENABLED)
|
||||||
|
|
|
@ -102,6 +102,12 @@ enum SPURSKernelInterfaces
|
||||||
CELL_SPURS_INTERRUPT_VECTOR = 0x0,
|
CELL_SPURS_INTERRUPT_VECTOR = 0x0,
|
||||||
CELL_SPURS_LOCK_LINE = 0x80,
|
CELL_SPURS_LOCK_LINE = 0x80,
|
||||||
CELL_SPURS_KERNEL_DMA_TAG_ID = 31,
|
CELL_SPURS_KERNEL_DMA_TAG_ID = 31,
|
||||||
|
CELL_SPURS_KERNEL1_ENTRY_ADDR = 0x818,
|
||||||
|
CELL_SPURS_KERNEL2_ENTRY_ADDR = 0x848,
|
||||||
|
CELL_SPURS_KERNEL1_YIELD_ADDR = 0x808,
|
||||||
|
CELL_SPURS_KERNEL2_YIELD_ADDR = 0x838,
|
||||||
|
CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR = 0x290,
|
||||||
|
CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR = 0x290,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum RangeofEventQueuePortNumbers
|
enum RangeofEventQueuePortNumbers
|
||||||
|
@ -885,14 +891,23 @@ struct SpursKernelMgmtData
|
||||||
u8 spuIdling; // 0x1EB
|
u8 spuIdling; // 0x1EB
|
||||||
be_t<u16> wklRunnable1; // 0x1EC
|
be_t<u16> wklRunnable1; // 0x1EC
|
||||||
be_t<u16> wklRunnable2; // 0x1EE
|
be_t<u16> wklRunnable2; // 0x1EE
|
||||||
u8 x1F0[0x210 - 0x1F0]; // 0x1F0
|
be_t<u32> x1F0; // 0x1F0
|
||||||
|
be_t<u32> x1F4; // 0x1F4
|
||||||
|
be_t<u32> x1F8; // 0x1F8
|
||||||
|
be_t<u32> x1FC; // 0x1FC
|
||||||
|
be_t<u32> x200; // 0x200
|
||||||
|
be_t<u32> x204; // 0x204
|
||||||
|
be_t<u32> x208; // 0x208
|
||||||
|
be_t<u32> x20C; // 0x20C
|
||||||
be_t<u64> traceBuffer; // 0x210
|
be_t<u64> traceBuffer; // 0x210
|
||||||
be_t<u32> traceMsgCount; // 0x218
|
be_t<u32> traceMsgCount; // 0x218
|
||||||
be_t<u32> traceMaxCount; // 0x21C
|
be_t<u32> traceMaxCount; // 0x21C
|
||||||
u8 wklUniqueId[0x10]; // 0x220
|
u8 wklUniqueId[0x10]; // 0x220
|
||||||
|
u8 x230[0x280 - 0x230]; // 0x230
|
||||||
|
be_t<u32> guid[4]; // 0x280
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(sizeof(SpursKernelMgmtData) == 0x130, "Incorrect size for SpursKernelMgmtData");
|
static_assert(sizeof(SpursKernelMgmtData) == 0x190, "Incorrect size for SpursKernelMgmtData");
|
||||||
|
|
||||||
// The SPURS taskset policy module data store. This resides at 0x2700 of the LS.
|
// The SPURS taskset policy module data store. This resides at 0x2700 of the LS.
|
||||||
struct SpursTasksetPmMgmtData
|
struct SpursTasksetPmMgmtData
|
||||||
|
|
|
@ -14,11 +14,15 @@
|
||||||
void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag);
|
void cellSpursModulePutTrace(CellSpursTracePacket * packet, unsigned tag);
|
||||||
u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status);
|
u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status);
|
||||||
|
|
||||||
|
bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag);
|
||||||
|
u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask);
|
||||||
|
u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll = true);
|
||||||
|
|
||||||
//
|
//
|
||||||
// SPURS Kernel functions
|
// SPURS Kernel functions
|
||||||
//
|
//
|
||||||
void spursKernelSelectWorkload(SPUThread & spu);
|
bool spursKernel1SelectWorkload(SPUThread & spu);
|
||||||
void spursKernelSelectWorkload2(SPUThread & spu);
|
bool spursKernel2SelectWorkload(SPUThread & spu);
|
||||||
|
|
||||||
//
|
//
|
||||||
// SPURS system service workload functions
|
// SPURS system service workload functions
|
||||||
|
@ -31,7 +35,7 @@ void spursSysServiceUpdateWorkload(SPUThread & spu, SpursKernelMgmtData * mgmt);
|
||||||
void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt);
|
void spursSysServiceProcessMessages(SPUThread & spu, SpursKernelMgmtData * mgmt);
|
||||||
void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt);
|
void spursSysServiceWaitOrExit(SPUThread & spu, SpursKernelMgmtData * mgmt);
|
||||||
void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus);
|
void spursSysServiceWorkloadMain(SPUThread & spu, u32 pollStatus);
|
||||||
void spursSysServiceWorkloadEntry(SPUThread & spu);
|
bool spursSysServiceWorkloadEntry(SPUThread & spu);
|
||||||
|
|
||||||
//
|
//
|
||||||
// SPURS taskset policy module functions
|
// SPURS taskset policy module functions
|
||||||
|
@ -54,9 +58,9 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) {
|
||||||
|
|
||||||
spu.GPR[3]._u32[3] = 1;
|
spu.GPR[3]._u32[3] = 1;
|
||||||
if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) {
|
if (mgmt->spurs->m.flags1 & SF1_32_WORKLOADS) {
|
||||||
spursKernelSelectWorkload2(spu);
|
spursKernel2SelectWorkload(spu);
|
||||||
} else {
|
} else {
|
||||||
spursKernelSelectWorkload(spu);
|
spursKernel1SelectWorkload(spu);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto result = spu.GPR[3]._u64[1];
|
auto result = spu.GPR[3]._u64[1];
|
||||||
|
@ -68,14 +72,51 @@ u32 cellSpursModulePollStatus(SPUThread & spu, u32 * status) {
|
||||||
return wklId == mgmt->wklCurrentId ? 0 : 1;
|
return wklId == mgmt->wklCurrentId ? 0 : 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Execute a DMA operation
|
||||||
|
bool spursDma(SPUThread & spu, u32 cmd, u64 ea, u32 lsa, u32 size, u32 tag) {
|
||||||
|
spu.WriteChannel(MFC_LSA, u128::from32r(lsa));
|
||||||
|
spu.WriteChannel(MFC_EAH, u128::from32r((u32)(ea >> 32)));
|
||||||
|
spu.WriteChannel(MFC_EAL, u128::from32r((u32)ea));
|
||||||
|
spu.WriteChannel(MFC_Size, u128::from32r(size));
|
||||||
|
spu.WriteChannel(MFC_TagID, u128::from32r(tag));
|
||||||
|
spu.WriteChannel(MFC_Cmd, u128::from32r(cmd));
|
||||||
|
|
||||||
|
if (cmd == MFC_GETLLAR_CMD || cmd == MFC_PUTLLC_CMD || cmd == MFC_PUTLLUC_CMD) {
|
||||||
|
u128 rv;
|
||||||
|
|
||||||
|
spu.ReadChannel(rv, MFC_RdAtomicStat);
|
||||||
|
return rv._u32[3] ? true : false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the status of DMA operations
|
||||||
|
u32 spursDmaGetCompletionStatus(SPUThread & spu, u32 tagMask) {
|
||||||
|
u128 rv;
|
||||||
|
|
||||||
|
spu.WriteChannel(MFC_WrTagMask, u128::from32r(tagMask));
|
||||||
|
spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(MFC_TAG_UPDATE_IMMEDIATE));
|
||||||
|
spu.ReadChannel(rv, MFC_RdTagStat);
|
||||||
|
return rv._u32[3];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wait for DMA operations to complete
|
||||||
|
u32 spursDmaWaitForCompletion(SPUThread & spu, u32 tagMask, bool waitForAll) {
|
||||||
|
u128 rv;
|
||||||
|
|
||||||
|
spu.WriteChannel(MFC_WrTagMask, u128::from32r(tagMask));
|
||||||
|
spu.WriteChannel(MFC_WrTagUpdate, u128::from32r(waitForAll ? MFC_TAG_UPDATE_ALL : MFC_TAG_UPDATE_ANY));
|
||||||
|
spu.ReadChannel(rv, MFC_RdTagStat);
|
||||||
|
return rv._u32[3];
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// SPURS kernel functions
|
// SPURS kernel functions
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
/// Select a workload to run
|
/// Select a workload to run
|
||||||
void spursKernelSelectWorkload(SPUThread & spu) {
|
bool spursKernel1SelectWorkload(SPUThread & spu) {
|
||||||
LV2_LOCK(0); // TODO: lock-free implementation if possible
|
|
||||||
|
|
||||||
auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
|
auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
|
||||||
|
|
||||||
// The first and only argument to this function is a boolean that is set to false if the function
|
// The first and only argument to this function is a boolean that is set to false if the function
|
||||||
|
@ -83,140 +124,148 @@ void spursKernelSelectWorkload(SPUThread & spu) {
|
||||||
// If the first argument is true then the shared data is not updated with the result.
|
// If the first argument is true then the shared data is not updated with the result.
|
||||||
const auto isPoll = spu.GPR[3]._u32[3];
|
const auto isPoll = spu.GPR[3]._u32[3];
|
||||||
|
|
||||||
// Calculate the contention (number of SPUs used) for each workload
|
u32 wklSelectedId;
|
||||||
u8 contention[CELL_SPURS_MAX_WORKLOAD];
|
u32 pollStatus;
|
||||||
u8 pendingContention[CELL_SPURS_MAX_WORKLOAD];
|
|
||||||
for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
|
|
||||||
contention[i] = mgmt->spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i];
|
|
||||||
|
|
||||||
// If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably
|
do {
|
||||||
// to prevent unnecessary jumps to the kernel
|
// DMA and lock the first 0x80 bytes of spurs
|
||||||
if (isPoll) {
|
spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
|
||||||
pendingContention[i] = mgmt->spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i];
|
CellSpurs * spurs = (CellSpurs *)mgmt->tempArea;
|
||||||
if (i != mgmt->wklCurrentId) {
|
|
||||||
contention[i] += pendingContention[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
|
// Calculate the contention (number of SPUs used) for each workload
|
||||||
u32 pollStatus = 0;
|
u8 contention[CELL_SPURS_MAX_WORKLOAD];
|
||||||
|
u8 pendingContention[CELL_SPURS_MAX_WORKLOAD];
|
||||||
// The system service workload has the highest priority. Select the system service workload if
|
|
||||||
// the system service message bit for this SPU is set.
|
|
||||||
if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
|
|
||||||
mgmt->spuIdling = 0;
|
|
||||||
if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
|
||||||
// Clear the message bit
|
|
||||||
mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Caclulate the scheduling weight for each workload
|
|
||||||
u16 maxWeight = 0;
|
|
||||||
for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
|
for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
|
||||||
u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i);
|
contention[i] = spurs->m.wklCurrentContention[i] - mgmt->wklLocContention[i];
|
||||||
u16 wklSignal = mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
|
|
||||||
u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
|
|
||||||
u8 readyCount = mgmt->spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklReadyCount1[i].read_relaxed();
|
|
||||||
u8 idleSpuCount = mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
|
|
||||||
u8 requestCount = readyCount + idleSpuCount;
|
|
||||||
|
|
||||||
// For a workload to be considered for scheduling:
|
// If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably
|
||||||
// 1. Its priority must not be 0
|
// to prevent unnecessary jumps to the kernel
|
||||||
// 2. The number of SPUs used by it must be less than the max contention for that workload
|
if (isPoll) {
|
||||||
// 3. The workload should be in runnable state
|
pendingContention[i] = spurs->m.wklPendingContention[i] - mgmt->wklLocPendingContention[i];
|
||||||
// 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
|
if (i != mgmt->wklCurrentId) {
|
||||||
// OR the workload must be signalled
|
contention[i] += pendingContention[i];
|
||||||
// OR the workload flag is 0 and the workload is configured as the wokload flag receiver
|
|
||||||
if (runnable && mgmt->priority[i] != 0 && mgmt->spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) {
|
|
||||||
if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) {
|
|
||||||
// The scheduling weight of the workload is formed from the following parameters in decreasing order of priority:
|
|
||||||
// 1. Wokload signal set or workload flag or ready count > contention
|
|
||||||
// 2. Priority of the workload on the SPU
|
|
||||||
// 3. Is the workload the last selected workload
|
|
||||||
// 4. Minimum contention of the workload
|
|
||||||
// 5. Number of SPUs that are being used by the workload (lesser the number, more the weight)
|
|
||||||
// 6. Is the workload executable same as the currently loaded executable
|
|
||||||
// 7. The workload id (lesser the number, more the weight)
|
|
||||||
u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0;
|
|
||||||
weight |= (u16)(mgmt->priority[i] & 0x7F) << 16;
|
|
||||||
weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00;
|
|
||||||
weight |= (contention[i] > 0 && mgmt->spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00;
|
|
||||||
weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2;
|
|
||||||
weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00;
|
|
||||||
weight |= 0x01;
|
|
||||||
|
|
||||||
// In case of a tie the lower numbered workload is chosen
|
|
||||||
if (weight > maxWeight) {
|
|
||||||
wklSelectedId = i;
|
|
||||||
maxWeight = weight;
|
|
||||||
pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
|
|
||||||
pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
|
|
||||||
pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Not sure what this does. Possibly mark the SPU as idle/in use.
|
wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
|
||||||
mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
|
pollStatus = 0;
|
||||||
|
|
||||||
if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
|
// The system service workload has the highest priority. Select the system service workload if
|
||||||
// Clear workload signal for the selected workload
|
// the system service message bit for this SPU is set.
|
||||||
mgmt->spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
|
if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
|
||||||
mgmt->spurs->m.wklSignal2.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
|
mgmt->spuIdling = 0;
|
||||||
|
if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
||||||
|
// Clear the message bit
|
||||||
|
spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Caclulate the scheduling weight for each workload
|
||||||
|
u16 maxWeight = 0;
|
||||||
|
for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
|
||||||
|
u16 runnable = mgmt->wklRunnable1 & (0x8000 >> i);
|
||||||
|
u16 wklSignal = spurs->m.wklSignal1.read_relaxed() & (0x8000 >> i);
|
||||||
|
u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
|
||||||
|
u8 readyCount = spurs->m.wklReadyCount1[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklReadyCount1[i].read_relaxed();
|
||||||
|
u8 idleSpuCount = spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed() > CELL_SPURS_MAX_SPU ? CELL_SPURS_MAX_SPU : spurs->m.wklIdleSpuCountOrReadyCount2[i].read_relaxed();
|
||||||
|
u8 requestCount = readyCount + idleSpuCount;
|
||||||
|
|
||||||
// If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
|
// For a workload to be considered for scheduling:
|
||||||
if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) {
|
// 1. Its priority must not be 0
|
||||||
mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
|
// 2. The number of SPUs used by it must be less than the max contention for that workload
|
||||||
|
// 3. The workload should be in runnable state
|
||||||
|
// 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
|
||||||
|
// OR the workload must be signalled
|
||||||
|
// OR the workload flag is 0 and the workload is configured as the wokload flag receiver
|
||||||
|
if (runnable && mgmt->priority[i] != 0 && spurs->m.wklMaxContention[i].read_relaxed() > contention[i]) {
|
||||||
|
if (wklFlag || wklSignal || (readyCount != 0 && requestCount > contention[i])) {
|
||||||
|
// The scheduling weight of the workload is formed from the following parameters in decreasing order of priority:
|
||||||
|
// 1. Wokload signal set or workload flag or ready count > contention
|
||||||
|
// 2. Priority of the workload on the SPU
|
||||||
|
// 3. Is the workload the last selected workload
|
||||||
|
// 4. Minimum contention of the workload
|
||||||
|
// 5. Number of SPUs that are being used by the workload (lesser the number, more the weight)
|
||||||
|
// 6. Is the workload executable same as the currently loaded executable
|
||||||
|
// 7. The workload id (lesser the number, more the weight)
|
||||||
|
u16 weight = (wklFlag || wklSignal || (readyCount > contention[i])) ? 0x8000 : 0;
|
||||||
|
weight |= (u16)(mgmt->priority[i] & 0x7F) << 16;
|
||||||
|
weight |= i == mgmt->wklCurrentId ? 0x80 : 0x00;
|
||||||
|
weight |= (contention[i] > 0 && spurs->m.wklMinContention[i] > contention[i]) ? 0x40 : 0x00;
|
||||||
|
weight |= ((CELL_SPURS_MAX_SPU - contention[i]) & 0x0F) << 2;
|
||||||
|
weight |= mgmt->wklUniqueId[i] == mgmt->wklCurrentId ? 0x02 : 0x00;
|
||||||
|
weight |= 0x01;
|
||||||
|
|
||||||
|
// In case of a tie the lower numbered workload is chosen
|
||||||
|
if (weight > maxWeight) {
|
||||||
|
wklSelectedId = i;
|
||||||
|
maxWeight = weight;
|
||||||
|
pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
|
||||||
|
pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
|
||||||
|
pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not sure what this does. Possibly mark the SPU as idle/in use.
|
||||||
|
mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
|
||||||
|
|
||||||
|
if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
|
||||||
|
// Clear workload signal for the selected workload
|
||||||
|
spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
|
||||||
|
spurs->m.wklSignal2.write_relaxed(be_t<u16>::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
|
||||||
|
|
||||||
|
// If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
|
||||||
|
if (wklSelectedId == spurs->m.wklFlagReceiver.read_relaxed()) {
|
||||||
|
spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (!isPoll) {
|
if (!isPoll) {
|
||||||
// Called by kernel
|
// Called by kernel
|
||||||
// Increment the contention for the selected workload
|
// Increment the contention for the selected workload
|
||||||
if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
||||||
contention[wklSelectedId]++;
|
contention[wklSelectedId]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
|
for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
|
||||||
mgmt->spurs->m.wklCurrentContention[i] = contention[i];
|
spurs->m.wklCurrentContention[i] = contention[i];
|
||||||
mgmt->wklLocContention[i] = 0;
|
mgmt->wklLocContention[i] = 0;
|
||||||
mgmt->wklLocPendingContention[i] = 0;
|
mgmt->wklLocPendingContention[i] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
||||||
mgmt->wklLocContention[wklSelectedId] = 1;
|
mgmt->wklLocContention[wklSelectedId] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
mgmt->wklCurrentId = wklSelectedId;
|
mgmt->wklCurrentId = wklSelectedId;
|
||||||
} else if (wklSelectedId != mgmt->wklCurrentId) {
|
} else if (wklSelectedId != mgmt->wklCurrentId) {
|
||||||
// Not called by kernel but a context switch is required
|
// Not called by kernel but a context switch is required
|
||||||
// Increment the pending contention for the selected workload
|
// Increment the pending contention for the selected workload
|
||||||
if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
||||||
pendingContention[wklSelectedId]++;
|
pendingContention[wklSelectedId]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
|
for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD; i++) {
|
||||||
mgmt->spurs->m.wklPendingContention[i] = pendingContention[i];
|
spurs->m.wklPendingContention[i] = pendingContention[i];
|
||||||
mgmt->wklLocPendingContention[i] = 0;
|
mgmt->wklLocPendingContention[i] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
||||||
mgmt->wklLocPendingContention[wklSelectedId] = 1;
|
mgmt->wklLocPendingContention[wklSelectedId] = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
} while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
|
||||||
|
|
||||||
u64 result = (u64)wklSelectedId << 32;
|
u64 result = (u64)wklSelectedId << 32;
|
||||||
result |= pollStatus;
|
result |= pollStatus;
|
||||||
spu.GPR[3]._u64[1] = result;
|
spu.GPR[3]._u64[1] = result;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Select a workload to run
|
/// Select a workload to run
|
||||||
void spursKernelSelectWorkload2(SPUThread & spu) {
|
bool spursKernel2SelectWorkload(SPUThread & spu) {
|
||||||
LV2_LOCK(0); // TODO: lock-free implementation if possible
|
|
||||||
|
|
||||||
auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
|
auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
|
||||||
|
|
||||||
// The first and only argument to this function is a boolean that is set to false if the function
|
// The first and only argument to this function is a boolean that is set to false if the function
|
||||||
|
@ -224,202 +273,214 @@ void spursKernelSelectWorkload2(SPUThread & spu) {
|
||||||
// If the first argument is true then the shared data is not updated with the result.
|
// If the first argument is true then the shared data is not updated with the result.
|
||||||
const auto isPoll = spu.GPR[3]._u32[3];
|
const auto isPoll = spu.GPR[3]._u32[3];
|
||||||
|
|
||||||
// Calculate the contention (number of SPUs used) for each workload
|
u32 wklSelectedId;
|
||||||
u8 contention[CELL_SPURS_MAX_WORKLOAD2];
|
u32 pollStatus;
|
||||||
u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2];
|
|
||||||
for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
|
|
||||||
contention[i] = mgmt->spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F];
|
|
||||||
contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4;
|
|
||||||
|
|
||||||
// If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably
|
do {
|
||||||
// to prevent unnecessary jumps to the kernel
|
// DMA and lock the first 0x80 bytes of spurs
|
||||||
if (isPoll) {
|
spursDma(spu, MFC_GETLLAR_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/);
|
||||||
pendingContention[i] = mgmt->spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F];
|
CellSpurs * spurs = (CellSpurs *)mgmt->tempArea;
|
||||||
pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4;
|
|
||||||
if (i != mgmt->wklCurrentId) {
|
|
||||||
contention[i] += pendingContention[i];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
|
// Calculate the contention (number of SPUs used) for each workload
|
||||||
u32 pollStatus = 0;
|
u8 contention[CELL_SPURS_MAX_WORKLOAD2];
|
||||||
|
u8 pendingContention[CELL_SPURS_MAX_WORKLOAD2];
|
||||||
// The system service workload has the highest priority. Select the system service workload if
|
|
||||||
// the system service message bit for this SPU is set.
|
|
||||||
if (mgmt->spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
|
|
||||||
// Not sure what this does. Possibly Mark the SPU as in use.
|
|
||||||
mgmt->spuIdling = 0;
|
|
||||||
if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
|
||||||
// Clear the message bit
|
|
||||||
mgmt->spurs->m.sysSrvMessage.write_relaxed(mgmt->spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// Caclulate the scheduling weight for each workload
|
|
||||||
u8 maxWeight = 0;
|
|
||||||
for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
|
for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
|
||||||
auto j = i & 0x0F;
|
contention[i] = spurs->m.wklCurrentContention[i & 0x0F] - mgmt->wklLocContention[i & 0x0F];
|
||||||
u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j);
|
contention[i] = i < CELL_SPURS_MAX_WORKLOAD ? contention[i] & 0x0F : contention[i] >> 4;
|
||||||
u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4;
|
|
||||||
u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : mgmt->spurs->m.wklMaxContention[j].read_relaxed() >> 4;
|
|
||||||
u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : mgmt->spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j);
|
|
||||||
u8 wklFlag = mgmt->spurs->m.wklFlag.flag.read_relaxed() == 0 ? mgmt->spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
|
|
||||||
u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklReadyCount1[j].read_relaxed() : mgmt->spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();
|
|
||||||
|
|
||||||
// For a workload to be considered for scheduling:
|
// If this is a poll request then the number of SPUs pending to context switch is also added to the contention presumably
|
||||||
// 1. Its priority must be greater than 0
|
// to prevent unnecessary jumps to the kernel
|
||||||
// 2. The number of SPUs used by it must be less than the max contention for that workload
|
if (isPoll) {
|
||||||
// 3. The workload should be in runnable state
|
pendingContention[i] = spurs->m.wklPendingContention[i & 0x0F] - mgmt->wklLocPendingContention[i & 0x0F];
|
||||||
// 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
|
pendingContention[i] = i < CELL_SPURS_MAX_WORKLOAD ? pendingContention[i] & 0x0F : pendingContention[i] >> 4;
|
||||||
// OR the workload must be signalled
|
if (i != mgmt->wklCurrentId) {
|
||||||
// OR the workload flag is 0 and the workload is configured as the workload receiver
|
contention[i] += pendingContention[i];
|
||||||
if (runnable && priority > 0 && maxContention > contention[i]) {
|
|
||||||
if (wklFlag || wklSignal || readyCount > contention[i]) {
|
|
||||||
// The scheduling weight of the workload is equal to the priority of the workload for the SPU.
|
|
||||||
// The current workload is given a slightly higher weight presumably to reduce the number of context switches.
|
|
||||||
// In case of a tie the lower numbered workload is chosen.
|
|
||||||
u8 weight = priority << 4;
|
|
||||||
if (mgmt->wklCurrentId == i) {
|
|
||||||
weight |= 0x04;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (weight > maxWeight) {
|
|
||||||
wklSelectedId = i;
|
|
||||||
maxWeight = weight;
|
|
||||||
pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
|
|
||||||
pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
|
|
||||||
pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Not sure what this does. Possibly mark the SPU as idle/in use.
|
wklSelectedId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
|
||||||
mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
|
pollStatus = 0;
|
||||||
|
|
||||||
if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
|
// The system service workload has the highest priority. Select the system service workload if
|
||||||
// Clear workload signal for the selected workload
|
// the system service message bit for this SPU is set.
|
||||||
mgmt->spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
|
if (spurs->m.sysSrvMessage.read_relaxed() & (1 << mgmt->spuNum)) {
|
||||||
mgmt->spurs->m.wklSignal2.write_relaxed(be_t<u16>::make(mgmt->spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
|
// Not sure what this does. Possibly Mark the SPU as in use.
|
||||||
|
mgmt->spuIdling = 0;
|
||||||
|
if (!isPoll || mgmt->wklCurrentId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
||||||
|
// Clear the message bit
|
||||||
|
spurs->m.sysSrvMessage.write_relaxed(spurs->m.sysSrvMessage.read_relaxed() & ~(1 << mgmt->spuNum));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Calculate the scheduling weight for each workload
|
||||||
|
u8 maxWeight = 0;
|
||||||
|
for (auto i = 0; i < CELL_SPURS_MAX_WORKLOAD2; i++) {
|
||||||
|
auto j = i & 0x0F;
|
||||||
|
u16 runnable = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->wklRunnable1 & (0x8000 >> j) : mgmt->wklRunnable2 & (0x8000 >> j);
|
||||||
|
u8 priority = i < CELL_SPURS_MAX_WORKLOAD ? mgmt->priority[j] & 0x0F : mgmt->priority[j] >> 4;
|
||||||
|
u8 maxContention = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklMaxContention[j].read_relaxed() & 0x0F : spurs->m.wklMaxContention[j].read_relaxed() >> 4;
|
||||||
|
u16 wklSignal = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklSignal1.read_relaxed() & (0x8000 >> j) : spurs->m.wklSignal2.read_relaxed() & (0x8000 >> j);
|
||||||
|
u8 wklFlag = spurs->m.wklFlag.flag.read_relaxed() == 0 ? spurs->m.wklFlagReceiver.read_relaxed() == i ? 1 : 0 : 0;
|
||||||
|
u8 readyCount = i < CELL_SPURS_MAX_WORKLOAD ? spurs->m.wklReadyCount1[j].read_relaxed() : spurs->m.wklIdleSpuCountOrReadyCount2[j].read_relaxed();
|
||||||
|
|
||||||
// If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
|
// For a workload to be considered for scheduling:
|
||||||
if (wklSelectedId == mgmt->spurs->m.wklFlagReceiver.read_relaxed()) {
|
// 1. Its priority must be greater than 0
|
||||||
mgmt->spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
|
// 2. The number of SPUs used by it must be less than the max contention for that workload
|
||||||
|
// 3. The workload should be in runnable state
|
||||||
|
// 4. The number of SPUs allocated to it must be less than the number of SPUs requested (i.e. readyCount)
|
||||||
|
// OR the workload must be signalled
|
||||||
|
// OR the workload flag is 0 and the workload is configured as the workload receiver
|
||||||
|
if (runnable && priority > 0 && maxContention > contention[i]) {
|
||||||
|
if (wklFlag || wklSignal || readyCount > contention[i]) {
|
||||||
|
// The scheduling weight of the workload is equal to the priority of the workload for the SPU.
|
||||||
|
// The current workload is given a slightly higher weight presumably to reduce the number of context switches.
|
||||||
|
// In case of a tie the lower numbered workload is chosen.
|
||||||
|
u8 weight = priority << 4;
|
||||||
|
if (mgmt->wklCurrentId == i) {
|
||||||
|
weight |= 0x04;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (weight > maxWeight) {
|
||||||
|
wklSelectedId = i;
|
||||||
|
maxWeight = weight;
|
||||||
|
pollStatus = readyCount > contention[i] ? CELL_SPURS_MODULE_POLL_STATUS_READYCOUNT : 0;
|
||||||
|
pollStatus |= wklSignal ? CELL_SPURS_MODULE_POLL_STATUS_SIGNAL : 0;
|
||||||
|
pollStatus |= wklFlag ? CELL_SPURS_MODULE_POLL_STATUS_FLAG : 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Not sure what this does. Possibly mark the SPU as idle/in use.
|
||||||
|
mgmt->spuIdling = wklSelectedId == CELL_SPURS_SYS_SERVICE_WORKLOAD_ID ? 1 : 0;
|
||||||
|
|
||||||
|
if (!isPoll || wklSelectedId == mgmt->wklCurrentId) {
|
||||||
|
// Clear workload signal for the selected workload
|
||||||
|
spurs->m.wklSignal1.write_relaxed(be_t<u16>::make(spurs->m.wklSignal1.read_relaxed() & ~(0x8000 >> wklSelectedId)));
|
||||||
|
spurs->m.wklSignal2.write_relaxed(be_t<u16>::make(spurs->m.wklSignal1.read_relaxed() & ~(0x80000000u >> wklSelectedId)));
|
||||||
|
|
||||||
|
// If the selected workload is the wklFlag workload then pull the wklFlag to all 1s
|
||||||
|
if (wklSelectedId == spurs->m.wklFlagReceiver.read_relaxed()) {
|
||||||
|
spurs->m.wklFlag.flag.write_relaxed(be_t<u32>::make(0xFFFFFFFF));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (!isPoll) {
|
if (!isPoll) {
|
||||||
// Called by kernel
|
// Called by kernel
|
||||||
// Increment the contention for the selected workload
|
// Increment the contention for the selected workload
|
||||||
if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
||||||
contention[wklSelectedId]++;
|
contention[wklSelectedId]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
|
||||||
|
spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4);
|
||||||
|
mgmt->wklLocContention[i] = 0;
|
||||||
|
mgmt->wklLocPendingContention[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
|
||||||
|
mgmt->wklCurrentId = wklSelectedId;
|
||||||
|
} else if (wklSelectedId != mgmt->wklCurrentId) {
|
||||||
|
// Not called by kernel but a context switch is required
|
||||||
|
// Increment the pending contention for the selected workload
|
||||||
|
if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
||||||
|
pendingContention[wklSelectedId]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
|
||||||
|
spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4);
|
||||||
|
mgmt->wklLocPendingContention[i] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
|
||||||
}
|
}
|
||||||
|
} while (spursDma(spu, MFC_PUTLLC_CMD, mgmt->spurs.addr(), 0x100/*LSA*/, 0x80/*size*/, 0/*tag*/) == false);
|
||||||
for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
|
|
||||||
mgmt->spurs->m.wklCurrentContention[i] = contention[i] | (contention[i + 0x10] << 4);
|
|
||||||
mgmt->wklLocContention[i] = 0;
|
|
||||||
mgmt->wklLocPendingContention[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
mgmt->wklLocContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
|
|
||||||
mgmt->wklCurrentId = wklSelectedId;
|
|
||||||
} else if (wklSelectedId != mgmt->wklCurrentId) {
|
|
||||||
// Not called by kernel but a context switch is required
|
|
||||||
// Increment the pending contention for the selected workload
|
|
||||||
if (wklSelectedId != CELL_SPURS_SYS_SERVICE_WORKLOAD_ID) {
|
|
||||||
pendingContention[wklSelectedId]++;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto i = 0; i < (CELL_SPURS_MAX_WORKLOAD2 >> 1); i++) {
|
|
||||||
mgmt->spurs->m.wklPendingContention[i] = pendingContention[i] | (pendingContention[i + 0x10] << 4);
|
|
||||||
mgmt->wklLocPendingContention[i] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
mgmt->wklLocPendingContention[wklSelectedId & 0x0F] = wklSelectedId < CELL_SPURS_MAX_WORKLOAD ? 0x01 : wklSelectedId < CELL_SPURS_MAX_WORKLOAD2 ? 0x10 : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
u64 result = (u64)wklSelectedId << 32;
|
u64 result = (u64)wklSelectedId << 32;
|
||||||
result |= pollStatus;
|
result |= pollStatus;
|
||||||
spu.GPR[3]._u64[1] = result;
|
spu.GPR[3]._u64[1] = result;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Entry point of the SPURS kernel
|
/// SPURS kernel main
|
||||||
void spursKernelMain(SPUThread & spu) {
|
bool spursKernelMain(SPUThread & spu) {
|
||||||
SpursKernelMgmtData * mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
|
SpursKernelMgmtData * mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + 0x100);
|
||||||
mgmt->spuNum = spu.GPR[3]._u32[3];
|
|
||||||
mgmt->dmaTagId = 0x1F;
|
|
||||||
mgmt->spurs.set(spu.GPR[4]._u64[1]);
|
|
||||||
mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
|
|
||||||
mgmt->wklCurrentUniqueId = 0x20;
|
|
||||||
|
|
||||||
bool isSecond = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
|
bool isKernel2;
|
||||||
mgmt->yieldToKernelAddr = isSecond ? 0x838 : 0x808;
|
u32 pollStatus;
|
||||||
mgmt->selectWorkloadAddr = 0x290;
|
const CellSpurs::WorkloadInfo * wklInfo;
|
||||||
spu.WriteLS32(mgmt->yieldToKernelAddr, 2); // hack for cellSpursModuleExit
|
if (spu.PC == CELL_SPURS_KERNEL1_ENTRY_ADDR || spu.PC == CELL_SPURS_KERNEL2_ENTRY_ADDR) {
|
||||||
spu.WriteLS32(mgmt->selectWorkloadAddr, 3); // hack for cellSpursModulePollStatus
|
// Entry point of SPURS kernel
|
||||||
spu.WriteLS32(mgmt->selectWorkloadAddr + 4, 0x35000000); // bi $0
|
// Save arguments
|
||||||
spu.m_code3_func = isSecond ? spursKernelSelectWorkload2 : spursKernelSelectWorkload;
|
mgmt->spuNum = spu.GPR[3]._u32[3];
|
||||||
|
mgmt->spurs.set(spu.GPR[4]._u64[1]);
|
||||||
|
|
||||||
u32 wid = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
|
isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
|
||||||
u32 pollStatus = 0;
|
|
||||||
while (true) {
|
|
||||||
if (Emu.IsStopped()) {
|
|
||||||
cellSpurs->Warning("Spurs Kernel aborted");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get current workload info
|
memset(mgmt, 0, sizeof(SpursKernelMgmtData));
|
||||||
auto & wkl = wid < CELL_SPURS_MAX_WORKLOAD ? mgmt->spurs->m.wklInfo1[wid] : (wid < CELL_SPURS_MAX_WORKLOAD2 && isSecond ? mgmt->spurs->m.wklInfo2[wid & 0xf] : mgmt->spurs->m.wklInfoSysSrv);
|
|
||||||
|
|
||||||
if (mgmt->wklCurrentAddr != wkl.addr) {
|
// Initialise the SPURS management area to its initial values
|
||||||
if (wkl.addr.addr() != SPURS_IMG_ADDR_SYS_SRV_WORKLOAD) {
|
mgmt->dmaTagId = CELL_SPURS_KERNEL_DMA_TAG_ID;
|
||||||
// Load executable code
|
mgmt->wklCurrentUniqueId = 0x20;
|
||||||
memcpy(vm::get_ptr<void>(spu.ls_offset + 0xA00), wkl.addr.get_ptr(), wkl.size);
|
mgmt->wklCurrentId = CELL_SPURS_SYS_SERVICE_WORKLOAD_ID;
|
||||||
}
|
mgmt->yieldToKernelAddr = isKernel2 ? CELL_SPURS_KERNEL2_YIELD_ADDR : CELL_SPURS_KERNEL1_YIELD_ADDR;
|
||||||
mgmt->wklCurrentAddr = wkl.addr;
|
mgmt->selectWorkloadAddr = isKernel2 ? CELL_SPURS_KERNEL2_SELECT_WORKLOAD_ADDR : CELL_SPURS_KERNEL1_SELECT_WORKLOAD_ADDR;
|
||||||
mgmt->wklCurrentUniqueId = wkl.uniqueId.read_relaxed();
|
if (!isKernel2) {
|
||||||
}
|
mgmt->x1F0 = 0xF0020000;
|
||||||
|
mgmt->x200 = 0x20000;
|
||||||
if (!isSecond) {
|
mgmt->guid[0] = 0x423A3A02;
|
||||||
mgmt->moduleId[0] = 0;
|
mgmt->guid[1] = 0x43F43A82;
|
||||||
mgmt->moduleId[1] = 0;
|
mgmt->guid[2] = 0x43F26502;
|
||||||
}
|
mgmt->guid[3] = 0x420EB382;
|
||||||
|
|
||||||
// Run workload
|
|
||||||
spu.GPR[1]._u32[3] = 0x3FFB0;
|
|
||||||
spu.GPR[3]._u32[3] = 0x100;
|
|
||||||
spu.GPR[4]._u64[1] = wkl.arg;
|
|
||||||
spu.GPR[5]._u32[3] = pollStatus;
|
|
||||||
spu.SetPc(0xA00);
|
|
||||||
switch (mgmt->wklCurrentAddr.addr()) {
|
|
||||||
case SPURS_IMG_ADDR_SYS_SRV_WORKLOAD:
|
|
||||||
spursSysServiceWorkloadEntry(spu);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
spu.FastCall(0xA00);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check status
|
|
||||||
auto status = spu.SPU.Status.GetValue();
|
|
||||||
if (status == SPU_STATUS_STOPPED_BY_STOP) {
|
|
||||||
return;
|
|
||||||
} else {
|
} else {
|
||||||
assert(status == SPU_STATUS_RUNNING);
|
mgmt->guid[0] = 0x43A08402;
|
||||||
|
mgmt->guid[1] = 0x43FB0A82;
|
||||||
|
mgmt->guid[2] = 0x435E9302;
|
||||||
|
mgmt->guid[3] = 0x43A3C982;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
spu.UnregisterHleFunctions(0, 0x40000); // TODO: use a symbolic constant
|
||||||
|
spu.RegisterHleFuncion(isKernel2 ? CELL_SPURS_KERNEL2_ENTRY_ADDR : CELL_SPURS_KERNEL1_ENTRY_ADDR, spursKernelMain);
|
||||||
|
spu.RegisterHleFuncion(mgmt->yieldToKernelAddr, spursKernelMain);
|
||||||
|
spu.RegisterHleFuncion(mgmt->selectWorkloadAddr, isKernel2 ? spursKernel2SelectWorkload : spursKernel1SelectWorkload);
|
||||||
|
|
||||||
|
// Start the system service workload
|
||||||
|
spu.RegisterHleFuncion(0xA00, spursSysServiceWorkloadEntry);
|
||||||
|
wklInfo = &mgmt->spurs->m.wklInfoSysSrv;
|
||||||
|
pollStatus = 0;
|
||||||
|
} else if (spu.PC == mgmt->yieldToKernelAddr) {
|
||||||
|
isKernel2 = mgmt->spurs->m.flags1 & SF1_32_WORKLOADS ? true : false;
|
||||||
|
|
||||||
// Select next workload to run
|
// Select next workload to run
|
||||||
spu.GPR[3].clear();
|
spu.GPR[3].clear();
|
||||||
if (isSecond) {
|
if (isKernel2) {
|
||||||
spursKernelSelectWorkload2(spu);
|
spursKernel2SelectWorkload(spu);
|
||||||
} else {
|
} else {
|
||||||
spursKernelSelectWorkload(spu);
|
spursKernel1SelectWorkload(spu);
|
||||||
}
|
}
|
||||||
u64 res = spu.GPR[3]._u64[1];
|
|
||||||
pollStatus = (u32)(res);
|
pollStatus = (u32)(spu.GPR[3]._u64[1]);
|
||||||
wid = (u32)(res >> 32);
|
auto wid = (u32)(spu.GPR[3]._u64[1] >> 32);
|
||||||
|
wklInfo = wid < CELL_SPURS_MAX_WORKLOAD ? &mgmt->spurs->m.wklInfo1[wid] :
|
||||||
|
(wid < CELL_SPURS_MAX_WORKLOAD2 && isKernel2 ? &mgmt->spurs->m.wklInfo2[wid & 0xf] : &mgmt->spurs->m.wklInfoSysSrv);
|
||||||
|
} else {
|
||||||
|
assert(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!isKernel2) {
|
||||||
|
mgmt->moduleId[0] = 0;
|
||||||
|
mgmt->moduleId[1] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run workload
|
||||||
|
spu.GPR[0]._u32[3] = mgmt->yieldToKernelAddr;
|
||||||
|
spu.GPR[1]._u32[3] = 0x3FFB0;
|
||||||
|
spu.GPR[3]._u32[3] = 0x100;
|
||||||
|
spu.GPR[4]._u64[1] = wklInfo->arg;
|
||||||
|
spu.GPR[5]._u32[3] = pollStatus;
|
||||||
|
spu.SetBranch(0xA00);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -783,7 +844,7 @@ poll:
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Entry point of the system service workload
|
/// Entry point of the system service workload
|
||||||
void spursSysServiceWorkloadEntry(SPUThread & spu) {
|
bool spursSysServiceWorkloadEntry(SPUThread & spu) {
|
||||||
auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + spu.GPR[3]._u32[3]);
|
auto mgmt = vm::get_ptr<SpursKernelMgmtData>(spu.ls_offset + spu.GPR[3]._u32[3]);
|
||||||
auto arg = spu.GPR[4]._u64[1];
|
auto arg = spu.GPR[4]._u64[1];
|
||||||
auto pollStatus = spu.GPR[5]._u32[3];
|
auto pollStatus = spu.GPR[5]._u32[3];
|
||||||
|
@ -800,7 +861,7 @@ void spursSysServiceWorkloadEntry(SPUThread & spu) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Ensure that this function always returns to the SPURS kernel
|
// TODO: Ensure that this function always returns to the SPURS kernel
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
@ -34,6 +34,8 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <random>
|
#include <random>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
|
#include <map>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include "Utilities/GNU.h"
|
#include "Utilities/GNU.h"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue