#pragma once #include "Emu/RSX/RSXFragmentProgram.h" #include "Emu/RSX/RSXVertexProgram.h" #include "Utilities/Log.h" enum class SHADER_TYPE { SHADER_TYPE_VERTEX, SHADER_TYPE_FRAGMENT }; namespace ProgramHashUtil { // Based on // https://github.com/AlexAltea/nucleus/blob/master/nucleus/gpu/rsx_pgraph.cpp union qword { u64 dword[2]; u32 word[4]; }; struct HashVertexProgram { size_t operator()(const std::vector &program) const { // 64-bit Fowler/Noll/Vo FNV-1a hash code size_t hash = 0xCBF29CE484222325ULL; const qword *instbuffer = (const qword*)program.data(); size_t instIndex = 0; bool end = false; for (unsigned i = 0; i < program.size() / 4; i++) { const qword inst = instbuffer[instIndex]; hash ^= inst.dword[0]; hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); hash ^= inst.dword[1]; hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); instIndex++; } return hash; } }; struct VertexProgramCompare { bool operator()(const std::vector &binary1, const std::vector &binary2) const { if (binary1.size() != binary2.size()) return false; const qword *instBuffer1 = (const qword*)binary1.data(); const qword *instBuffer2 = (const qword*)binary2.data(); size_t instIndex = 0; for (unsigned i = 0; i < binary1.size() / 4; i++) { const qword& inst1 = instBuffer1[instIndex]; const qword& inst2 = instBuffer2[instIndex]; if (inst1.dword[0] != inst2.dword[0] || inst1.dword[1] != inst2.dword[1]) return false; instIndex++; } return true; } }; struct FragmentProgramUtil { /** * returns true if the given source Operand is a constant */ static bool isConstant(u32 sourceOperand) { return ((sourceOperand >> 8) & 0x3) == 2; } /** * RSX fragment program constants are inlined inside shader code. * This function takes an instruction from a fragment program and * returns an equivalent instruction where inlined constants * are masked. * This allows to hash/compare fragment programs even if their * inlined constants are modified inbetween */ static qword fragmentMaskConstant(const qword &initialQword) { qword result = initialQword; if (isConstant(initialQword.word[1])) result.word[1] = 0; if (isConstant(initialQword.word[2])) result.word[2] = 0; if (isConstant(initialQword.word[3])) result.word[3] = 0; return result; } static size_t getFPBinarySize(void *ptr) { const qword *instBuffer = (const qword*)ptr; size_t instIndex = 0; while (true) { const qword& inst = instBuffer[instIndex]; bool isSRC0Constant = isConstant(inst.word[1]); bool isSRC1Constant = isConstant(inst.word[2]); bool isSRC2Constant = isConstant(inst.word[3]); bool end = (inst.word[0] >> 8) & 0x1; if (isSRC0Constant || isSRC1Constant || isSRC2Constant) { instIndex += 2; if (end) return instIndex * 4 * 4; continue; } instIndex++; if (end) return (instIndex)* 4 * 4; } } }; struct HashFragmentProgram { size_t operator()(const void *program) const { // 64-bit Fowler/Noll/Vo FNV-1a hash code size_t hash = 0xCBF29CE484222325ULL; const qword *instbuffer = (const qword*)program; size_t instIndex = 0; while (true) { const qword& inst = instbuffer[instIndex]; bool end = (inst.word[0] >> 8) & 0x1; if (end) return hash; const qword& maskedInst = FragmentProgramUtil::fragmentMaskConstant(inst); hash ^= maskedInst.dword[0]; hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); hash ^= maskedInst.dword[1]; hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + (hash << 8) + (hash << 40); instIndex++; // Skip constants if (FragmentProgramUtil::isConstant(inst.word[1]) || FragmentProgramUtil::isConstant(inst.word[2]) || FragmentProgramUtil::isConstant(inst.word[3])) instIndex++; } return 0; } }; struct FragmentProgramCompare { bool operator()(const void *binary1, const void *binary2) const { const qword *instBuffer1 = (const qword*)binary1; const qword *instBuffer2 = (const qword*)binary2; size_t instIndex = 0; while (true) { const qword& inst1 = instBuffer1[instIndex]; const qword& inst2 = instBuffer2[instIndex]; bool end = ((inst1.word[0] >> 8) & 0x1) && ((inst2.word[0] >> 8) & 0x1); if (end) return true; const qword& maskedInst1 = FragmentProgramUtil::fragmentMaskConstant(inst1); const qword& maskedInst2 = FragmentProgramUtil::fragmentMaskConstant(inst2); if (maskedInst1.dword[0] != maskedInst2.dword[0] || maskedInst1.dword[1] != maskedInst2.dword[1]) return false; instIndex++; // Skip constants if (FragmentProgramUtil::isConstant(inst1.word[1]) || FragmentProgramUtil::isConstant(inst1.word[2]) || FragmentProgramUtil::isConstant(inst1.word[3])) instIndex++; } } }; } /** * Cache for program help structure (blob, string...) * The class is responsible for creating the object so the state only has to call getGraphicPipelineState * Template argument is a struct which has the following type declaration : * - a typedef VertexProgramData to a type that encapsulate vertex program info. It should provide an Id member. * - a typedef FragmentProgramData to a types that encapsulate fragment program info. It should provide an Id member and a fragment constant offset vector. * - a typedef PipelineData encapsulating monolithic program. * - a typedef PipelineProperties to a type that encapsulate various state info relevant to program compilation (alpha test, primitive type,...) * - a typedef ExtraData type that will be passed to the buildProgram function. * It should also contains the following function member : * - static void RecompileFragmentProgram(RSXFragmentProgram *RSXFP, FragmentProgramData& fragmentProgramData, size_t ID); * - static void RecompileVertexProgram(RSXVertexProgram *RSXVP, VertexProgramData& vertexProgramData, size_t ID); * - static PipelineData *BuildProgram(VertexProgramData &vertexProgramData, FragmentProgramData &fragmentProgramData, const PipelineProperties &pipelineProperties, const ExtraData& extraData); * - void DeleteProgram(PipelineData *ptr); */ template class ProgramStateCache { private: typedef std::unordered_map, typename BackendTraits::VertexProgramData, ProgramHashUtil::HashVertexProgram, ProgramHashUtil::VertexProgramCompare> binary2VS; typedef std::unordered_map binary2FS; binary2VS m_cacheVS; binary2FS m_cacheFS; size_t m_currentShaderId; std::vector dummyFragmentConstantCache; struct PSOKey { u32 vpIdx; u32 fpIdx; typename BackendTraits::PipelineProperties properties; }; struct PSOKeyHash { size_t operator()(const PSOKey &key) const { size_t hashValue = 0; hashValue ^= std::hash()(key.vpIdx); hashValue ^= std::hash()(key.fpIdx); hashValue ^= std::hash()(key.properties); return hashValue; } }; struct PSOKeyCompare { size_t operator()(const PSOKey &key1, const PSOKey &key2) const { return (key1.vpIdx == key2.vpIdx) && (key1.fpIdx == key2.fpIdx) && (key1.properties == key2.properties); } }; std::unordered_map m_cachePSO; typename BackendTraits::FragmentProgramData& SearchFp(RSXFragmentProgram* rsx_fp, bool& found) { typename binary2FS::iterator It = m_cacheFS.find(vm::base(rsx_fp->addr)); if (It != m_cacheFS.end()) { found = true; return It->second; } found = false; LOG_WARNING(RSX, "FP not found in buffer!"); size_t actualFPSize = ProgramHashUtil::FragmentProgramUtil::getFPBinarySize(vm::base(rsx_fp->addr)); void *fpShadowCopy = malloc(actualFPSize); std::memcpy(fpShadowCopy, vm::base(rsx_fp->addr), actualFPSize); typename BackendTraits::FragmentProgramData &newShader = m_cacheFS[fpShadowCopy]; BackendTraits::RecompileFragmentProgram(rsx_fp, newShader, m_currentShaderId++); return newShader; } typename BackendTraits::VertexProgramData& SearchVp(RSXVertexProgram* rsx_vp, bool &found) { typename binary2VS::iterator It = m_cacheVS.find(rsx_vp->data); if (It != m_cacheVS.end()) { found = true; return It->second; } found = false; LOG_WARNING(RSX, "VP not found in buffer!"); typename BackendTraits::VertexProgramData& newShader = m_cacheVS[rsx_vp->data]; BackendTraits::RecompileVertexProgram(rsx_vp, newShader, m_currentShaderId++); return newShader; } typename BackendTraits::PipelineData *GetProg(const PSOKey &psoKey) const { typename std::unordered_map::const_iterator It = m_cachePSO.find(psoKey); if (It == m_cachePSO.end()) return nullptr; return It->second; } void Add(typename BackendTraits::PipelineData *prog, const PSOKey& PSOKey) { m_cachePSO.insert(std::make_pair(PSOKey, prog)); } public: ProgramStateCache() : m_currentShaderId(0) {} ~ProgramStateCache() { clear(); } const typename BackendTraits::VertexProgramData* get_transform_program(const RSXVertexProgram& rsx_vp) const { typename binary2VS::const_iterator It = m_cacheVS.find(rsx_vp.data); if (It == m_cacheVS.end()) return nullptr; return &It->second; } const typename BackendTraits::FragmentProgramData* get_shader_program(const RSXFragmentProgram& rsx_fp) const { typename binary2FS::const_iterator It = m_cacheFS.find(vm::base(rsx_fp.addr)); if (It == m_cacheFS.end()) return nullptr; return &It->second; } void clear() { for (auto pair : m_cachePSO) BackendTraits::DeleteProgram(pair.second); m_cachePSO.clear(); for (auto pair : m_cacheFS) free(pair.first); m_cacheFS.clear(); } typename BackendTraits::PipelineData *getGraphicPipelineState( RSXVertexProgram *vertexShader, RSXFragmentProgram *fragmentShader, const typename BackendTraits::PipelineProperties &pipelineProperties, const typename BackendTraits::ExtraData& extraData ) { typename BackendTraits::PipelineData *result = nullptr; bool fpFound, vpFound; typename BackendTraits::VertexProgramData &vertexProg = SearchVp(vertexShader, vpFound); typename BackendTraits::FragmentProgramData &fragmentProg = SearchFp(fragmentShader, fpFound); if (fpFound && vpFound) { result = GetProg({ vertexProg.id, fragmentProg.id, pipelineProperties }); } if (result != nullptr) return result; else { LOG_WARNING(RSX, "Add program :"); LOG_WARNING(RSX, "*** vp id = %d", vertexProg.id); LOG_WARNING(RSX, "*** fp id = %d", fragmentProg.id); result = BackendTraits::BuildProgram(vertexProg, fragmentProg, pipelineProperties, extraData); Add(result, { vertexProg.id, fragmentProg.id, pipelineProperties }); } return result; } size_t get_fragment_constants_buffer_size(const RSXFragmentProgram *fragmentShader) const { typename binary2FS::const_iterator It = m_cacheFS.find(vm::base(fragmentShader->addr)); if (It != m_cacheFS.end()) return It->second.FragmentConstantOffsetCache.size() * 4 * sizeof(float); LOG_ERROR(RSX, "Can't retrieve constant offset cache"); return 0; } void fill_fragment_constans_buffer(void *buffer, const RSXFragmentProgram *fragment_program) const { typename binary2FS::const_iterator It = m_cacheFS.find(vm::base(fragment_program->addr)); if (It == m_cacheFS.end()) return; __m128i mask = _mm_set_epi8(0xE, 0xF, 0xC, 0xD, 0xA, 0xB, 0x8, 0x9, 0x6, 0x7, 0x4, 0x5, 0x2, 0x3, 0x0, 0x1); size_t offset = 0; for (size_t offset_in_fragment_program : It->second.FragmentConstantOffsetCache) { void *data = vm::base(fragment_program->addr + (u32)offset_in_fragment_program); const __m128i &vector = _mm_loadu_si128((__m128i*)data); const __m128i &shuffled_vector = _mm_shuffle_epi8(vector, mask); _mm_stream_si128((__m128i*)((char*)buffer + offset), shuffled_vector); offset += 4 * sizeof(u32); } } };