#include "Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" //#include "Cemu/FileCache/FileCache.h" //#include "config/ActiveSettings.h" #include "Cemu/Logging/CemuLogging.h" #include "Common/precompiled.h" #include "GameProfile/GameProfile.h" #include "util/helpers/helpers.h" static bool s_isLoadingShadersMtl{false}; extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; class ShaderMtlThreadPool { public: void StartThreads() { if (m_threadsActive.exchange(true)) return; // create thread pool const uint32 threadCount = 2; for (uint32 i = 0; i < threadCount; ++i) s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this); } void StopThreads() { if (!m_threadsActive.exchange(false)) return; for (uint32 i = 0; i < s_threads.size(); ++i) s_compilationQueueCount.increment(); for (auto& it : s_threads) it.join(); s_threads.clear(); } ~ShaderMtlThreadPool() { StopThreads(); } void CompilerThreadFunc() { SetThreadName("mtlShaderComp"); while (m_threadsActive.load(std::memory_order::relaxed)) { s_compilationQueueCount.decrementWithWait(); s_compilationQueueMutex.lock(); if (s_compilationQueue.empty()) { // queue empty again, shaders compiled synchronously via PreponeCompilation() s_compilationQueueMutex.unlock(); continue; } RendererShaderMtl* job = s_compilationQueue.front(); s_compilationQueue.pop_front(); // set compilation state cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::QUEUED); job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::COMPILING); s_compilationQueueMutex.unlock(); // compile job->CompileInternal(); if (job->ShouldCountCompilation()) ++g_compiled_shaders_async; // mark as compiled cemu_assert_debug(job->m_compilationState.getValue() == RendererShaderMtl::COMPILATION_STATE::COMPILING); job->m_compilationState.setValue(RendererShaderMtl::COMPILATION_STATE::DONE); } } bool HasThreadsRunning() const { return m_threadsActive; } public: std::vector s_threads; std::deque s_compilationQueue; CounterSemaphore s_compilationQueueCount; std::mutex s_compilationQueueMutex; private: std::atomic m_threadsActive; } shaderMtlThreadPool; // TODO: find out if it would be possible to cache compiled Metal shaders void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId) { // Maximize shader compilation speed static_cast(g_renderer.get())->SetShouldMaximizeConcurrentCompilation(true); } void RendererShaderMtl::ShaderCacheLoading_end() { static_cast(g_renderer.get())->SetShouldMaximizeConcurrentCompilation(false); } void RendererShaderMtl::ShaderCacheLoading_Close() { // Do nothing } void RendererShaderMtl::Initialize() { shaderMtlThreadPool.StartThreads(); } void RendererShaderMtl::Shutdown() { shaderMtlThreadPool.StopThreads(); } RendererShaderMtl::RendererShaderMtl(MetalRenderer* mtlRenderer, ShaderType type, uint64 baseHash, uint64 auxHash, bool isGameShader, bool isGfxPackShader, const std::string& mslCode) : RendererShader(type, baseHash, auxHash, isGameShader, isGfxPackShader), m_mtlr{mtlRenderer}, m_mslCode{mslCode} { // start async compilation shaderMtlThreadPool.s_compilationQueueMutex.lock(); m_compilationState.setValue(COMPILATION_STATE::QUEUED); shaderMtlThreadPool.s_compilationQueue.push_back(this); shaderMtlThreadPool.s_compilationQueueCount.increment(); shaderMtlThreadPool.s_compilationQueueMutex.unlock(); cemu_assert_debug(shaderMtlThreadPool.HasThreadsRunning()); // make sure .StartThreads() was called } RendererShaderMtl::~RendererShaderMtl() { if (m_function) m_function->release(); } void RendererShaderMtl::PreponeCompilation(bool isRenderThread) { shaderMtlThreadPool.s_compilationQueueMutex.lock(); bool isStillQueued = m_compilationState.hasState(COMPILATION_STATE::QUEUED); if (isStillQueued) { // remove from queue shaderMtlThreadPool.s_compilationQueue.erase(std::remove(shaderMtlThreadPool.s_compilationQueue.begin(), shaderMtlThreadPool.s_compilationQueue.end(), this), shaderMtlThreadPool.s_compilationQueue.end()); m_compilationState.setValue(COMPILATION_STATE::COMPILING); } shaderMtlThreadPool.s_compilationQueueMutex.unlock(); if (!isStillQueued) { m_compilationState.waitUntilValue(COMPILATION_STATE::DONE); if (ShouldCountCompilation()) --g_compiled_shaders_async; // compilation caused a stall so we don't consider this one async return; } else { // compile synchronously CompileInternal(); m_compilationState.setValue(COMPILATION_STATE::DONE); } } bool RendererShaderMtl::IsCompiled() { return m_compilationState.hasState(COMPILATION_STATE::DONE); }; bool RendererShaderMtl::WaitForCompiled() { m_compilationState.waitUntilValue(COMPILATION_STATE::DONE); return true; } bool RendererShaderMtl::ShouldCountCompilation() const { return !s_isLoadingShadersMtl && m_isGameShader; } void RendererShaderMtl::CompileInternal() { MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init(); // TODO: always disable fast math for problematic shaders if (g_current_game_profile->GetFastMath()) options->setFastMathEnabled(true); if (g_current_game_profile->GetPositionInvariance()) options->setPreserveInvariance(true); NS::Error* error = nullptr; MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(m_mslCode), options, &error); options->release(); if (error) { cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), m_mslCode.c_str()); FinishCompilation(); return; } m_function = library->newFunction(ToNSString("main0")); library->release(); FinishCompilation(); // Count shader compilation if (ShouldCountCompilation()) g_compiled_shaders_total++; } void RendererShaderMtl::FinishCompilation() { m_mslCode.clear(); m_mslCode.shrink_to_fit(); }