diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 0ad42da6..2e1b69ed 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -524,6 +524,7 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF()) vsHash += 0x1537; +#if ENABLE_METAL if (g_renderer->GetType() == RendererAPI::Metal) { if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually) @@ -542,27 +543,28 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, if (!usesGeometryShader) { - // Rasterization - bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); + // Rasterization + bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); - // HACK - if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) - rasterizationEnabled = true; + // HACK + if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) + rasterizationEnabled = true; - const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; - uint32 cullFront = polygonControlReg.get_CULL_FRONT(); - uint32 cullBack = polygonControlReg.get_CULL_BACK(); - if (cullFront && cullBack) - rasterizationEnabled = false; + const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; + uint32 cullFront = polygonControlReg.get_CULL_FRONT(); + uint32 cullBack = polygonControlReg.get_CULL_BACK(); + if (cullFront && cullBack) + rasterizationEnabled = false; - if (rasterizationEnabled) - vsHash += 51ULL; + if (rasterizationEnabled) + vsHash += 51ULL; - // Vertex fetch - if (_activeFetchShader->mtlFetchVertexManually) - vsHash += 349ULL; + // Vertex fetch + if (_activeFetchShader->mtlFetchVertexManually) + vsHash += 349ULL; } } +#endif _shaderBaseHash_vs = vsHash; } diff --git 
a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp
index f3bd14b9..3c0005ef 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp
@@ -2,9 +2,6 @@
 #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
 #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
 #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
-#include "Common/precompiled.h"
-#include "Metal/MTLResource.hpp"
-#include "Metal/MTLTexture.hpp"
 
 LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle,
 	Latte::E_HWTILEMODE tileMode, bool isDepth)
@@ -12,7 +9,7 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM
 {
 	MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init();
 	desc->setStorageMode(MTL::StorageModePrivate);
-	desc->setCpuCacheMode(MTL::CPUCacheModeWriteCombined);
+	//desc->setCpuCacheMode(MTL::CPUCacheModeWriteCombined);
 
 	sint32 effectiveBaseWidth = width;
 	sint32 effectiveBaseHeight = height;
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h
index 952fd1de..a03e7cae 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h
@@ -101,3 +101,103 @@ inline bool FormatIsRenderable(Latte::E_GX2SURFFMT format)
 {
 	return !Latte::IsCompressedFormat(format);
 }
+
+// Formats a command line with fmt and runs it through system().
+// Returns true when system() returned 0, otherwise logs the command and returns false.
+// NOTE(review): the string is interpreted by a shell, so only trusted values may be formatted into it
+template<typename... T>
+inline bool executeCommand(fmt::format_string<T...> fmt, T&&... args)
+{
+	std::string command = fmt::format(fmt, std::forward<T>(args)...);
+	int res = system(command.c_str());
+	if (res != 0)
+	{
+		cemuLog_log(LogType::Force, "command \"{}\" failed with exit code {}", command, res);
+		return false;
+	}
+
+	return true;
+}
+
+// Read-only private memory mapping of a whole file.
+// After any failure the object is empty: data() returns nullptr and size() returns 0
+class MemoryMappedFile
+{
+public:
+	explicit MemoryMappedFile(const std::string& filePath)
+	{
+		// Open the file
+		m_fd = open(filePath.c_str(), O_RDONLY);
+		if (m_fd == -1)
+		{
+			cemuLog_log(LogType::Force, "failed to open file: {}", filePath);
+			return;
+		}
+
+		// Get the file size
+		// A size of 0 is retried for a bounded time only, so an empty file cannot hang the caller forever
+		constexpr int maxSizeQueries = 100; // 100 * 10ms = 1s
+		for (int query = 0; query < maxSizeQueries; ++query)
+		{
+			struct stat fileStat;
+			if (fstat(m_fd, &fileStat) == -1)
+			{
+				cemuLog_log(LogType::Force, "failed to get file size: {}", filePath);
+				reset();
+				return;
+			}
+			m_fileSize = static_cast<size_t>(fileStat.st_size);
+			if (m_fileSize != 0)
+				break;
+
+			cemuLog_logOnce(LogType::Force, "file size is 0: {}", filePath);
+			std::this_thread::sleep_for(std::chrono::milliseconds(10));
+		}
+		if (m_fileSize == 0)
+		{
+			reset();
+			return;
+		}
+
+		// Memory map the file
+		m_data = mmap(nullptr, m_fileSize, PROT_READ, MAP_PRIVATE, m_fd, 0);
+		if (m_data == MAP_FAILED)
+		{
+			cemuLog_log(LogType::Force, "failed to memory map file: {}", filePath);
+			// MAP_FAILED is not nullptr, clear it so data() does not hand out an invalid pointer
+			m_data = nullptr;
+			reset();
+			return;
+		}
+	}
+
+	~MemoryMappedFile()
+	{
+		reset();
+	}
+
+	// Owns the descriptor and the mapping, a copy would release both a second time
+	MemoryMappedFile(const MemoryMappedFile&) = delete;
+	MemoryMappedFile& operator=(const MemoryMappedFile&) = delete;
+
+	uint8* data() const { return static_cast<uint8*>(m_data); }
+	size_t size() const { return m_fileSize; }
+
+private:
+	// Unmaps and closes whatever is currently held and returns to the empty state
+	void reset()
+	{
+		if (m_data)
+			munmap(m_data, m_fileSize);
+		m_data = nullptr;
+		m_fileSize = 0;
+
+		if (m_fd != -1)
+			close(m_fd);
+		m_fd = -1;
+	}
+
+	int m_fd = -1;
+	void* m_data = nullptr;
+	size_t m_fileSize = 0;
+};
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
index c4492e3c..09a25499 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
+++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp
@@ -2,14 +2,21 @@
 #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
 #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
 
-//#include "Cemu/FileCache/FileCache.h"
-//#include "config/ActiveSettings.h"
+#include "Cemu/FileCache/FileCache.h"
+#include "config/ActiveSettings.h"
 #include "Cemu/Logging/CemuLogging.h"
 #include "Common/precompiled.h"
 #include "GameProfile/GameProfile.h"
 #include "util/helpers/helpers.h"
 
+#define METAL_AIR_CACHE_NAME "Cemu_AIR_cache"
+#define METAL_AIR_CACHE_PATH "/Volumes/" METAL_AIR_CACHE_NAME
+#define METAL_AIR_CACHE_SIZE (16 * 1024 * 1024)
+#define METAL_AIR_CACHE_BLOCK_COUNT (METAL_AIR_CACHE_SIZE / 512)
+
 static bool s_isLoadingShadersMtl{false};
+static std::atomic<bool> s_hasRAMFilesystem{false}; // written by the AIR cache thread, read in ShaderCacheLoading_Close()
+class FileCache* s_airCache{nullptr};
 
 extern std::atomic_int g_compiled_shaders_total;
 extern std::atomic_int g_compiled_shaders_async;
@@ -21,10 +28,14 @@ public:
 	{
 		if (m_threadsActive.exchange(true))
 			return;
-		// create thread pool
+
+		// Create thread pool
 		const uint32 threadCount = 2;
 		for (uint32 i = 0; i < threadCount; ++i)
 			s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this);
+
+		// Create AIR cache thread
+		s_airCacheThread = std::thread(&ShaderMtlThreadPool::AIRCacheThreadFunc, this);
 	}
 
 	void StopThreads()
@@ -36,6 +47,12 @@ public:
 		for (auto& it : s_threads)
 			it.join();
 		s_threads.clear();
+
+		// Wake the AIR cache thread, it may be blocked in decrementWithWait() on an empty queue
+		// NOTE(review): assumes m_threadsActive was already cleared earlier in this function (not visible here)
+		s_airCacheQueueCount.increment();
+		if (s_airCacheThread.joinable())
+			s_airCacheThread.join();
 	}
 
 	~ShaderMtlThreadPool()
@@ -72,15 +89,50 @@ public:
 		}
 	}
 
+	void AIRCacheThreadFunc()
+	{
+		SetThreadName("mtlAIRCache");
+		while (m_threadsActive.load(std::memory_order::relaxed))
+		{
+			s_airCacheQueueCount.decrementWithWait();
+
+			// Hold the queue lock only while taking the job, never while running external tools
+			RendererShaderMtl* job = nullptr;
+			{
+				std::lock_guard lock(s_airCacheQueueMutex);
+				if (s_airCacheQueue.empty())
+					continue;
+				job = s_airCacheQueue.front();
+				s_airCacheQueue.pop_front();
+			}
+
+			// Create RAM filesystem
+			if (!s_hasRAMFilesystem)
+			{
+				// Remember the volume only when it was really created, otherwise retry with the next job
+				if (executeCommand("diskutil erasevolume HFS+ {} $(hdiutil attach -nomount ram://{})", METAL_AIR_CACHE_NAME, METAL_AIR_CACHE_BLOCK_COUNT))
+					s_hasRAMFilesystem = true;
+			}
+
+			// compile
+			job->CompileToAIR();
+		}
+	}
+
 	bool HasThreadsRunning() const { return m_threadsActive; }
 
 public:
 	std::vector s_threads;
+	std::thread s_airCacheThread;
 
 	std::deque s_compilationQueue;
 	CounterSemaphore s_compilationQueueCount;
 	std::mutex s_compilationQueueMutex;
 
+	std::deque<RendererShaderMtl*> s_airCacheQueue;
+	CounterSemaphore s_airCacheQueueCount;
+	std::mutex s_airCacheQueueMutex;
+
 private:
 	std::atomic m_threadsActive;
 } shaderMtlThreadPool;
@@ -88,18 +140,49 @@ private:
 // TODO: find out if it would be possible to cache compiled Metal shaders
 void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId)
 {
+	s_isLoadingShadersMtl = true;
+
+	// Open AIR cache
+	if (s_airCache)
+	{
+		delete s_airCache;
+		s_airCache = nullptr;
+	}
+	uint32 airCacheMagic = GeneratePrecompiledCacheId();
+	const std::string cacheFilename = fmt::format("{:016x}_air.bin", cacheTitleId);
+	const fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}", cacheFilename);
+	s_airCache = FileCache::Open(cachePath, true, airCacheMagic);
+	if (!s_airCache)
+		cemuLog_log(LogType::Force, "Unable to open AIR cache {}", cacheFilename);
+
 	// Maximize shader compilation speed
 	static_cast(g_renderer.get())->SetShouldMaximizeConcurrentCompilation(true);
 }
 
 void RendererShaderMtl::ShaderCacheLoading_end()
 {
+	s_isLoadingShadersMtl = false;
+
+	// Reset shader compilation speed
 	static_cast(g_renderer.get())->SetShouldMaximizeConcurrentCompilation(false);
 }
 
 void RendererShaderMtl::ShaderCacheLoading_Close()
 {
-	// Do nothing
+	// Close the AIR cache
+	if (s_airCache)
+	{
+		delete s_airCache;
+		s_airCache = nullptr;
+	}
+
+	// Close RAM filesystem
+	if (s_hasRAMFilesystem)
+	{
+		// Forget the volume once it is ejected so that it is recreated on demand
+		if (executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH))
+			s_hasRAMFilesystem = false;
+	}
 }
 
 void RendererShaderMtl::Initialize()
@@ -172,10 +255,10 @@ bool RendererShaderMtl::ShouldCountCompilation() const
 	return !s_isLoadingShadersMtl && m_isGameShader;
 }
 
-void RendererShaderMtl::CompileInternal()
+MTL::Library* RendererShaderMtl::LibraryFromSource()
 {
+	// Compile from source
 	MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init();
-	// TODO: always disable fast math for problematic shaders
 	if (g_current_game_profile->GetFastMath())
 		options->setFastMathEnabled(true);
 	if (g_current_game_profile->GetPositionInvariance())
@@ -186,20 +269,132 @@ void RendererShaderMtl::CompileInternal()
 	options->release();
 	if (error)
 	{
-		cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), m_mslCode.c_str());
-		FinishCompilation();
-		return;
+		cemuLog_log(LogType::Force, "failed to create library from source: {} -> {}", error->localizedDescription()->utf8String(), m_mslCode.c_str());
+		return nullptr;
 	}
+
+	return library;
+}
+
+// Creates a Metal library from precompiled AIR (metallib) bytes, returns nullptr on failure
+MTL::Library* RendererShaderMtl::LibraryFromAIR(std::span<const uint8> data)
+{
+	// DISPATCH_DATA_DESTRUCTOR_DEFAULT makes dispatch copy the bytes, so the caller's buffer may be freed afterwards
+	dispatch_data_t dispatchData = dispatch_data_create(data.data(), data.size(), nullptr, DISPATCH_DATA_DESTRUCTOR_DEFAULT);
+
+	NS::Error* error = nullptr;
+	MTL::Library* library = m_mtlr->GetDevice()->newLibrary(dispatchData, &error);
+	// Drop our reference, the dispatch object was leaked before
+	dispatch_release(dispatchData);
+	if (error || !library)
+	{
+		cemuLog_log(LogType::Force, "failed to create library from AIR: {}", error ? error->localizedDescription()->utf8String() : "unknown error");
+		if (library)
+			library->release();
+		return nullptr;
+	}
+
+	return library;
+}
+
+// Builds m_function, preferring a cached AIR library over compiling m_mslCode
+void RendererShaderMtl::CompileInternal()
+{
+	// Only unmodified game shaders go through the AIR cache, and only while a cache is open
+	const bool useAirCache = (m_isGameShader && !m_isGfxPackShader) && s_airCache;
+	MTL::Library* library = nullptr;
+
+	// First, try to retrieve the compiled shader from the AIR cache
+	if (s_isLoadingShadersMtl && useAirCache)
+	{
+		cemu_assert_debug(m_baseHash != 0);
+		uint64 h1, h2;
+		GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2);
+		std::vector<uint8> cacheFileData;
+		if (s_airCache->GetFile({ h1, h2 }, cacheFileData))
+			library = LibraryFromAIR(std::span<const uint8>(cacheFileData.data(), cacheFileData.size()));
+	}
+	const bool loadedFromCache = (library != nullptr);
+
+	// Not in the cache (or the cached AIR was unusable), compile from source
+	if (!library)
+	{
+		library = LibraryFromSource();
+		if (!library)
+		{
+			// Still signal completion, returning without it left the shader unfinished forever
+			FinishCompilation();
+			return;
+		}
+	}
+
 	m_function = library->newFunction(ToNSString("main0"));
 	library->release();
 
-	FinishCompilation();
-
+	if (useAirCache && !loadedFromCache)
+	{
+		// Store in the AIR cache. Queued only now that m_function is set, CompileToAIR() still needs
+		// m_mslCode and calls FinishCompilation() itself
+		// NOTE(review): the queue holds a raw pointer, confirm the shader outlives the AIR cache thread
+		std::lock_guard lock(shaderMtlThreadPool.s_airCacheQueueMutex);
+		shaderMtlThreadPool.s_airCacheQueue.push_back(this);
+		shaderMtlThreadPool.s_airCacheQueueCount.increment();
+	}
+	else
+	{
+		// Finish last, FinishCompilation() clears m_mslCode and must not run before m_function is set
+		FinishCompilation();
+	}
+
 	// Count shader compilation
 	if (ShouldCountCompilation())
 		g_compiled_shaders_total++;
 }
 
+// Compiles m_mslCode to a metallib with the offline Metal toolchain and stores it in the AIR cache.
+// Called from the AIR cache thread, every path ends with FinishCompilation()
+void RendererShaderMtl::CompileToAIR()
+{
+	uint64 h1, h2;
+	GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2);
+
+	// The shader is not in the cache, compile it
+	const std::string baseFilename = fmt::format("{}/{}_{}", METAL_AIR_CACHE_PATH, h1, h2);
+	const std::string mslPath = baseFilename + ".metal";
+	const std::string irPath = baseFilename + ".ir";
+	const std::string libPath = baseFilename + ".metallib";
+
+	// Source
+	std::ofstream mslFile(mslPath);
+	mslFile << m_mslCode;
+	mslFile.close();
+	bool success = !mslFile.fail();
+	if (!success)
+		cemuLog_log(LogType::Force, "failed to write shader source: {}", mslPath);
+
+	// Compile
+	success = success && executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal -w", baseFilename, baseFilename);
+	success = success && executeCommand("xcrun -sdk macosx metallib -o {}.metallib {}.ir", baseFilename, baseFilename);
+
+	if (success)
+	{
+		// Load from the newly generated AIR
+		MemoryMappedFile airFile(libPath);
+		const bool isMapped = airFile.data() != nullptr && airFile.data() != MAP_FAILED && airFile.size() != 0;
+
+		// Store in the cache, which may have been closed or never opened
+		if (isMapped && s_airCache)
+			s_airCache->AddFile({ h1, h2 }, airFile.data(), airFile.size());
+	}
+
+	// Clean up all intermediate files, also after a failure (best effort, errors are ignored)
+	unlink(mslPath.c_str());
+	unlink(irPath.c_str());
+	unlink(libPath.c_str());
+
+	FinishCompilation();
+}
+
 void RendererShaderMtl::FinishCompilation()
 {
 	m_mslCode.clear();
diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h
index 40d04c87..a749253e 100644
--- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h
+++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h
@@ -67,7 +67,13 @@ private:
 
 	bool ShouldCountCompilation() const;
 
+	MTL::Library* LibraryFromSource();
+
+	MTL::Library* LibraryFromAIR(std::span<const uint8> data);
+
 	void CompileInternal();
 
+	void CompileToAIR();
+
 	void FinishCompilation();
 };
diff --git a/src/gui/components/wxGameList.cpp b/src/gui/components/wxGameList.cpp
index 509c4662..fb03843a 100644
--- a/src/gui/components/wxGameList.cpp
+++ b/src/gui/components/wxGameList.cpp
@@ -69,6 +69,7 @@ std::list _getCachesPaths(const TitleId& titleId)
 		ActiveSettings::GetCachePath(L"shaderCache/driver/vk/{:016x}.bin", titleId),
 		ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_spirv.bin", titleId),
 		ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_gl.bin", titleId),
+		ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_air.bin", titleId),
 		ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_shaders.bin", titleId),
 		ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_mtlshaders.bin", titleId),
 		ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_vkpipeline.bin", titleId),