From 1e3a3ef2989299fd0993316082f7d1b26a27dcfa Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 15 Jan 2025 14:27:38 +0100 Subject: [PATCH 1/8] prepare for AIR cache --- .../Latte/Renderer/Metal/LatteTextureMtl.cpp | 5 +- .../HW/Latte/Renderer/Metal/MetalCommon.h | 8 ++ .../Renderer/Metal/RendererShaderMtl.cpp | 92 ++++++++++++++++++- .../Latte/Renderer/Metal/RendererShaderMtl.h | 2 + 4 files changed, 101 insertions(+), 6 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp index f3bd14b9..3c0005ef 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/LatteTextureMtl.cpp @@ -2,9 +2,6 @@ #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" -#include "Common/precompiled.h" -#include "Metal/MTLResource.hpp" -#include "Metal/MTLTexture.hpp" LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle, Latte::E_HWTILEMODE tileMode, bool isDepth) @@ -12,7 +9,7 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM { MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); desc->setStorageMode(MTL::StorageModePrivate); - desc->setCpuCacheMode(MTL::CPUCacheModeWriteCombined); + //desc->setCpuCacheMode(MTL::CPUCacheModeWriteCombined); sint32 effectiveBaseWidth = width; sint32 effectiveBaseHeight = height; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index 952fd1de..f3dd1733 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -101,3 +101,11 @@ inline bool FormatIsRenderable(Latte::E_GX2SURFFMT format) { return !Latte::IsCompressedFormat(format); } + +template +inline void executeCommand(fmt::format_string fmt, T&&... args) { + std::string command = fmt::format(fmt, std::forward(args)...); + int res = system(command.c_str()); + if (res != 0) + cemuLog_log(LogType::Force, "command \"{}\" failed with exit code {}", command, res); +} diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index c4492e3c..dc1256a4 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -2,14 +2,21 @@ #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" -//#include "Cemu/FileCache/FileCache.h" -//#include "config/ActiveSettings.h" +#include "Cemu/FileCache/FileCache.h" +#include "config/ActiveSettings.h" #include "Cemu/Logging/CemuLogging.h" #include "Common/precompiled.h" #include "GameProfile/GameProfile.h" #include "util/helpers/helpers.h" +#define METAL_AIR_CACHE_NAME "Cemu_AIR_cache" +#define METAL_AIR_CACHE_PATH "/Volumes/" METAL_AIR_CACHE_NAME +#define METAL_AIR_CACHE_SIZE (512 * 1024 * 1024) +#define METAL_AIR_CACHE_BLOCK_COUNT (METAL_AIR_CACHE_SIZE / 512) + static bool s_isLoadingShadersMtl{false}; +static std::atomic s_hasRAMFilesystem{false}; +class FileCache* s_airCache{nullptr}; extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_async; @@ -88,12 +95,44 @@ private: // TODO: find out if it would be possible to cache compiled Metal shaders void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId) { + s_isLoadingShadersMtl = true; + + // Open AIR cache + if (s_airCache) + { + delete s_airCache; + s_airCache = nullptr; + } + uint32 airCacheMagic = GeneratePrecompiledCacheId(); + const std::string cacheFilename = fmt::format("{:016x}_air.bin", cacheTitleId); + const fs::path cachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}", cacheFilename); + s_airCache = FileCache::Open(cachePath, true, airCacheMagic); + if (!s_airCache) + cemuLog_log(LogType::Force, "Unable to open AIR cache {}", cacheFilename); + // Maximize shader compilation speed static_cast(g_renderer.get())->SetShouldMaximizeConcurrentCompilation(true); } void RendererShaderMtl::ShaderCacheLoading_end() { + s_isLoadingShadersMtl = false; + + // Close the AIR cache + if (s_airCache) + { + delete s_airCache; + s_airCache = nullptr; + } + + // Close RAM filesystem + if (s_hasRAMFilesystem) + { + executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH); + s_hasRAMFilesystem = false; + } + + // Reset shader compilation speed static_cast(g_renderer.get())->SetShouldMaximizeConcurrentCompilation(false); } @@ -174,6 +213,49 @@ bool RendererShaderMtl::ShouldCountCompilation() const void RendererShaderMtl::CompileInternal() { + // First, try to retrieve the compiled shader from the AIR cache + if (s_isLoadingShadersMtl && (m_isGameShader && !m_isGfxPackShader) && s_airCache) + { + cemu_assert_debug(m_baseHash != 0); + uint64 h1, h2; + GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2); + std::vector cacheFileData; + if (s_airCache->GetFile({ h1, h2 }, cacheFileData)) + { + CompileFromAIR(std::span(cacheFileData.data(), cacheFileData.size())); + FinishCompilation(); + } + else + { + // Ensure that RAM filesystem exists + if (!s_hasRAMFilesystem) + { + s_hasRAMFilesystem = true; + executeCommand("diskutil erasevolume HFS+ {} $(hdiutil attach -nomount ram://{})", METAL_AIR_CACHE_NAME, METAL_AIR_CACHE_BLOCK_COUNT); + } + + // The shader is not in the cache, compile it + std::string filename = fmt::format("{}_{}", h1, h2); + // TODO: store the source + executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal", filename, filename); + executeCommand("xcrun -sdk macosx metallib -o {}.metallib {}.ir", filename, filename); + // TODO: clean up + + // Load from the newly Generated AIR + // std::span airData = ; + //CompileFromAIR(std::span((uint8*)cacheFileData.data(), cacheFileData.size() / sizeof(uint8))); + FinishCompilation(); + + // Store in the cache + uint64 h1, h2; + GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2); + //s_airCache->AddFile({ h1, h2 }, airData.data(), airData.size()); + } + + return; + } + + // Compile from source MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init(); // TODO: always disable fast math for problematic shaders if (g_current_game_profile->GetFastMath()) @@ -200,6 +282,12 @@ void RendererShaderMtl::CompileInternal() g_compiled_shaders_total++; } +void RendererShaderMtl::CompileFromAIR(std::span data) +{ + // TODO: implement this + printf("LOADING SHADER FROM AIR CACHE\n"); +} + void RendererShaderMtl::FinishCompilation() { m_mslCode.clear(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index 40d04c87..98d18687 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -69,5 +69,7 @@ private: void CompileInternal(); + void CompileFromAIR(std::span data); + void FinishCompilation(); }; From a8da524dd4232ab7399cf1d63b3fc37f4b22f2ff Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 15 Jan 2025 17:40:45 +0100 Subject: [PATCH 2/8] use precompiled shaders --- .../HW/Latte/Renderer/Metal/MetalCommon.h | 50 ++++++ .../Renderer/Metal/RendererShaderMtl.cpp | 153 +++++++++++------- .../Latte/Renderer/Metal/RendererShaderMtl.h | 6 +- 3 files changed, 152 insertions(+), 57 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index f3dd1733..28d92225 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -109,3 +109,53 @@ inline void executeCommand(fmt::format_string fmt, T&&... args) { if (res != 0) cemuLog_log(LogType::Force, "command \"{}\" failed with exit code {}", command, res); } + +class MemoryMappedFile +{ +public: + MemoryMappedFile(const std::string& filePath) + { + // Open the file + m_fd = open(filePath.c_str(), O_RDONLY); + if (m_fd == -1) { + cemuLog_log(LogType::Force, "failed to open file: {}", filePath); + return; + } + + // Get the file size + struct stat fileStat; + if (fstat(m_fd, &fileStat) == -1) + { + close(m_fd); + cemuLog_log(LogType::Force, "failed to get file size: {}", filePath); + return; + } + m_fileSize = fileStat.st_size; + + // Memory map the file + m_data = mmap(nullptr, m_fileSize, PROT_READ, MAP_PRIVATE, m_fd, 0); + if (m_data == MAP_FAILED) + { + close(m_fd); + cemuLog_log(LogType::Force, "failed to memory map file: {}", filePath); + return; + } + } + + ~MemoryMappedFile() + { + if (m_data && m_data != MAP_FAILED) + munmap(m_data, m_fileSize); + + if (m_fd != -1) + close(m_fd); + } + + uint8* data() const { return static_cast(m_data); } + size_t size() const { return m_fileSize; } + +private: + int m_fd = -1; + void* m_data = nullptr; + size_t m_fileSize = 0; +}; diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index dc1256a4..e6aa0cab 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -128,7 +128,7 @@ void RendererShaderMtl::ShaderCacheLoading_end() // Close RAM filesystem if (s_hasRAMFilesystem) { - executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH); + //executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH); s_hasRAMFilesystem = false; } @@ -211,50 +211,8 @@ bool RendererShaderMtl::ShouldCountCompilation() const return !s_isLoadingShadersMtl && m_isGameShader; } -void RendererShaderMtl::CompileInternal() +MTL::Library* RendererShaderMtl::LibraryFromSource() { - // First, try to retrieve the compiled shader from the AIR cache - if (s_isLoadingShadersMtl && (m_isGameShader && !m_isGfxPackShader) && s_airCache) - { - cemu_assert_debug(m_baseHash != 0); - uint64 h1, h2; - GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2); - std::vector cacheFileData; - if (s_airCache->GetFile({ h1, h2 }, cacheFileData)) - { - CompileFromAIR(std::span(cacheFileData.data(), cacheFileData.size())); - FinishCompilation(); - } - else - { - // Ensure that RAM filesystem exists - if (!s_hasRAMFilesystem) - { - s_hasRAMFilesystem = true; - executeCommand("diskutil erasevolume HFS+ {} $(hdiutil attach -nomount ram://{})", METAL_AIR_CACHE_NAME, METAL_AIR_CACHE_BLOCK_COUNT); - } - - // The shader is not in the cache, compile it - std::string filename = fmt::format("{}_{}", h1, h2); - // TODO: store the source - executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal", filename, filename); - executeCommand("xcrun -sdk macosx metallib -o {}.metallib {}.ir", filename, filename); - // TODO: clean up - - // Load from the newly Generated AIR - // std::span airData = ; - //CompileFromAIR(std::span((uint8*)cacheFileData.data(), cacheFileData.size() / sizeof(uint8))); - FinishCompilation(); - - // Store in the cache - uint64 h1, h2; - GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2); - //s_airCache->AddFile({ h1, h2 }, airData.data(), airData.size()); - } - - return; - } - // Compile from source MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init(); // TODO: always disable fast math for problematic shaders @@ -266,28 +224,113 @@ void RendererShaderMtl::CompileInternal() NS::Error* error = nullptr; MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(m_mslCode), options, &error); options->release(); + FinishCompilation(); if (error) { - cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), m_mslCode.c_str()); - FinishCompilation(); - return; + cemuLog_log(LogType::Force, "failed to create library from source: {} -> {}", error->localizedDescription()->utf8String(), m_mslCode.c_str()); + return nullptr; } + + return library; +} + +MTL::Library* RendererShaderMtl::LibraryFromAIR(std::span data) +{ + dispatch_data_t dispatchData = dispatch_data_create(data.data(), data.size(), nullptr, DISPATCH_DATA_DESTRUCTOR_DEFAULT); + + NS::Error* error = nullptr; + MTL::Library* library = m_mtlr->GetDevice()->newLibrary(dispatchData, &error); + FinishCompilation(); + printf("AIR size: %zu\n", data.size()); + if (error) + { + cemuLog_log(LogType::Force, "failed to create library from AIR: {}", error->localizedDescription()->utf8String()); + return nullptr; + } + + return library; +} + +void RendererShaderMtl::CompileInternal() +{ + MTL::Library* library; + + // First, try to retrieve the compiled shader from the AIR cache + if (s_isLoadingShadersMtl && (m_isGameShader && !m_isGfxPackShader) && s_airCache) + { + cemu_assert_debug(m_baseHash != 0); + uint64 h1, h2; + GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2); + std::vector cacheFileData; + if (s_airCache->GetFile({ h1, h2 }, cacheFileData)) + { + library = LibraryFromAIR(std::span(cacheFileData.data(), cacheFileData.size())); + } + else + { + // Ensure that RAM filesystem exists + static std::atomic s_creatingRAMFilesystem{false}; + if (!s_hasRAMFilesystem) + { + if (s_creatingRAMFilesystem) + { + while (!s_hasRAMFilesystem) + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + } + else + { + s_creatingRAMFilesystem = true; + executeCommand("diskutil erasevolume HFS+ {} $(hdiutil attach -nomount ram://{})", METAL_AIR_CACHE_NAME, METAL_AIR_CACHE_BLOCK_COUNT); + s_creatingRAMFilesystem = false; + } + s_hasRAMFilesystem = true; + } + + // The shader is not in the cache, compile it + std::string baseFilename = fmt::format("{}/{}_{}", METAL_AIR_CACHE_PATH, h1, h2); + + // Source + std::ofstream mslFile; + mslFile.open(fmt::format("{}.metal", baseFilename)); + mslFile << m_mslCode; + mslFile.close(); + + // Compile + executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal -Wno-unused-variable -Wno-sign-compare", baseFilename, baseFilename); + executeCommand("xcrun -sdk macosx metallib -o {}.metallib {}.ir", baseFilename, baseFilename); + + // Clean up + executeCommand("rm {}.metal", baseFilename); + executeCommand("rm {}.ir", baseFilename); + + // Load from the newly Generated AIR + MemoryMappedFile airFile(fmt::format("{}.metallib", baseFilename)); + std::span airData = std::span(airFile.data(), airFile.size()); + library = LibraryFromAIR(std::span(cacheFileData.data(), cacheFileData.size())); + + // Store in the cache + uint64 h1, h2; + GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2); + s_airCache->AddFile({ h1, h2 }, airData.data(), airData.size()); + } + } + else + { + // Compile from source + library = LibraryFromSource(); + } + + if (!library) + return; + m_function = library->newFunction(ToNSString("main0")); library->release(); - FinishCompilation(); - // Count shader compilation if (ShouldCountCompilation()) g_compiled_shaders_total++; } -void RendererShaderMtl::CompileFromAIR(std::span data) -{ - // TODO: implement this - printf("LOADING SHADER FROM AIR CACHE\n"); -} - void RendererShaderMtl::FinishCompilation() { m_mslCode.clear(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index 98d18687..98973a0e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -67,9 +67,11 @@ private: bool ShouldCountCompilation() const; - void CompileInternal(); + MTL::Library* LibraryFromSource(); - void CompileFromAIR(std::span data); + MTL::Library* LibraryFromAIR(std::span data); + + void CompileInternal(); void FinishCompilation(); }; From 371c08992362e4831f941eab0c94b61f59171738 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 15 Jan 2025 19:25:29 +0100 Subject: [PATCH 3/8] fix: AIR shaders not getting loaded properly --- .../HW/Latte/Renderer/Metal/MetalCommon.h | 23 +++++++++++++++---- .../Renderer/Metal/RendererShaderMtl.cpp | 11 ++++----- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index 28d92225..2543a9fc 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -123,14 +123,27 @@ public: } // Get the file size + // Use a loop to handle the case where the file size is 0 (more of a safety net) struct stat fileStat; - if (fstat(m_fd, &fileStat) == -1) + while (true) { - close(m_fd); - cemuLog_log(LogType::Force, "failed to get file size: {}", filePath); - return; + if (fstat(m_fd, &fileStat) == -1) + { + close(m_fd); + cemuLog_log(LogType::Force, "failed to get file size: {}", filePath); + return; + } + m_fileSize = fileStat.st_size; + + if (m_fileSize == 0) + { + cemuLog_logOnce(LogType::Force, "file size is 0: {}", filePath); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + continue; + } + + break; } - m_fileSize = fileStat.st_size; // Memory map the file m_data = mmap(nullptr, m_fileSize, PROT_READ, MAP_PRIVATE, m_fd, 0); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index e6aa0cab..303ba1e9 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -29,7 +29,7 @@ public: if (m_threadsActive.exchange(true)) return; // create thread pool - const uint32 threadCount = 2; + const uint32 threadCount = 8; for (uint32 i = 0; i < threadCount; ++i) s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this); } @@ -128,7 +128,7 @@ void RendererShaderMtl::ShaderCacheLoading_end() // Close RAM filesystem if (s_hasRAMFilesystem) { - //executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH); + executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH); s_hasRAMFilesystem = false; } @@ -241,7 +241,6 @@ MTL::Library* RendererShaderMtl::LibraryFromAIR(std::span data) NS::Error* error = nullptr; MTL::Library* library = m_mtlr->GetDevice()->newLibrary(dispatchData, &error); FinishCompilation(); - printf("AIR size: %zu\n", data.size()); if (error) { cemuLog_log(LogType::Force, "failed to create library from AIR: {}", error->localizedDescription()->utf8String()); @@ -296,17 +295,17 @@ void RendererShaderMtl::CompileInternal() mslFile.close(); // Compile - executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal -Wno-unused-variable -Wno-sign-compare", baseFilename, baseFilename); + executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal -w", baseFilename, baseFilename); executeCommand("xcrun -sdk macosx metallib -o {}.metallib {}.ir", baseFilename, baseFilename); // Clean up executeCommand("rm {}.metal", baseFilename); executeCommand("rm {}.ir", baseFilename); - // Load from the newly Generated AIR + // Load from the newly generated AIR MemoryMappedFile airFile(fmt::format("{}.metallib", baseFilename)); std::span airData = std::span(airFile.data(), airFile.size()); - library = LibraryFromAIR(std::span(cacheFileData.data(), cacheFileData.size())); + library = LibraryFromAIR(std::span(airData.data(), airData.size())); // Store in the cache uint64 h1, h2; From f5eb184969361be46f68dbcd9cd55a266d2ac7a8 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 15 Jan 2025 19:30:46 +0100 Subject: [PATCH 4/8] check if shaders compiled successfully --- src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h | 7 ++++++- src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp | 8 +++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h index 2543a9fc..a03e7cae 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/MetalCommon.h @@ -103,11 +103,16 @@ inline bool FormatIsRenderable(Latte::E_GX2SURFFMT format) } template -inline void executeCommand(fmt::format_string fmt, T&&... args) { +inline bool executeCommand(fmt::format_string fmt, T&&... args) { std::string command = fmt::format(fmt, std::forward(args)...); int res = system(command.c_str()); if (res != 0) + { cemuLog_log(LogType::Force, "command \"{}\" failed with exit code {}", command, res); + return false; + } + + return true; } class MemoryMappedFile diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 303ba1e9..810c827b 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -29,7 +29,7 @@ public: if (m_threadsActive.exchange(true)) return; // create thread pool - const uint32 threadCount = 8; + const uint32 threadCount = 2; for (uint32 i = 0; i < threadCount; ++i) s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this); } @@ -295,8 +295,10 @@ void RendererShaderMtl::CompileInternal() mslFile.close(); // Compile - executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal -w", baseFilename, baseFilename); - executeCommand("xcrun -sdk macosx metallib -o {}.metallib {}.ir", baseFilename, baseFilename); + if (!executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal -w", baseFilename, baseFilename)) + return; + if (!executeCommand("xcrun -sdk macosx metallib -o {}.metallib {}.ir", baseFilename, baseFilename)) + return; // Clean up executeCommand("rm {}.metal", baseFilename); From 5af904b5e2630ac1b34885e4d51f4c994a62f497 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Wed, 15 Jan 2025 19:45:59 +0100 Subject: [PATCH 5/8] delete unused metallibs --- src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 810c827b..0ef4c5b7 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -11,7 +11,7 @@ #define METAL_AIR_CACHE_NAME "Cemu_AIR_cache" #define METAL_AIR_CACHE_PATH "/Volumes/" METAL_AIR_CACHE_NAME -#define METAL_AIR_CACHE_SIZE (512 * 1024 * 1024) +#define METAL_AIR_CACHE_SIZE (16 * 1024 * 1024) #define METAL_AIR_CACHE_BLOCK_COUNT (METAL_AIR_CACHE_SIZE / 512) static bool s_isLoadingShadersMtl{false}; @@ -313,6 +313,9 @@ void RendererShaderMtl::CompileInternal() uint64 h1, h2; GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2); s_airCache->AddFile({ h1, h2 }, airData.data(), airData.size()); + + // Clean up + executeCommand("rm {}.metallib", baseFilename); } } else From 0b1932c206c64adba37070aea9e4c9c10c4801e0 Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 16 Jan 2025 15:53:51 +0100 Subject: [PATCH 6/8] compile shaders to AIR at runtime --- .../Renderer/Metal/RendererShaderMtl.cpp | 172 ++++++++++-------- .../Latte/Renderer/Metal/RendererShaderMtl.h | 2 + 2 files changed, 101 insertions(+), 73 deletions(-) diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp index 0ef4c5b7..09a25499 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.cpp @@ -15,7 +15,7 @@ #define METAL_AIR_CACHE_BLOCK_COUNT (METAL_AIR_CACHE_SIZE / 512) static bool s_isLoadingShadersMtl{false}; -static std::atomic s_hasRAMFilesystem{false}; +static bool s_hasRAMFilesystem{false}; class FileCache* s_airCache{nullptr}; extern std::atomic_int g_compiled_shaders_total; @@ -28,10 +28,14 @@ public: { if (m_threadsActive.exchange(true)) return; - // create thread pool + + // Create thread pool const uint32 threadCount = 2; for (uint32 i = 0; i < threadCount; ++i) s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this); + + // Create AIR cache thread + s_airCacheThread = new std::thread(&ShaderMtlThreadPool::AIRCacheThreadFunc, this); } void StopThreads() @@ -43,6 +47,9 @@ public: for (auto& it : s_threads) it.join(); s_threads.clear(); + + s_airCacheThread->join(); + delete s_airCacheThread; } ~ShaderMtlThreadPool() @@ -79,15 +86,48 @@ public: } } + void AIRCacheThreadFunc() + { + SetThreadName("mtlAIRCache"); + while (m_threadsActive.load(std::memory_order::relaxed)) + { + s_airCacheQueueCount.decrementWithWait(); + s_airCacheQueueMutex.lock(); + if (s_airCacheQueue.empty()) + { + s_airCacheQueueMutex.unlock(); + continue; + } + + // Create RAM filesystem + if (!s_hasRAMFilesystem) + { + executeCommand("diskutil erasevolume HFS+ {} $(hdiutil attach -nomount ram://{})", METAL_AIR_CACHE_NAME, METAL_AIR_CACHE_BLOCK_COUNT); + s_hasRAMFilesystem = true; + } + + RendererShaderMtl* job = s_airCacheQueue.front(); + s_airCacheQueue.pop_front(); + s_airCacheQueueMutex.unlock(); + // compile + job->CompileToAIR(); + } + } + bool HasThreadsRunning() const { return m_threadsActive; } public: std::vector s_threads; + std::thread* s_airCacheThread{nullptr}; std::deque s_compilationQueue; CounterSemaphore s_compilationQueueCount; std::mutex s_compilationQueueMutex; + std::deque s_airCacheQueue; + CounterSemaphore s_airCacheQueueCount; + std::mutex s_airCacheQueueMutex; + private: std::atomic m_threadsActive; } shaderMtlThreadPool; @@ -118,6 +158,12 @@ void RendererShaderMtl::ShaderCacheLoading_end() { s_isLoadingShadersMtl = false; + // Reset shader compilation speed + static_cast(g_renderer.get())->SetShouldMaximizeConcurrentCompilation(false); +} + +void RendererShaderMtl::ShaderCacheLoading_Close() +{ // Close the AIR cache if (s_airCache) { @@ -127,18 +173,7 @@ void RendererShaderMtl::ShaderCacheLoading_end() // Close RAM filesystem if (s_hasRAMFilesystem) - { executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH); - s_hasRAMFilesystem = false; - } - - // Reset shader compilation speed - static_cast(g_renderer.get())->SetShouldMaximizeConcurrentCompilation(false); -} - -void RendererShaderMtl::ShaderCacheLoading_Close() -{ - // Do nothing } void RendererShaderMtl::Initialize() @@ -215,7 +250,6 @@ MTL::Library* RendererShaderMtl::LibraryFromSource() { // Compile from source MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init(); - // TODO: always disable fast math for problematic shaders if (g_current_game_profile->GetFastMath()) options->setFastMathEnabled(true); if (g_current_game_profile->GetPositionInvariance()) @@ -224,7 +258,6 @@ MTL::Library* RendererShaderMtl::LibraryFromSource() NS::Error* error = nullptr; MTL::Library* library = m_mtlr->GetDevice()->newLibrary(ToNSString(m_mslCode), options, &error); options->release(); - FinishCompilation(); if (error) { cemuLog_log(LogType::Force, "failed to create library from source: {} -> {}", error->localizedDescription()->utf8String(), m_mslCode.c_str()); @@ -240,7 +273,6 @@ MTL::Library* RendererShaderMtl::LibraryFromAIR(std::span data) NS::Error* error = nullptr; MTL::Library* library = m_mtlr->GetDevice()->newLibrary(dispatchData, &error); - FinishCompilation(); if (error) { cemuLog_log(LogType::Force, "failed to create library from AIR: {}", error->localizedDescription()->utf8String()); @@ -252,7 +284,7 @@ MTL::Library* RendererShaderMtl::LibraryFromAIR(std::span data) void RendererShaderMtl::CompileInternal() { - MTL::Library* library; + MTL::Library* library = nullptr; // First, try to retrieve the compiled shader from the AIR cache if (s_isLoadingShadersMtl && (m_isGameShader && !m_isGfxPackShader) && s_airCache) @@ -264,68 +296,24 @@ void RendererShaderMtl::CompileInternal() if (s_airCache->GetFile({ h1, h2 }, cacheFileData)) { library = LibraryFromAIR(std::span(cacheFileData.data(), cacheFileData.size())); - } - else - { - // Ensure that RAM filesystem exists - static std::atomic s_creatingRAMFilesystem{false}; - if (!s_hasRAMFilesystem) - { - if (s_creatingRAMFilesystem) - { - while (!s_hasRAMFilesystem) - std::this_thread::sleep_for(std::chrono::milliseconds(500)); - } - else - { - s_creatingRAMFilesystem = true; - executeCommand("diskutil erasevolume HFS+ {} $(hdiutil attach -nomount ram://{})", METAL_AIR_CACHE_NAME, METAL_AIR_CACHE_BLOCK_COUNT); - s_creatingRAMFilesystem = false; - } - s_hasRAMFilesystem = true; - } - - // The shader is not in the cache, compile it - std::string baseFilename = fmt::format("{}/{}_{}", METAL_AIR_CACHE_PATH, h1, h2); - - // Source - std::ofstream mslFile; - mslFile.open(fmt::format("{}.metal", baseFilename)); - mslFile << m_mslCode; - mslFile.close(); - - // Compile - if (!executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal -w", baseFilename, baseFilename)) - return; - if (!executeCommand("xcrun -sdk macosx metallib -o {}.metallib {}.ir", baseFilename, baseFilename)) - return; - - // Clean up - executeCommand("rm {}.metal", baseFilename); - executeCommand("rm {}.ir", baseFilename); - - // Load from the newly generated AIR - MemoryMappedFile airFile(fmt::format("{}.metallib", baseFilename)); - std::span airData = std::span(airFile.data(), airFile.size()); - library = LibraryFromAIR(std::span(airData.data(), airData.size())); - - // Store in the cache - uint64 h1, h2; - GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2); - s_airCache->AddFile({ h1, h2 }, airData.data(), airData.size()); - - // Clean up - executeCommand("rm {}.metallib", baseFilename); + FinishCompilation(); } } - else + + // Not in the cache, compile from source + if (!library) { // Compile from source library = LibraryFromSource(); - } + if (!library) + return; - if (!library) - return; + // Store in the AIR cache + shaderMtlThreadPool.s_airCacheQueueMutex.lock(); + shaderMtlThreadPool.s_airCacheQueue.push_back(this); + shaderMtlThreadPool.s_airCacheQueueCount.increment(); + shaderMtlThreadPool.s_airCacheQueueMutex.unlock(); + } m_function = library->newFunction(ToNSString("main0")); library->release(); @@ -335,6 +323,44 @@ void RendererShaderMtl::CompileInternal() g_compiled_shaders_total++; } +void RendererShaderMtl::CompileToAIR() +{ + uint64 h1, h2; + GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2); + + // The shader is not in the cache, compile it + std::string baseFilename = fmt::format("{}/{}_{}", METAL_AIR_CACHE_PATH, h1, h2); + + // Source + std::ofstream mslFile; + mslFile.open(fmt::format("{}.metal", baseFilename)); + mslFile << m_mslCode; + mslFile.close(); + + // Compile + if (!executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal -w", baseFilename, baseFilename)) + return; + if (!executeCommand("xcrun -sdk macosx metallib -o {}.metallib {}.ir", baseFilename, baseFilename)) + return; + + // Clean up + executeCommand("rm {}.metal", baseFilename); + executeCommand("rm {}.ir", baseFilename); + + // Load from the newly generated AIR + MemoryMappedFile airFile(fmt::format("{}.metallib", baseFilename)); + std::span airData = std::span(airFile.data(), airFile.size()); + //library = LibraryFromAIR(std::span(airData.data(), airData.size())); + + // Store in the cache + s_airCache->AddFile({ h1, h2 }, airData.data(), airData.size()); + + // Clean up + executeCommand("rm {}.metallib", baseFilename); + + FinishCompilation(); +} + void RendererShaderMtl::FinishCompilation() { m_mslCode.clear(); diff --git a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h index 98973a0e..a749253e 100644 --- a/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h +++ b/src/Cafe/HW/Latte/Renderer/Metal/RendererShaderMtl.h @@ -73,5 +73,7 @@ private: void CompileInternal(); + void CompileToAIR(); + void FinishCompilation(); }; From 40264302c22bf1bf2c0570230fe2601fa3789ffa Mon Sep 17 00:00:00 2001 From: Samuliak Date: Thu, 16 Jan 2025 16:36:02 +0100 Subject: [PATCH 7/8] use the ENABLE_METAL macro --- src/Cafe/HW/Latte/Core/LatteShader.cpp | 32 ++++++++++++++------------ 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/Cafe/HW/Latte/Core/LatteShader.cpp b/src/Cafe/HW/Latte/Core/LatteShader.cpp index 7ad25884..82ae7990 100644 --- a/src/Cafe/HW/Latte/Core/LatteShader.cpp +++ b/src/Cafe/HW/Latte/Core/LatteShader.cpp @@ -524,6 +524,7 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF()) vsHash += 0x1537; +#if ENABLE_METAL if (g_renderer->GetType() == RendererAPI::Metal) { if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually) @@ -542,27 +543,28 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize, if (!usesGeometryShader) { - // Rasterization - bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); + // Rasterization + bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); - // HACK - if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) - rasterizationEnabled = true; + // HACK + if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) + rasterizationEnabled = true; - const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; - uint32 cullFront = polygonControlReg.get_CULL_FRONT(); - uint32 cullBack = polygonControlReg.get_CULL_BACK(); - if (cullFront && cullBack) - rasterizationEnabled = false; + const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; + uint32 cullFront = polygonControlReg.get_CULL_FRONT(); + uint32 cullBack = polygonControlReg.get_CULL_BACK(); + if (cullFront && cullBack) + rasterizationEnabled = false; - if (rasterizationEnabled) - vsHash += 51ULL; + if (rasterizationEnabled) + vsHash += 51ULL; - // Vertex fetch - if (_activeFetchShader->mtlFetchVertexManually) - vsHash += 349ULL; + // Vertex fetch + if (_activeFetchShader->mtlFetchVertexManually) + vsHash += 349ULL; } } +#endif _shaderBaseHash_vs = vsHash; } From 770063506de62a842c1b1a0f2b359471a289d50d Mon Sep 17 00:00:00 2001 From: Samuliak Date: Fri, 17 Jan 2025 13:52:18 +0100 Subject: [PATCH 8/8] add AIR cache to cache paths --- src/gui/components/wxGameList.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gui/components/wxGameList.cpp b/src/gui/components/wxGameList.cpp index 509c4662..fb03843a 100644 --- a/src/gui/components/wxGameList.cpp +++ b/src/gui/components/wxGameList.cpp @@ -69,6 +69,7 @@ std::list _getCachesPaths(const TitleId& titleId) ActiveSettings::GetCachePath(L"shaderCache/driver/vk/{:016x}.bin", titleId), ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_spirv.bin", titleId), ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_gl.bin", titleId), + ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_air.bin", titleId), ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_shaders.bin", titleId), ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_mtlshaders.bin", titleId), ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_vkpipeline.bin", titleId),