Merge pull request #18 from SamoZ256/metal-air-cache

AIR cache
This commit is contained in:
SamoZ256 2025-01-17 14:02:50 +01:00 committed by GitHub
commit 21e7466670
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 273 additions and 30 deletions

View file

@ -524,6 +524,7 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF()) if (LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_CLIP_SPACE_DEF())
vsHash += 0x1537; vsHash += 0x1537;
#if ENABLE_METAL
if (g_renderer->GetType() == RendererAPI::Metal) if (g_renderer->GetType() == RendererAPI::Metal)
{ {
if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually) if (usesGeometryShader || _activeFetchShader->mtlFetchVertexManually)
@ -542,27 +543,28 @@ void LatteSHRC_UpdateVSBaseHash(uint8* vertexShaderPtr, uint32 vertexShaderSize,
if (!usesGeometryShader) if (!usesGeometryShader)
{ {
// Rasterization // Rasterization
bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL(); bool rasterizationEnabled = !LatteGPUState.contextNew.PA_CL_CLIP_CNTL.get_DX_RASTERIZATION_KILL();
// HACK // HACK
if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA()) if (!LatteGPUState.contextNew.PA_CL_VTE_CNTL.get_VPORT_X_OFFSET_ENA())
rasterizationEnabled = true; rasterizationEnabled = true;
const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL; const auto& polygonControlReg = LatteGPUState.contextNew.PA_SU_SC_MODE_CNTL;
uint32 cullFront = polygonControlReg.get_CULL_FRONT(); uint32 cullFront = polygonControlReg.get_CULL_FRONT();
uint32 cullBack = polygonControlReg.get_CULL_BACK(); uint32 cullBack = polygonControlReg.get_CULL_BACK();
if (cullFront && cullBack) if (cullFront && cullBack)
rasterizationEnabled = false; rasterizationEnabled = false;
if (rasterizationEnabled) if (rasterizationEnabled)
vsHash += 51ULL; vsHash += 51ULL;
// Vertex fetch // Vertex fetch
if (_activeFetchShader->mtlFetchVertexManually) if (_activeFetchShader->mtlFetchVertexManually)
vsHash += 349ULL; vsHash += 349ULL;
} }
} }
#endif
_shaderBaseHash_vs = vsHash; _shaderBaseHash_vs = vsHash;
} }

View file

@ -2,9 +2,6 @@
#include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteTextureViewMtl.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h" #include "Cafe/HW/Latte/Renderer/Metal/LatteToMtl.h"
#include "Common/precompiled.h"
#include "Metal/MTLResource.hpp"
#include "Metal/MTLTexture.hpp"
LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle, LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM dim, MPTR physAddress, MPTR physMipAddress, Latte::E_GX2SURFFMT format, uint32 width, uint32 height, uint32 depth, uint32 pitch, uint32 mipLevels, uint32 swizzle,
Latte::E_HWTILEMODE tileMode, bool isDepth) Latte::E_HWTILEMODE tileMode, bool isDepth)
@ -12,7 +9,7 @@ LatteTextureMtl::LatteTextureMtl(class MetalRenderer* mtlRenderer, Latte::E_DIM
{ {
MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init();
desc->setStorageMode(MTL::StorageModePrivate); desc->setStorageMode(MTL::StorageModePrivate);
desc->setCpuCacheMode(MTL::CPUCacheModeWriteCombined); //desc->setCpuCacheMode(MTL::CPUCacheModeWriteCombined);
sint32 effectiveBaseWidth = width; sint32 effectiveBaseWidth = width;
sint32 effectiveBaseHeight = height; sint32 effectiveBaseHeight = height;

View file

@ -101,3 +101,79 @@ inline bool FormatIsRenderable(Latte::E_GX2SURFFMT format)
{ {
return !Latte::IsCompressedFormat(format); return !Latte::IsCompressedFormat(format);
} }
// Formats a shell command line from `fmt`-style arguments and runs it synchronously via system().
// Returns true when system() reports 0; otherwise logs the command together with the raw
// value returned by system() and returns false.
template <typename... T>
inline bool executeCommand(fmt::format_string<T...> formatStr, T&&... args) {
	const std::string cmdLine = fmt::format(formatStr, std::forward<T>(args)...);
	const int status = system(cmdLine.c_str());
	if (status == 0)
		return true;

	cemuLog_log(LogType::Force, "command \"{}\" failed with exit code {}", cmdLine, status);
	return false;
}
// Read-only, private memory mapping of a whole file.
//
// The constructor never throws; on any failure it logs the reason and leaves the object
// empty, in which case data() returns nullptr and size() returns 0. The mapping and the
// file descriptor are released by the destructor.
class MemoryMappedFile
{
public:
	// Opens and maps `filePath`. If the file is currently empty, waits briefly for it to be
	// filled (up to kMaxEmptyFileRetries * 10ms) before giving up.
	MemoryMappedFile(const std::string& filePath)
	{
		// Open the file
		m_fd = open(filePath.c_str(), O_RDONLY);
		if (m_fd == -1)
		{
			cemuLog_log(LogType::Force, "failed to open file: {}", filePath);
			return;
		}

		// Get the file size, retrying a bounded number of times while it is still 0
		struct stat fileStat;
		int retriesLeft = kMaxEmptyFileRetries;
		while (true)
		{
			if (fstat(m_fd, &fileStat) == -1)
			{
				cemuLog_log(LogType::Force, "failed to get file size: {}", filePath);
				closeFile();
				return;
			}
			if (fileStat.st_size > 0)
				break;

			cemuLog_logOnce(LogType::Force, "file size is 0: {}", filePath);
			if (retriesLeft-- == 0)
			{
				// Never spin forever on a file that is genuinely empty
				cemuLog_log(LogType::Force, "file is still empty, giving up: {}", filePath);
				closeFile();
				return;
			}
			std::this_thread::sleep_for(std::chrono::milliseconds(10));
		}
		m_fileSize = static_cast<size_t>(fileStat.st_size);

		// Memory map the file
		void* mapping = mmap(nullptr, m_fileSize, PROT_READ, MAP_PRIVATE, m_fd, 0);
		if (mapping == MAP_FAILED)
		{
			cemuLog_log(LogType::Force, "failed to memory map file: {}", filePath);
			// Keep the object in the empty state so data() never exposes MAP_FAILED
			m_fileSize = 0;
			closeFile();
			return;
		}
		m_data = mapping;
	}

	~MemoryMappedFile()
	{
		if (m_data)
			munmap(m_data, m_fileSize);
		closeFile();
	}

	// The object owns a descriptor and a mapping; copying would release them twice
	MemoryMappedFile(const MemoryMappedFile&) = delete;
	MemoryMappedFile& operator=(const MemoryMappedFile&) = delete;

	// Start of the mapped bytes, or nullptr if the file could not be mapped
	uint8* data() const { return static_cast<uint8*>(m_data); }
	// Number of mapped bytes, or 0 if the file could not be mapped
	size_t size() const { return m_fileSize; }

private:
	// Closes the descriptor at most once; resetting m_fd keeps the destructor from closing it again
	void closeFile()
	{
		if (m_fd != -1)
		{
			close(m_fd);
			m_fd = -1;
		}
	}

	// Upper bound on 10ms waits for an empty file to receive content
	static constexpr int kMaxEmptyFileRetries = 100;

	int m_fd = -1;
	void* m_data = nullptr;
	size_t m_fileSize = 0;
};

View file

@ -2,14 +2,21 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h" #include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
//#include "Cemu/FileCache/FileCache.h" #include "Cemu/FileCache/FileCache.h"
//#include "config/ActiveSettings.h" #include "config/ActiveSettings.h"
#include "Cemu/Logging/CemuLogging.h" #include "Cemu/Logging/CemuLogging.h"
#include "Common/precompiled.h" #include "Common/precompiled.h"
#include "GameProfile/GameProfile.h" #include "GameProfile/GameProfile.h"
#include "util/helpers/helpers.h" #include "util/helpers/helpers.h"
// Volume name given to the temporary RAM disk that serves as scratch space for AIR compilation
#define METAL_AIR_CACHE_NAME "Cemu_AIR_cache"
// Path under which that volume is expected to be mounted; intermediate shader files are written here
#define METAL_AIR_CACHE_PATH "/Volumes/" METAL_AIR_CACHE_NAME
// Capacity of the RAM disk in bytes (16 MiB)
#define METAL_AIR_CACHE_SIZE (16 * 1024 * 1024)
// Capacity expressed in 512-byte units; this count is substituted into the `ram://` URL passed to hdiutil
#define METAL_AIR_CACHE_BLOCK_COUNT (METAL_AIR_CACHE_SIZE / 512)
static bool s_isLoadingShadersMtl{false}; static bool s_isLoadingShadersMtl{false};
// Set by the AIR cache thread after it has issued the command that creates the RAM disk volume;
// checked when the shader cache is closed to decide whether the volume must be ejected
static bool s_hasRAMFilesystem{false};
// Per-title cache of compiled shader libraries; opened in ShaderCacheLoading_begin,
// nullptr while closed or when opening failed
class FileCache* s_airCache{nullptr};
extern std::atomic_int g_compiled_shaders_total; extern std::atomic_int g_compiled_shaders_total;
extern std::atomic_int g_compiled_shaders_async; extern std::atomic_int g_compiled_shaders_async;
@ -21,10 +28,14 @@ public:
{ {
if (m_threadsActive.exchange(true)) if (m_threadsActive.exchange(true))
return; return;
// create thread pool
// Create thread pool
const uint32 threadCount = 2; const uint32 threadCount = 2;
for (uint32 i = 0; i < threadCount; ++i) for (uint32 i = 0; i < threadCount; ++i)
s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this); s_threads.emplace_back(&ShaderMtlThreadPool::CompilerThreadFunc, this);
// Create AIR cache thread
s_airCacheThread = new std::thread(&ShaderMtlThreadPool::AIRCacheThreadFunc, this);
} }
void StopThreads() void StopThreads()
@ -36,6 +47,9 @@ public:
for (auto& it : s_threads) for (auto& it : s_threads)
it.join(); it.join();
s_threads.clear(); s_threads.clear();
s_airCacheThread->join();
delete s_airCacheThread;
} }
~ShaderMtlThreadPool() ~ShaderMtlThreadPool()
@ -72,15 +86,48 @@ public:
} }
} }
// Worker loop of the AIR cache thread.
//
// Waits on the AIR queue semaphore, takes one shader at a time off s_airCacheQueue and
// compiles it to AIR. The RAM disk used as scratch space is created lazily before the
// first job is processed. The loop exits once m_threadsActive is cleared and the thread
// has been woken up.
void AIRCacheThreadFunc()
{
	SetThreadName("mtlAIRCache");

	// Remember a failed RAM disk creation so it is not retried (and re-logged) for every job
	bool ramFilesystemFailed = false;

	while (m_threadsActive.load(std::memory_order::relaxed))
	{
		s_airCacheQueueCount.decrementWithWait();

		// Take the next job; hold the mutex only for the queue access so that threads
		// enqueueing shaders are never blocked behind the external commands run below
		RendererShaderMtl* job = nullptr;
		{
			std::lock_guard<std::mutex> queueLock(s_airCacheQueueMutex);
			if (s_airCacheQueue.empty())
				continue; // woken without work (e.g. for shutdown); re-check the run flag
			job = s_airCacheQueue.front();
			s_airCacheQueue.pop_front();
		}

		// Create RAM filesystem
		if (!s_hasRAMFilesystem)
		{
			if (ramFilesystemFailed)
				continue; // no scratch volume available, drop the job

			if (!executeCommand("diskutil erasevolume HFS+ {} $(hdiutil attach -nomount ram://{})", METAL_AIR_CACHE_NAME, METAL_AIR_CACHE_BLOCK_COUNT))
			{
				// executeCommand already logged the failure; without the volume the compile step cannot work
				ramFilesystemFailed = true;
				continue;
			}
			s_hasRAMFilesystem = true;
		}

		// compile
		job->CompileToAIR();
	}
}
bool HasThreadsRunning() const { return m_threadsActive; } bool HasThreadsRunning() const { return m_threadsActive; }
public: public:
std::vector<std::thread> s_threads; std::vector<std::thread> s_threads;
std::thread* s_airCacheThread{nullptr};
std::deque<RendererShaderMtl*> s_compilationQueue; std::deque<RendererShaderMtl*> s_compilationQueue;
CounterSemaphore s_compilationQueueCount; CounterSemaphore s_compilationQueueCount;
std::mutex s_compilationQueueMutex; std::mutex s_compilationQueueMutex;
std::deque<RendererShaderMtl*> s_airCacheQueue;
CounterSemaphore s_airCacheQueueCount;
std::mutex s_airCacheQueueMutex;
private: private:
std::atomic<bool> m_threadsActive; std::atomic<bool> m_threadsActive;
} shaderMtlThreadPool; } shaderMtlThreadPool;
@ -88,18 +135,45 @@ private:
// TODO: find out if it would be possible to cache compiled Metal shaders // TODO: find out if it would be possible to cache compiled Metal shaders
// Marks the start of shader cache loading for the given title: (re)opens the title's
// AIR cache file and asks the renderer to maximize concurrent shader compilation.
void RendererShaderMtl::ShaderCacheLoading_begin(uint64 cacheTitleId)
{
	s_isLoadingShadersMtl = true;

	// Drop a cache that is still open from before (deleting nullptr is a no-op)
	delete s_airCache;
	s_airCache = nullptr;

	// Open AIR cache
	const uint32 cacheMagic = GeneratePrecompiledCacheId();
	const std::string airCacheName = fmt::format("{:016x}_air.bin", cacheTitleId);
	const fs::path airCachePath = ActiveSettings::GetCachePath("shaderCache/precompiled/{}", airCacheName);
	s_airCache = FileCache::Open(airCachePath, true, cacheMagic);
	if (s_airCache == nullptr)
		cemuLog_log(LogType::Force, "Unable to open AIR cache {}", airCacheName);

	// Maximize shader compilation speed
	auto* metalRenderer = static_cast<MetalRenderer*>(g_renderer.get());
	metalRenderer->SetShouldMaximizeConcurrentCompilation(true);
}
void RendererShaderMtl::ShaderCacheLoading_end() void RendererShaderMtl::ShaderCacheLoading_end()
{ {
s_isLoadingShadersMtl = false;
// Reset shader compilation speed
static_cast<MetalRenderer*>(g_renderer.get())->SetShouldMaximizeConcurrentCompilation(false); static_cast<MetalRenderer*>(g_renderer.get())->SetShouldMaximizeConcurrentCompilation(false);
} }
void RendererShaderMtl::ShaderCacheLoading_Close() void RendererShaderMtl::ShaderCacheLoading_Close()
{ {
// Do nothing // Close the AIR cache
if (s_airCache)
{
delete s_airCache;
s_airCache = nullptr;
}
// Close RAM filesystem
if (s_hasRAMFilesystem)
executeCommand("diskutil eject {}", METAL_AIR_CACHE_PATH);
} }
void RendererShaderMtl::Initialize() void RendererShaderMtl::Initialize()
@ -172,10 +246,10 @@ bool RendererShaderMtl::ShouldCountCompilation() const
return !s_isLoadingShadersMtl && m_isGameShader; return !s_isLoadingShadersMtl && m_isGameShader;
} }
void RendererShaderMtl::CompileInternal() MTL::Library* RendererShaderMtl::LibraryFromSource()
{ {
// Compile from source
MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init(); MTL::CompileOptions* options = MTL::CompileOptions::alloc()->init();
// TODO: always disable fast math for problematic shaders
if (g_current_game_profile->GetFastMath()) if (g_current_game_profile->GetFastMath())
options->setFastMathEnabled(true); options->setFastMathEnabled(true);
if (g_current_game_profile->GetPositionInvariance()) if (g_current_game_profile->GetPositionInvariance())
@ -186,20 +260,107 @@ void RendererShaderMtl::CompileInternal()
options->release(); options->release();
if (error) if (error)
{ {
cemuLog_log(LogType::Force, "failed to create library: {} -> {}", error->localizedDescription()->utf8String(), m_mslCode.c_str()); cemuLog_log(LogType::Force, "failed to create library from source: {} -> {}", error->localizedDescription()->utf8String(), m_mslCode.c_str());
FinishCompilation(); return nullptr;
return;
} }
return library;
}
// Creates a Metal library from precompiled library data (as stored in the AIR cache).
// `data` is copied into a dispatch data object, so the caller's buffer only has to stay
// valid for the duration of the call.
// Returns the new library (the caller releases it), or nullptr after logging the error.
MTL::Library* RendererShaderMtl::LibraryFromAIR(std::span<uint8> data)
{
	// DISPATCH_DATA_DESTRUCTOR_DEFAULT makes dispatch copy the bytes
	dispatch_data_t dispatchData = dispatch_data_create(data.data(), data.size(), nullptr, DISPATCH_DATA_DESTRUCTOR_DEFAULT);
	NS::Error* error = nullptr;
	MTL::Library* library = m_mtlr->GetDevice()->newLibrary(dispatchData, &error);
	// Drop our reference; the dispatch object was previously leaked on every call
	dispatch_release(dispatchData);
	if (error)
	{
		cemuLog_log(LogType::Force, "failed to create library from AIR: {}", error->localizedDescription()->utf8String());
		// Do not leak a library object that was handed back together with an error
		if (library)
			library->release();
		return nullptr;
	}

	return library;
}
// Produces m_function for this shader.
//
// While the shader cache is being loaded, the precompiled library is looked up in the
// AIR cache first. If it is missing or cannot be loaded, the shader is compiled from
// its MSL source and queued for the AIR cache thread so that it can be stored.
// FinishCompilation() is called exactly once on every path.
void RendererShaderMtl::CompileInternal()
{
	MTL::Library* library = nullptr;

	// Only game shaders that do not come from a graphics pack are kept in the AIR cache,
	// and only while a cache file is open
	const bool isAirCacheable = (m_isGameShader && !m_isGfxPackShader) && s_airCache;

	// First, try to retrieve the compiled shader from the AIR cache
	if (s_isLoadingShadersMtl && isAirCacheable)
	{
		cemu_assert_debug(m_baseHash != 0);
		uint64 h1, h2;
		GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2);
		std::vector<uint8> cacheFileData;
		// FinishCompilation() must not be called here: it clears m_mslCode, which is still
		// needed if loading the cached library fails and we fall back to the source
		if (s_airCache->GetFile({ h1, h2 }, cacheFileData))
			library = LibraryFromAIR(std::span<uint8>(cacheFileData.data(), cacheFileData.size()));
	}

	// Not in the cache, compile from source
	if (!library)
	{
		library = LibraryFromSource();
		if (!library)
		{
			// Compilation failed (already logged); still mark the shader as finished
			FinishCompilation();
			return;
		}

		// Store in the AIR cache
		// NOTE(review): the AIR cache thread reads m_mslCode in CompileToAIR() while
		// FinishCompilation() below clears it, and nothing visible here keeps this object
		// alive until the queued job has run - confirm the intended ownership/ordering
		if (isAirCacheable)
		{
			std::lock_guard<std::mutex> queueLock(shaderMtlThreadPool.s_airCacheQueueMutex);
			shaderMtlThreadPool.s_airCacheQueue.push_back(this);
			shaderMtlThreadPool.s_airCacheQueueCount.increment();
		}
	}

	m_function = library->newFunction(ToNSString("main0"));
	library->release();

	FinishCompilation();

	// Count shader compilation
	if (ShouldCountCompilation())
		g_compiled_shaders_total++;
}
void RendererShaderMtl::CompileToAIR()
{
uint64 h1, h2;
GenerateShaderPrecompiledCacheFilename(m_type, m_baseHash, m_auxHash, h1, h2);
// The shader is not in the cache, compile it
std::string baseFilename = fmt::format("{}/{}_{}", METAL_AIR_CACHE_PATH, h1, h2);
// Source
std::ofstream mslFile;
mslFile.open(fmt::format("{}.metal", baseFilename));
mslFile << m_mslCode;
mslFile.close();
// Compile
if (!executeCommand("xcrun -sdk macosx metal -o {}.ir -c {}.metal -w", baseFilename, baseFilename))
return;
if (!executeCommand("xcrun -sdk macosx metallib -o {}.metallib {}.ir", baseFilename, baseFilename))
return;
// Clean up
executeCommand("rm {}.metal", baseFilename);
executeCommand("rm {}.ir", baseFilename);
// Load from the newly generated AIR
MemoryMappedFile airFile(fmt::format("{}.metallib", baseFilename));
std::span<uint8> airData = std::span<uint8>(airFile.data(), airFile.size());
//library = LibraryFromAIR(std::span<uint8>(airData.data(), airData.size()));
// Store in the cache
s_airCache->AddFile({ h1, h2 }, airData.data(), airData.size());
// Clean up
executeCommand("rm {}.metallib", baseFilename);
FinishCompilation();
}
void RendererShaderMtl::FinishCompilation() void RendererShaderMtl::FinishCompilation()
{ {
m_mslCode.clear(); m_mslCode.clear();

View file

@ -67,7 +67,13 @@ private:
bool ShouldCountCompilation() const; bool ShouldCountCompilation() const;
MTL::Library* LibraryFromSource();
MTL::Library* LibraryFromAIR(std::span<uint8> data);
void CompileInternal(); void CompileInternal();
void CompileToAIR();
void FinishCompilation(); void FinishCompilation();
}; };

View file

@ -69,6 +69,7 @@ std::list<fs::path> _getCachesPaths(const TitleId& titleId)
ActiveSettings::GetCachePath(L"shaderCache/driver/vk/{:016x}.bin", titleId), ActiveSettings::GetCachePath(L"shaderCache/driver/vk/{:016x}.bin", titleId),
ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_spirv.bin", titleId), ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_spirv.bin", titleId),
ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_gl.bin", titleId), ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_gl.bin", titleId),
ActiveSettings::GetCachePath(L"shaderCache/precompiled/{:016x}_air.bin", titleId),
ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_shaders.bin", titleId), ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_shaders.bin", titleId),
ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_mtlshaders.bin", titleId), ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_mtlshaders.bin", titleId),
ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_vkpipeline.bin", titleId), ActiveSettings::GetCachePath(L"shaderCache/transferable/{:016x}_vkpipeline.bin", titleId),