diff --git a/BUILD.md b/BUILD.md index 41de928e..662be96d 100644 --- a/BUILD.md +++ b/BUILD.md @@ -57,7 +57,7 @@ At Step 3 in [Build Cemu using cmake and clang](#build-cemu-using-cmake-and-clan `cmake -S . -B build -DCMAKE_BUILD_TYPE=release -DCMAKE_C_COMPILER=/usr/bin/clang-15 -DCMAKE_CXX_COMPILER=/usr/bin/clang++-15 -G Ninja -DCMAKE_MAKE_PROGRAM=/usr/bin/ninja` #### For Fedora and derivatives: -`sudo dnf install bluez-libs clang cmake cubeb-devel freeglut-devel git glm-devel gtk3-devel kernel-headers libgcrypt-devel libsecret-devel libtool libusb1-devel llvm nasm ninja-build perl-core systemd-devel zlib-devel zlib-static` +`sudo dnf install bluez-libs-devel clang cmake cubeb-devel freeglut-devel git glm-devel gtk3-devel kernel-headers libgcrypt-devel libsecret-devel libtool libusb1-devel llvm nasm ninja-build perl-core systemd-devel wayland-protocols-devel zlib-devel zlib-static` ### Build Cemu @@ -120,6 +120,9 @@ This section refers to running `cmake -S...` (truncated). * Compiling failed during rebuild after `git pull` with an error that mentions RPATH * Add the following and try running the command again: * `-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON` +* Environment variable `VCPKG_FORCE_SYSTEM_BINARIES` must be set. + * Execute the following and then try running the command again: + * `export VCPKG_FORCE_SYSTEM_BINARIES=1` * If you are getting a random error, read the [package-name-and-platform]-out.log and [package-name-and-platform]-err.log for the actual reason to see if you might be lacking the headers from a dependency. 
diff --git a/CMakeLists.txt b/CMakeLists.txt index b43f7dff..93198e1b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,23 +134,6 @@ if (WIN32) endif() option(ENABLE_CUBEB "Enabled cubeb backend" ON) -# usb hid backends -if (WIN32) - option(ENABLE_NSYSHID_WINDOWS_HID "Enables the native Windows HID backend for nsyshid" ON) -endif () -# libusb and windows hid backends shouldn't be active at the same time; otherwise we'd see all devices twice! -if (NOT ENABLE_NSYSHID_WINDOWS_HID) - option(ENABLE_NSYSHID_LIBUSB "Enables the libusb backend for nsyshid" ON) -else () - set(ENABLE_NSYSHID_LIBUSB OFF CACHE BOOL "" FORCE) -endif () -if (ENABLE_NSYSHID_WINDOWS_HID) - add_compile_definitions(NSYSHID_ENABLE_BACKEND_WINDOWS_HID) -endif () -if (ENABLE_NSYSHID_LIBUSB) - add_compile_definitions(NSYSHID_ENABLE_BACKEND_LIBUSB) -endif () - option(ENABLE_WXWIDGETS "Build with wxWidgets UI (Currently required)" ON) set(THREADS_PREFER_PTHREAD_FLAG true) diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index 597809eb..ced42766 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -463,8 +463,6 @@ add_library(CemuCafe OS/libs/nsyshid/BackendEmulated.h OS/libs/nsyshid/BackendLibusb.cpp OS/libs/nsyshid/BackendLibusb.h - OS/libs/nsyshid/BackendWindowsHID.cpp - OS/libs/nsyshid/BackendWindowsHID.h OS/libs/nsyshid/Dimensions.cpp OS/libs/nsyshid/Dimensions.h OS/libs/nsyshid/Infinity.cpp @@ -631,15 +629,16 @@ if (ENABLE_WAYLAND) target_link_libraries(CemuCafe PUBLIC Wayland::Client) endif() -if (ENABLE_NSYSHID_LIBUSB) - if (ENABLE_VCPKG) - find_package(PkgConfig REQUIRED) - pkg_check_modules(libusb REQUIRED IMPORTED_TARGET libusb-1.0) - target_link_libraries(CemuCafe PRIVATE PkgConfig::libusb) - else () - find_package(libusb MODULE REQUIRED) - target_link_libraries(CemuCafe PRIVATE libusb::libusb) - endif () +if (ENABLE_VCPKG) + if(WIN32) + set(PKG_CONFIG_EXECUTABLE "${VCPKG_INSTALLED_DIR}/x64-windows/tools/pkgconf/pkgconf.exe") + endif() + 
find_package(PkgConfig REQUIRED) + pkg_check_modules(libusb REQUIRED IMPORTED_TARGET libusb-1.0) + target_link_libraries(CemuCafe PRIVATE PkgConfig::libusb) +else () + find_package(libusb MODULE REQUIRED) + target_link_libraries(CemuCafe PRIVATE libusb::libusb) endif () if (ENABLE_WXWIDGETS) diff --git a/src/Cafe/GameProfile/GameProfile.cpp b/src/Cafe/GameProfile/GameProfile.cpp index d06a32dc..ff397860 100644 --- a/src/Cafe/GameProfile/GameProfile.cpp +++ b/src/Cafe/GameProfile/GameProfile.cpp @@ -147,7 +147,7 @@ bool gameProfile_loadEnumOption(IniParser& iniParser, const char* optionName, T& } // test enum name - if(boost::iequals(fmt::format("{}", v), *option_value)) + if(boost::iequals(fmt::format("{}", fmt::underlying(v)), *option_value)) { option = v; return true; diff --git a/src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp b/src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp index 167911b6..a8f81901 100644 --- a/src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp +++ b/src/Cafe/HW/Latte/Core/LatteCommandProcessor.cpp @@ -141,6 +141,14 @@ private: void LatteCP_processCommandBuffer(DrawPassContext& drawPassCtx); +// called whenever the GPU runs out of commands or hits a wait condition (semaphores, HLE waits) +void LatteCP_signalEnterWait() +{ + // based on the assumption that games won't do a rugpull and swap out buffer data in the middle of an uninterrupted sequence of drawcalls, + // we only flush caches when the GPU goes idle or has to wait for any operation + LatteIndices_invalidateAll(); +} + /* * Read a U32 from the command buffer * If no data is available then wait in a busy loop @@ -466,6 +474,8 @@ LatteCMDPtr LatteCP_itWaitRegMem(LatteCMDPtr cmd, uint32 nWords) const uint32 GPU7_WAIT_MEM_OP_GREATER = 6; const uint32 GPU7_WAIT_MEM_OP_NEVER = 7; + LatteCP_signalEnterWait(); + bool stalls = false; if ((word0 & 0x10) != 0) { @@ -594,6 +604,7 @@ LatteCMDPtr LatteCP_itMemSemaphore(LatteCMDPtr cmd, uint32 nWords) else if(SEM_SIGNAL == 7) { // wait + 
LatteCP_signalEnterWait(); size_t loopCount = 0; while (true) { @@ -1305,11 +1316,13 @@ void LatteCP_processCommandBuffer(DrawPassContext& drawPassCtx) } case IT_HLE_TRIGGER_SCANBUFFER_SWAP: { + LatteCP_signalEnterWait(); LatteCP_itHLESwapScanBuffer(cmdData, nWords); break; } case IT_HLE_WAIT_FOR_FLIP: { + LatteCP_signalEnterWait(); LatteCP_itHLEWaitForFlip(cmdData, nWords); break; } @@ -1594,12 +1607,14 @@ void LatteCP_ProcessRingbuffer() } case IT_HLE_TRIGGER_SCANBUFFER_SWAP: { + LatteCP_signalEnterWait(); LatteCP_itHLESwapScanBuffer(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 64; break; } case IT_HLE_WAIT_FOR_FLIP: { + LatteCP_signalEnterWait(); LatteCP_itHLEWaitForFlip(cmd, nWords); timerRecheck += CP_TIMER_RECHECK / 1; break; diff --git a/src/Cafe/HW/Latte/Core/LatteIndices.cpp b/src/Cafe/HW/Latte/Core/LatteIndices.cpp index 5b6d8495..d5eaaece 100644 --- a/src/Cafe/HW/Latte/Core/LatteIndices.cpp +++ b/src/Cafe/HW/Latte/Core/LatteIndices.cpp @@ -1,6 +1,7 @@ #include "Cafe/HW/Latte/Core/LatteConst.h" #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/ISA/RegDefines.h" +#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h" #include "Common/cpu_features.h" #if defined(ARCH_X86_64) && defined(__GNUC__) @@ -9,32 +10,53 @@ struct { - const void* lastPtr; - uint32 lastCount; - LattePrimitiveMode lastPrimitiveMode; - LatteIndexType lastIndexType; - // output - uint32 indexMin; - uint32 indexMax; - Renderer::INDEX_TYPE renderIndexType; - uint32 outputCount; - uint32 indexBufferOffset; - uint32 indexBufferIndex; + struct CacheEntry + { + // input data + const void* lastPtr; + uint32 lastCount; + LattePrimitiveMode lastPrimitiveMode; + LatteIndexType lastIndexType; + uint64 lastUsed; + // output + uint32 indexMin; + uint32 indexMax; + Renderer::INDEX_TYPE renderIndexType; + uint32 outputCount; + Renderer::IndexAllocation indexAllocation; + }; + std::array entry; + uint64 currentUsageCounter{0}; }LatteIndexCache{}; void 
LatteIndices_invalidate(const void* memPtr, uint32 size) { - if (LatteIndexCache.lastPtr >= memPtr && (LatteIndexCache.lastPtr < ((uint8*)memPtr + size)) ) + for(auto& entry : LatteIndexCache.entry) { - LatteIndexCache.lastPtr = nullptr; - LatteIndexCache.lastCount = 0; + if (entry.lastPtr >= memPtr && (entry.lastPtr < ((uint8*)memPtr + size)) ) + { + if(entry.lastPtr != nullptr) + g_renderer->indexData_releaseIndexMemory(entry.indexAllocation); + entry.lastPtr = nullptr; + entry.lastCount = 0; + } } } void LatteIndices_invalidateAll() { - LatteIndexCache.lastPtr = nullptr; - LatteIndexCache.lastCount = 0; + for(auto& entry : LatteIndexCache.entry) + { + if (entry.lastPtr != nullptr) + g_renderer->indexData_releaseIndexMemory(entry.indexAllocation); + entry.lastPtr = nullptr; + entry.lastCount = 0; + } +} + +uint64 LatteIndices_GetNextUsageIndex() +{ + return LatteIndexCache.currentUsageCounter++; } uint32 LatteIndices_calculateIndexOutputSize(LattePrimitiveMode primitiveMode, LatteIndexType indexType, uint32 count) @@ -585,7 +607,7 @@ void LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, LatteIn } } -void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 count, LattePrimitiveMode primitiveMode, uint32& indexMin, uint32& indexMax, Renderer::INDEX_TYPE& renderIndexType, uint32& outputCount, uint32& indexBufferOffset, uint32& indexBufferIndex) +void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 count, LattePrimitiveMode primitiveMode, uint32& indexMin, uint32& indexMax, Renderer::INDEX_TYPE& renderIndexType, uint32& outputCount, Renderer::IndexAllocation& indexAllocation) { // what this should do: // [x] use fast SIMD-based index decoding @@ -595,17 +617,18 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 // [ ] better cache implementation, allow to cache across frames // reuse from cache if data didn't change - if (LatteIndexCache.lastPtr == indexData && - 
LatteIndexCache.lastCount == count && - LatteIndexCache.lastPrimitiveMode == primitiveMode && - LatteIndexCache.lastIndexType == indexType) + auto cacheEntry = std::find_if(LatteIndexCache.entry.begin(), LatteIndexCache.entry.end(), [indexData, count, primitiveMode, indexType](const auto& entry) { - indexMin = LatteIndexCache.indexMin; - indexMax = LatteIndexCache.indexMax; - renderIndexType = LatteIndexCache.renderIndexType; - outputCount = LatteIndexCache.outputCount; - indexBufferOffset = LatteIndexCache.indexBufferOffset; - indexBufferIndex = LatteIndexCache.indexBufferIndex; + return entry.lastPtr == indexData && entry.lastCount == count && entry.lastPrimitiveMode == primitiveMode && entry.lastIndexType == indexType; + }); + if (cacheEntry != LatteIndexCache.entry.end()) + { + indexMin = cacheEntry->indexMin; + indexMax = cacheEntry->indexMax; + renderIndexType = cacheEntry->renderIndexType; + outputCount = cacheEntry->outputCount; + indexAllocation = cacheEntry->indexAllocation; + cacheEntry->lastUsed = LatteIndices_GetNextUsageIndex(); return; } @@ -629,10 +652,12 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 indexMin = 0; indexMax = std::max(count, 1u)-1; renderIndexType = Renderer::INDEX_TYPE::NONE; + indexAllocation = {}; return; // no indices } // query index buffer from renderer - void* indexOutputPtr = g_renderer->indexData_reserveIndexMemory(indexOutputSize, indexBufferOffset, indexBufferIndex); + indexAllocation = g_renderer->indexData_reserveIndexMemory(indexOutputSize); + void* indexOutputPtr = indexAllocation.mem; // decode indices indexMin = std::numeric_limits::max(); @@ -780,16 +805,25 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 // recalculate index range but filter out primitive restart index LatteIndices_alternativeCalculateIndexMinMax(indexData, indexType, count, indexMin, indexMax); } - g_renderer->indexData_uploadIndexMemory(indexBufferIndex, indexBufferOffset, 
indexOutputSize); + g_renderer->indexData_uploadIndexMemory(indexAllocation); + performanceMonitor.cycle[performanceMonitor.cycleIndex].indexDataUploaded += indexOutputSize; + // get least recently used cache entry + auto lruEntry = std::min_element(LatteIndexCache.entry.begin(), LatteIndexCache.entry.end(), [](const auto& a, const auto& b) + { + return a.lastUsed < b.lastUsed; + }); + // invalidate previous allocation + if(lruEntry->lastPtr != nullptr) + g_renderer->indexData_releaseIndexMemory(lruEntry->indexAllocation); // update cache - LatteIndexCache.lastPtr = indexData; - LatteIndexCache.lastCount = count; - LatteIndexCache.lastPrimitiveMode = primitiveMode; - LatteIndexCache.lastIndexType = indexType; - LatteIndexCache.indexMin = indexMin; - LatteIndexCache.indexMax = indexMax; - LatteIndexCache.renderIndexType = renderIndexType; - LatteIndexCache.outputCount = outputCount; - LatteIndexCache.indexBufferOffset = indexBufferOffset; - LatteIndexCache.indexBufferIndex = indexBufferIndex; + lruEntry->lastPtr = indexData; + lruEntry->lastCount = count; + lruEntry->lastPrimitiveMode = primitiveMode; + lruEntry->lastIndexType = indexType; + lruEntry->indexMin = indexMin; + lruEntry->indexMax = indexMax; + lruEntry->renderIndexType = renderIndexType; + lruEntry->outputCount = outputCount; + lruEntry->indexAllocation = indexAllocation; + lruEntry->lastUsed = LatteIndices_GetNextUsageIndex(); } diff --git a/src/Cafe/HW/Latte/Core/LatteIndices.h b/src/Cafe/HW/Latte/Core/LatteIndices.h index 917d7991..8aace24e 100644 --- a/src/Cafe/HW/Latte/Core/LatteIndices.h +++ b/src/Cafe/HW/Latte/Core/LatteIndices.h @@ -4,4 +4,4 @@ void LatteIndices_invalidate(const void* memPtr, uint32 size); void LatteIndices_invalidateAll(); -void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 count, LattePrimitiveMode primitiveMode, uint32& indexMin, uint32& indexMax, Renderer::INDEX_TYPE& renderIndexType, uint32& outputCount, uint32& indexBufferOffset, uint32& 
indexBufferIndex); \ No newline at end of file +void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32 count, LattePrimitiveMode primitiveMode, uint32& indexMin, uint32& indexMax, Renderer::INDEX_TYPE& renderIndexType, uint32& outputCount, Renderer::IndexAllocation& indexAllocation); \ No newline at end of file diff --git a/src/Cafe/HW/Latte/Core/LatteOverlay.cpp b/src/Cafe/HW/Latte/Core/LatteOverlay.cpp index 238f85e8..e6edb904 100644 --- a/src/Cafe/HW/Latte/Core/LatteOverlay.cpp +++ b/src/Cafe/HW/Latte/Core/LatteOverlay.cpp @@ -107,7 +107,13 @@ void LatteOverlay_renderOverlay(ImVec2& position, ImVec2& pivot, sint32 directio ImGui::Text("VRAM: %dMB / %dMB", g_state.vramUsage, g_state.vramTotal); if (config.overlay.debug) + { + // general debug info + ImGui::Text("--- Debug info ---"); + ImGui::Text("IndexUploadPerFrame: %dKB", (performanceMonitor.stats.indexDataUploadPerFrame+1023)/1024); + // backend specific info g_renderer->AppendOverlayDebugInfo(); + } position.y += (ImGui::GetWindowSize().y + 10.0f) * direction; } diff --git a/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.cpp b/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.cpp index f2767446..14dfe9a9 100644 --- a/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.cpp +++ b/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.cpp @@ -74,7 +74,6 @@ void LattePerformanceMonitor_frameEnd() uniformBankDataUploadedPerFrame /= 1024ULL; uint32 uniformBankCountUploadedPerFrame = (uint32)(uniformBankUploadedCount / (uint64)elapsedFrames); uint64 indexDataUploadPerFrame = (indexDataUploaded / (uint64)elapsedFrames); - indexDataUploadPerFrame /= 1024ULL; double fps = (double)elapsedFrames2S * 1000.0 / (double)totalElapsedTimeFPS; uint32 shaderBindsPerFrame = shaderBindCounter / elapsedFrames; @@ -82,7 +81,7 @@ void LattePerformanceMonitor_frameEnd() uint32 rlps = (uint32)((uint64)recompilerLeaveCount * 1000ULL / (uint64)totalElapsedTime); uint32 tlps = (uint32)((uint64)threadLeaveCount * 1000ULL / 
(uint64)totalElapsedTime); // set stats - + performanceMonitor.stats.indexDataUploadPerFrame = indexDataUploadPerFrame; // next counter cycle sint32 nextCycleIndex = (performanceMonitor.cycleIndex + 1) % PERFORMANCE_MONITOR_TRACK_CYCLES; performanceMonitor.cycle[nextCycleIndex].drawCallCounter = 0; diff --git a/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.h b/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.h index ac75bb1b..dbc3cff9 100644 --- a/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.h +++ b/src/Cafe/HW/Latte/Core/LattePerformanceMonitor.h @@ -132,6 +132,12 @@ typedef struct LattePerfStatCounter numDrawBarriersPerFrame; LattePerfStatCounter numBeginRenderpassPerFrame; }vk; + + // calculated stats (per frame) + struct + { + uint32 indexDataUploadPerFrame; + }stats; }performanceMonitor_t; extern performanceMonitor_t performanceMonitor; diff --git a/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp b/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp index d654de2f..68264772 100644 --- a/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp +++ b/src/Cafe/HW/Latte/Core/LatteRenderTarget.cpp @@ -11,7 +11,6 @@ #include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h" #include "Cafe/GraphicPack/GraphicPack2.h" #include "config/ActiveSettings.h" -#include "Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h" #include "gui/guiWrapper.h" #include "Cafe/OS/libs/erreula/erreula.h" #include "input/InputManager.h" diff --git a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h index 196403b4..ddc7ee22 100644 --- a/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/OpenGL/OpenGLRenderer.h @@ -102,16 +102,21 @@ public: static void SetAttributeArrayState(uint32 index, bool isEnabled, sint32 aluDivisor); static void SetArrayElementBuffer(GLuint arrayElementBuffer); - // index - void* indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) override + // index (not used by OpenGL renderer yet) 
+ IndexAllocation indexData_reserveIndexMemory(uint32 size) override { - assert_dbg(); - return nullptr; + cemu_assert_unimplemented(); + return {}; } - void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override + void indexData_releaseIndexMemory(IndexAllocation& allocation) override { - assert_dbg(); + cemu_assert_unimplemented(); + } + + void indexData_uploadIndexMemory(IndexAllocation& allocation) override + { + cemu_assert_unimplemented(); } // uniform diff --git a/src/Cafe/HW/Latte/Renderer/Renderer.h b/src/Cafe/HW/Latte/Renderer/Renderer.h index 1dba52c8..a02eeb10 100644 --- a/src/Cafe/HW/Latte/Renderer/Renderer.h +++ b/src/Cafe/HW/Latte/Renderer/Renderer.h @@ -140,8 +140,15 @@ public: virtual void draw_endSequence() = 0; // index - virtual void* indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) = 0; - virtual void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) = 0; + struct IndexAllocation + { + void* mem; // pointer to index data inside buffer + void* rendererInternal; // for renderer use + }; + + virtual IndexAllocation indexData_reserveIndexMemory(uint32 size) = 0; + virtual void indexData_releaseIndexMemory(IndexAllocation& allocation) = 0; + virtual void indexData_uploadIndexMemory(IndexAllocation& allocation) = 0; // occlusion queries virtual LatteQueryObject* occlusionQuery_create() = 0; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/TextureReadbackVk.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/TextureReadbackVk.cpp index b055fe7e..bce23b59 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/TextureReadbackVk.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/TextureReadbackVk.cpp @@ -22,7 +22,7 @@ uint32 LatteTextureReadbackInfoVk::GetImageSize(LatteTextureView* textureView) cemu_assert(textureFormat == VK_FORMAT_R8G8B8A8_UNORM); return baseTexture->width * baseTexture->height * 4; } - else if (textureView->format == Latte::E_GX2SURFFMT::R8_UNORM) + else if (textureView->format 
== Latte::E_GX2SURFFMT::R8_UNORM ) { cemu_assert(textureFormat == VK_FORMAT_R8_UNORM); return baseTexture->width * baseTexture->height * 1; @@ -79,6 +79,13 @@ uint32 LatteTextureReadbackInfoVk::GetImageSize(LatteTextureView* textureView) // todo - if driver does not support VK_FORMAT_D24_UNORM_S8_UINT this is represented as VK_FORMAT_D32_SFLOAT_S8_UINT which is 8 bytes return baseTexture->width * baseTexture->height * 4; } + else if (textureView->format == Latte::E_GX2SURFFMT::R5_G6_B5_UNORM ) + { + if(textureFormat == VK_FORMAT_R5G6B5_UNORM_PACK16){ + return baseTexture->width * baseTexture->height * 2; + } + return 0; + } else { cemuLog_log(LogType::Force, "Unsupported texture readback format {:04x}", (uint32)textureView->format); diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.cpp index c4f47a2b..3494dbc5 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.cpp @@ -23,11 +23,11 @@ void VKRSynchronizedRingAllocator::allocateAdditionalUploadBuffer(uint32 sizeReq AllocatorBuffer_t newBuffer{}; newBuffer.writeIndex = 0; newBuffer.basePtr = nullptr; - if (m_bufferType == BUFFER_TYPE::STAGING) + if (m_bufferType == VKR_BUFFER_TYPE::STAGING) m_vkrMemMgr->CreateBuffer(bufferAllocSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, newBuffer.vk_buffer, newBuffer.vk_mem); - else if (m_bufferType == BUFFER_TYPE::INDEX) + else if (m_bufferType == VKR_BUFFER_TYPE::INDEX) m_vkrMemMgr->CreateBuffer(bufferAllocSize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, newBuffer.vk_buffer, newBuffer.vk_mem); - else if (m_bufferType == BUFFER_TYPE::STRIDE) + else if (m_bufferType == VKR_BUFFER_TYPE::STRIDE) m_vkrMemMgr->CreateBuffer(bufferAllocSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 
newBuffer.vk_buffer, newBuffer.vk_mem); else cemu_assert_debug(false); @@ -53,7 +53,7 @@ VKRSynchronizedRingAllocator::AllocatorReservation_t VKRSynchronizedRingAllocato uint32 distanceToSyncPoint; if (!itr.queue_syncPoints.empty()) { - if(itr.queue_syncPoints.front().offset < itr.writeIndex) + if (itr.queue_syncPoints.front().offset < itr.writeIndex) distanceToSyncPoint = 0xFFFFFFFF; else distanceToSyncPoint = itr.queue_syncPoints.front().offset - itr.writeIndex; @@ -100,7 +100,7 @@ VKRSynchronizedRingAllocator::AllocatorReservation_t VKRSynchronizedRingAllocato void VKRSynchronizedRingAllocator::FlushReservation(AllocatorReservation_t& uploadReservation) { - cemu_assert_debug(m_bufferType == BUFFER_TYPE::STAGING); // only the staging buffer isn't coherent + cemu_assert_debug(m_bufferType == VKR_BUFFER_TYPE::STAGING); // only the staging buffer isn't coherent // todo - use nonCoherentAtomSize for flush size (instead of hardcoded constant) VkMappedMemoryRange flushedRange{}; flushedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; @@ -167,6 +167,70 @@ void VKRSynchronizedRingAllocator::GetStats(uint32& numBuffers, size_t& totalBuf } } +/* VKRSynchronizedHeapAllocator */ + +VKRSynchronizedHeapAllocator::VKRSynchronizedHeapAllocator(class VKRMemoryManager* vkMemoryManager, VKR_BUFFER_TYPE bufferType, size_t minimumBufferAllocSize) + : m_vkrMemMgr(vkMemoryManager), m_chunkedHeap(bufferType, minimumBufferAllocSize) {}; + +VKRSynchronizedHeapAllocator::AllocatorReservation* VKRSynchronizedHeapAllocator::AllocateBufferMemory(uint32 size, uint32 alignment) +{ + CHAddr addr = m_chunkedHeap.alloc(size, alignment); + m_activeAllocations.emplace_back(addr); + AllocatorReservation* res = m_poolAllocatorReservation.allocObj(); + res->bufferIndex = addr.chunkIndex; + res->bufferOffset = addr.offset; + res->size = size; + res->memPtr = m_chunkedHeap.GetChunkPtr(addr.chunkIndex) + addr.offset; + m_chunkedHeap.GetChunkVkMemInfo(addr.chunkIndex, res->vkBuffer, res->vkMem); + 
return res; +} + +void VKRSynchronizedHeapAllocator::FreeReservation(AllocatorReservation* uploadReservation) +{ + // put the allocation on a delayed release queue for the current command buffer + uint64 currentCommandBufferId = VulkanRenderer::GetInstance()->GetCurrentCommandBufferId(); + auto it = std::find_if(m_activeAllocations.begin(), m_activeAllocations.end(), [&uploadReservation](const TrackedAllocation& allocation) { return allocation.allocation.chunkIndex == uploadReservation->bufferIndex && allocation.allocation.offset == uploadReservation->bufferOffset; }); + cemu_assert_debug(it != m_activeAllocations.end()); + m_releaseQueue[currentCommandBufferId].emplace_back(it->allocation); + m_activeAllocations.erase(it); + m_poolAllocatorReservation.freeObj(uploadReservation); +} + +void VKRSynchronizedHeapAllocator::FlushReservation(AllocatorReservation* uploadReservation) +{ + if (m_chunkedHeap.RequiresFlush(uploadReservation->bufferIndex)) + { + VkMappedMemoryRange flushedRange{}; + flushedRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE; + flushedRange.memory = uploadReservation->vkMem; + flushedRange.offset = uploadReservation->bufferOffset; + flushedRange.size = uploadReservation->size; + vkFlushMappedMemoryRanges(VulkanRenderer::GetInstance()->GetLogicalDevice(), 1, &flushedRange); + } +} + +void VKRSynchronizedHeapAllocator::CleanupBuffer(uint64 latestFinishedCommandBufferId) +{ + auto it = m_releaseQueue.begin(); + while (it != m_releaseQueue.end()) + { + if (it->first <= latestFinishedCommandBufferId) + { + // release allocations + for(auto& addr : it->second) + m_chunkedHeap.free(addr); + it = m_releaseQueue.erase(it); + continue; + } + it++; + } +} + +void VKRSynchronizedHeapAllocator::GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const +{ + m_chunkedHeap.GetStats(numBuffers, totalBufferSize, freeBufferSize); +} + /* VkTextureChunkedHeap */ uint32 VkTextureChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 
minimumAllocationSize) @@ -175,7 +239,7 @@ uint32 VkTextureChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumA m_list_chunkInfo.resize(m_list_chunkInfo.size() + 1); // pad minimumAllocationSize to 32KB alignment - minimumAllocationSize = (minimumAllocationSize + (32*1024-1)) & ~(32 * 1024 - 1); + minimumAllocationSize = (minimumAllocationSize + (32 * 1024 - 1)) & ~(32 * 1024 - 1); uint32 allocationSize = 1024 * 1024 * 128; if (chunkIndex == 0) @@ -189,8 +253,7 @@ uint32 VkTextureChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumA std::vector deviceLocalMemoryTypeIndices = m_vkrMemoryManager->FindMemoryTypes(m_typeFilter, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); std::vector hostLocalMemoryTypeIndices = m_vkrMemoryManager->FindMemoryTypes(m_typeFilter, 0); // remove device local memory types from host local vector - auto pred = [&deviceLocalMemoryTypeIndices](const uint32& v) ->bool - { + auto pred = [&deviceLocalMemoryTypeIndices](const uint32& v) -> bool { return std::find(deviceLocalMemoryTypeIndices.begin(), deviceLocalMemoryTypeIndices.end(), v) != deviceLocalMemoryTypeIndices.end(); }; hostLocalMemoryTypeIndices.erase(std::remove_if(hostLocalMemoryTypeIndices.begin(), hostLocalMemoryTypeIndices.end(), pred), hostLocalMemoryTypeIndices.end()); @@ -206,7 +269,7 @@ uint32 VkTextureChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumA allocInfo.memoryTypeIndex = memType; VkDeviceMemory imageMemory; - VkResult r = vkAllocateMemory(m_device, &allocInfo, nullptr, &imageMemory); + VkResult r = vkAllocateMemory(VulkanRenderer::GetInstance()->GetLogicalDevice(), &allocInfo, nullptr, &imageMemory); if (r != VK_SUCCESS) continue; m_list_chunkInfo[chunkIndex].mem = imageMemory; @@ -221,7 +284,7 @@ uint32 VkTextureChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumA allocInfo.memoryTypeIndex = memType; VkDeviceMemory imageMemory; - VkResult r = vkAllocateMemory(m_device, &allocInfo, nullptr, &imageMemory); + VkResult r = 
vkAllocateMemory(VulkanRenderer::GetInstance()->GetLogicalDevice(), &allocInfo, nullptr, &imageMemory); if (r != VK_SUCCESS) continue; m_list_chunkInfo[chunkIndex].mem = imageMemory; @@ -238,6 +301,68 @@ uint32 VkTextureChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumA return 0; } +/* VkBufferChunkedHeap */ + +VKRBuffer* VKRBuffer::Create(VKR_BUFFER_TYPE bufferType, size_t bufferSize, VkMemoryPropertyFlags properties) +{ + auto* memMgr = VulkanRenderer::GetInstance()->GetMemoryManager(); + VkBuffer buffer; + VkDeviceMemory bufferMemory; + bool allocSuccess; + if (bufferType == VKR_BUFFER_TYPE::STAGING) + allocSuccess = memMgr->CreateBuffer2(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, properties, buffer, bufferMemory); + else if (bufferType == VKR_BUFFER_TYPE::INDEX) + allocSuccess = memMgr->CreateBuffer2(bufferSize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, properties, buffer, bufferMemory); + else if (bufferType == VKR_BUFFER_TYPE::STRIDE) + allocSuccess = memMgr->CreateBuffer2(bufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, properties, buffer, bufferMemory); + else + cemu_assert_debug(false); + if (!allocSuccess) + return nullptr; + + VKRBuffer* bufferObj = new VKRBuffer(buffer, bufferMemory); + // if host visible, then map buffer + void* data = nullptr; + if (properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + { + vkMapMemory(VulkanRenderer::GetInstance()->GetLogicalDevice(), bufferMemory, 0, bufferSize, 0, &data); + bufferObj->m_requiresFlush = !HAS_FLAG(properties, VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + } + bufferObj->m_mappedMemory = (uint8*)data; + return bufferObj; +} + +VKRBuffer::~VKRBuffer() +{ + if (m_mappedMemory) + vkUnmapMemory(VulkanRenderer::GetInstance()->GetLogicalDevice(), m_bufferMemory); + if (m_bufferMemory != VK_NULL_HANDLE) + vkFreeMemory(VulkanRenderer::GetInstance()->GetLogicalDevice(), m_bufferMemory, nullptr); + if (m_buffer != VK_NULL_HANDLE) + vkDestroyBuffer(VulkanRenderer::GetInstance()->GetLogicalDevice(), m_buffer, 
nullptr); +} + +VkBufferChunkedHeap::~VkBufferChunkedHeap() +{ + for (auto& chunk : m_chunkBuffers) + delete chunk; +} + +uint32 VkBufferChunkedHeap::allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) +{ + size_t allocationSize = std::max(m_minimumBufferAllocationSize, minimumAllocationSize); + VKRBuffer* buffer = VKRBuffer::Create(m_bufferType, allocationSize, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if(!buffer) + buffer = VKRBuffer::Create(m_bufferType, allocationSize, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + if(!buffer) + VulkanRenderer::GetInstance()->UnrecoverableError("Failed to allocate buffer memory for VkBufferChunkedHeap"); + cemu_assert_debug(buffer); + cemu_assert_debug(m_chunkBuffers.size() == chunkIndex); + m_chunkBuffers.emplace_back(buffer); + // todo - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT might be worth it? + return allocationSize; +} + uint32_t VKRMemoryManager::FindMemoryType(uint32_t typeFilter, VkMemoryPropertyFlags properties) const { VkPhysicalDeviceMemoryProperties memProperties; @@ -423,7 +548,7 @@ bool VKRMemoryManager::CreateBufferFromHostMemory(void* hostPointer, VkDeviceSiz importHostMem.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT; importHostMem.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT; importHostMem.pHostPointer = hostPointer; - // VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT or + // VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT or // VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT // whats the difference ? 
@@ -469,7 +594,7 @@ VkImageMemAllocation* VKRMemoryManager::imageMemoryAllocate(VkImage image) auto it = map_textureHeap.find(typeFilter); if (it == map_textureHeap.end()) { - texHeap = new VkTextureChunkedHeap(this, typeFilter, m_vkr->GetLogicalDevice()); + texHeap = new VkTextureChunkedHeap(this, typeFilter); map_textureHeap.emplace(typeFilter, texHeap); } else diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.h index bf2d919b..08af5882 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.h @@ -2,6 +2,36 @@ #include "Cafe/HW/Latte/Renderer/Renderer.h" #include "Cafe/HW/Latte/Renderer/Vulkan/VulkanAPI.h" #include "util/ChunkedHeap/ChunkedHeap.h" +#include "util/helpers/MemoryPool.h" + +enum class VKR_BUFFER_TYPE +{ + STAGING, // staging upload buffer + INDEX, // buffer for index data + STRIDE, // buffer for stride-adjusted vertex data +}; + +class VKRBuffer +{ + public: + static VKRBuffer* Create(VKR_BUFFER_TYPE bufferType, size_t bufferSize, VkMemoryPropertyFlags properties); + ~VKRBuffer(); + + VkBuffer GetVkBuffer() const { return m_buffer; } + VkDeviceMemory GetVkBufferMemory() const { return m_bufferMemory; } + + uint8* GetPtr() const { return m_mappedMemory; } + + bool RequiresFlush() const { return m_requiresFlush; } + + private: + VKRBuffer(VkBuffer buffer, VkDeviceMemory bufferMem) : m_buffer(buffer), m_bufferMemory(bufferMem) { }; + + VkBuffer m_buffer; + VkDeviceMemory m_bufferMemory; + uint8* m_mappedMemory; + bool m_requiresFlush{false}; +}; struct VkImageMemAllocation { @@ -14,18 +44,16 @@ struct VkImageMemAllocation uint32 getAllocationSize() { return allocationSize; } }; -class VkTextureChunkedHeap : private ChunkedHeap +class VkTextureChunkedHeap : private ChunkedHeap<> { public: - VkTextureChunkedHeap(class VKRMemoryManager* memoryManager, uint32 typeFilter, VkDevice device) : 
m_vkrMemoryManager(memoryManager), m_typeFilter(typeFilter), m_device(device) { }; + VkTextureChunkedHeap(class VKRMemoryManager* memoryManager, uint32 typeFilter) : m_vkrMemoryManager(memoryManager), m_typeFilter(typeFilter) { }; struct ChunkInfo { VkDeviceMemory mem; }; - uint32 allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) override; - CHAddr allocMem(uint32 size, uint32 alignment) { if (alignment < 4) @@ -43,11 +71,6 @@ public: this->free(addr); } - void setDevice(VkDevice dev) - { - m_device = dev; - } - VkDeviceMemory getChunkMem(uint32 index) { if (index >= m_list_chunkInfo.size()) @@ -57,28 +80,73 @@ public: void getStatistics(uint32& totalHeapSize, uint32& allocatedBytes) const { - totalHeapSize = numHeapBytes; - allocatedBytes = numAllocatedBytes; + totalHeapSize = m_numHeapBytes; + allocatedBytes = m_numAllocatedBytes; } - VkDevice m_device; + private: + uint32 allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) override; + uint32 m_typeFilter{ 0xFFFFFFFF }; class VKRMemoryManager* m_vkrMemoryManager; std::vector m_list_chunkInfo; }; +class VkBufferChunkedHeap : private ChunkedHeap<> +{ + public: + VkBufferChunkedHeap(VKR_BUFFER_TYPE bufferType, size_t minimumBufferAllocationSize) : m_bufferType(bufferType), m_minimumBufferAllocationSize(minimumBufferAllocationSize) { }; + ~VkBufferChunkedHeap(); + + using ChunkedHeap::alloc; + using ChunkedHeap::free; + + uint8* GetChunkPtr(uint32 index) const + { + if (index >= m_chunkBuffers.size()) + return nullptr; + return m_chunkBuffers[index]->GetPtr(); + } + + void GetChunkVkMemInfo(uint32 index, VkBuffer& buffer, VkDeviceMemory& mem) + { + if (index >= m_chunkBuffers.size()) + { + buffer = VK_NULL_HANDLE; + mem = VK_NULL_HANDLE; + return; + } + buffer = m_chunkBuffers[index]->GetVkBuffer(); + mem = m_chunkBuffers[index]->GetVkBufferMemory(); + } + + void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const + { + numBuffers = m_chunkBuffers.size(); 
+ totalBufferSize = m_numHeapBytes; + freeBufferSize = m_numHeapBytes - m_numAllocatedBytes; + } + + bool RequiresFlush(uint32 index) const + { + if (index >= m_chunkBuffers.size()) + return false; + return m_chunkBuffers[index]->RequiresFlush(); + } + + private: + uint32 allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) override; + + VKR_BUFFER_TYPE m_bufferType; + std::vector m_chunkBuffers; + size_t m_minimumBufferAllocationSize; +}; + // a circular ring-buffer which tracks and releases memory per command-buffer class VKRSynchronizedRingAllocator { public: - enum class BUFFER_TYPE - { - STAGING, // staging upload buffer - INDEX, // buffer for index data - STRIDE, // buffer for stride-adjusted vertex data - }; - - VKRSynchronizedRingAllocator(class VulkanRenderer* vkRenderer, class VKRMemoryManager* vkMemoryManager, BUFFER_TYPE bufferType, uint32 minimumBufferAllocSize) : m_vkr(vkRenderer), m_vkrMemMgr(vkMemoryManager), m_bufferType(bufferType), m_minimumBufferAllocSize(minimumBufferAllocSize) {}; + VKRSynchronizedRingAllocator(class VulkanRenderer* vkRenderer, class VKRMemoryManager* vkMemoryManager, VKR_BUFFER_TYPE bufferType, uint32 minimumBufferAllocSize) : m_vkr(vkRenderer), m_vkrMemMgr(vkMemoryManager), m_bufferType(bufferType), m_minimumBufferAllocSize(minimumBufferAllocSize) {}; VKRSynchronizedRingAllocator(const VKRSynchronizedRingAllocator&) = delete; // disallow copy struct BufferSyncPoint_t @@ -126,13 +194,53 @@ private: const class VulkanRenderer* m_vkr; const class VKRMemoryManager* m_vkrMemMgr; - const BUFFER_TYPE m_bufferType; + const VKR_BUFFER_TYPE m_bufferType; const uint32 m_minimumBufferAllocSize; std::vector m_buffers; }; +// heap style allocator with released memory being freed after the current command buffer finishes +class VKRSynchronizedHeapAllocator +{ + struct TrackedAllocation + { + TrackedAllocation(CHAddr allocation) : allocation(allocation) {}; + CHAddr allocation; + }; + + public: + 
VKRSynchronizedHeapAllocator(class VKRMemoryManager* vkMemoryManager, VKR_BUFFER_TYPE bufferType, size_t minimumBufferAllocSize); + VKRSynchronizedHeapAllocator(const VKRSynchronizedHeapAllocator&) = delete; // disallow copy + + struct AllocatorReservation + { + VkBuffer vkBuffer; + VkDeviceMemory vkMem; + uint8* memPtr; + uint32 bufferOffset; + uint32 size; + uint32 bufferIndex; + }; + + AllocatorReservation* AllocateBufferMemory(uint32 size, uint32 alignment); + void FreeReservation(AllocatorReservation* uploadReservation); + void FlushReservation(AllocatorReservation* uploadReservation); + + void CleanupBuffer(uint64 latestFinishedCommandBufferId); + + void GetStats(uint32& numBuffers, size_t& totalBufferSize, size_t& freeBufferSize) const; + private: + const class VKRMemoryManager* m_vkrMemMgr; + VkBufferChunkedHeap m_chunkedHeap; + // allocations + std::vector m_activeAllocations; + MemoryPool m_poolAllocatorReservation{32}; + // release queue + std::unordered_map> m_releaseQueue; +}; + void LatteIndices_invalidateAll(); class VKRMemoryManager @@ -140,9 +248,9 @@ class VKRMemoryManager friend class VKRSynchronizedRingAllocator; public: VKRMemoryManager(class VulkanRenderer* renderer) : - m_stagingBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::STAGING, 32u * 1024 * 1024), - m_indexBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::INDEX, 4u * 1024 * 1024), - m_vertexStrideMetalBuffer(renderer, this, VKRSynchronizedRingAllocator::BUFFER_TYPE::STRIDE, 4u * 1024 * 1024) + m_stagingBuffer(renderer, this, VKR_BUFFER_TYPE::STAGING, 32u * 1024 * 1024), + m_indexBuffer(this, VKR_BUFFER_TYPE::INDEX, 4u * 1024 * 1024), + m_vertexStrideMetalBuffer(renderer, this, VKR_BUFFER_TYPE::STRIDE, 4u * 1024 * 1024) { m_vkr = renderer; } @@ -167,7 +275,7 @@ public: } VKRSynchronizedRingAllocator& getStagingAllocator() { return m_stagingBuffer; }; // allocator for texture/attribute/uniform uploads - VKRSynchronizedRingAllocator& getIndexAllocator() { 
return m_indexBuffer; }; // allocator for index data + VKRSynchronizedHeapAllocator& GetIndexAllocator() { return m_indexBuffer; }; // allocator for index data VKRSynchronizedRingAllocator& getMetalStrideWorkaroundAllocator() { return m_vertexStrideMetalBuffer; }; // allocator for stride-adjusted vertex data void cleanupBuffers(uint64 latestFinishedCommandBufferId) @@ -202,6 +310,6 @@ public: private: class VulkanRenderer* m_vkr; VKRSynchronizedRingAllocator m_stagingBuffer; - VKRSynchronizedRingAllocator m_indexBuffer; + VKRSynchronizedHeapAllocator m_indexBuffer; VKRSynchronizedRingAllocator m_vertexStrideMetalBuffer; }; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp index 98959b2c..6428b42a 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.cpp @@ -681,6 +681,9 @@ VulkanRenderer::~VulkanRenderer() vkDestroyDebugUtilsMessengerEXT(m_instance, m_debugCallback, nullptr); } + // destroy memory manager + delete memoryManager; + // destroy instance, devices if (m_instance != VK_NULL_HANDLE) { @@ -692,9 +695,6 @@ VulkanRenderer::~VulkanRenderer() vkDestroyInstance(m_instance, nullptr); } - // destroy memory manager - delete memoryManager; - // crashes? 
//glslang::FinalizeProcess(); } @@ -3701,7 +3701,7 @@ void VulkanRenderer::bufferCache_copyStreamoutToMainBuffer(uint32 srcOffset, uin void VulkanRenderer::AppendOverlayDebugInfo() { - ImGui::Text("--- Vulkan info ---"); + ImGui::Text("--- Vulkan debug info ---"); ImGui::Text("GfxPipelines %u", performanceMonitor.vk.numGraphicPipelines.get()); ImGui::Text("DescriptorSets %u", performanceMonitor.vk.numDescriptorSets.get()); ImGui::Text("DS ImgSamplers %u", performanceMonitor.vk.numDescriptorSamplerTextures.get()); @@ -3719,7 +3719,7 @@ void VulkanRenderer::AppendOverlayDebugInfo() ImGui::Text("BeginRP/f %u", performanceMonitor.vk.numBeginRenderpassPerFrame.get()); ImGui::Text("Barriers/f %u", performanceMonitor.vk.numDrawBarriersPerFrame.get()); - ImGui::Text("--- Cache info ---"); + ImGui::Text("--- Cache debug info ---"); uint32 bufferCacheHeapSize = 0; uint32 bufferCacheAllocationSize = 0; @@ -3739,7 +3739,7 @@ void VulkanRenderer::AppendOverlayDebugInfo() ImGui::SameLine(60.0f); ImGui::Text("%06uKB / %06uKB Buffers: %u", ((uint32)(totalSize - freeSize) + 1023) / 1024, ((uint32)totalSize + 1023) / 1024, (uint32)numBuffers); - memoryManager->getIndexAllocator().GetStats(numBuffers, totalSize, freeSize); + memoryManager->GetIndexAllocator().GetStats(numBuffers, totalSize, freeSize); ImGui::Text("Index"); ImGui::SameLine(60.0f); ImGui::Text("%06uKB / %06uKB Buffers: %u", ((uint32)(totalSize - freeSize) + 1023) / 1024, ((uint32)totalSize + 1023) / 1024, (uint32)numBuffers); diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h index 867647a3..01b3def3 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h @@ -327,8 +327,9 @@ public: RendererShader* shader_create(RendererShader::ShaderType type, uint64 baseHash, uint64 auxHash, const std::string& source, bool isGameShader, bool isGfxPackShader) override; - void* 
indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) override; - void indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) override; + IndexAllocation indexData_reserveIndexMemory(uint32 size) override; + void indexData_releaseIndexMemory(IndexAllocation& allocation) override; + void indexData_uploadIndexMemory(IndexAllocation& allocation) override; // externally callable void GetTextureFormatInfoVK(Latte::E_GX2SURFFMT format, bool isDepth, Latte::E_DIM dim, sint32 width, sint32 height, FormatInfoVK* formatInfoOut); diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index 036ae895..9a57eeab 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -357,18 +357,20 @@ PipelineInfo* VulkanRenderer::draw_getOrCreateGraphicsPipeline(uint32 indexCount return draw_createGraphicsPipeline(indexCount); } -void* VulkanRenderer::indexData_reserveIndexMemory(uint32 size, uint32& offset, uint32& bufferIndex) +Renderer::IndexAllocation VulkanRenderer::indexData_reserveIndexMemory(uint32 size) { - auto& indexAllocator = this->memoryManager->getIndexAllocator(); - auto resv = indexAllocator.AllocateBufferMemory(size, 32); - offset = resv.bufferOffset; - bufferIndex = resv.bufferIndex; - return resv.memPtr; + VKRSynchronizedHeapAllocator::AllocatorReservation* resv = memoryManager->GetIndexAllocator().AllocateBufferMemory(size, 32); + return { resv->memPtr, resv }; } -void VulkanRenderer::indexData_uploadIndexMemory(uint32 bufferIndex, uint32 offset, uint32 size) +void VulkanRenderer::indexData_releaseIndexMemory(IndexAllocation& allocation) { - // does nothing since the index buffer memory is coherent + memoryManager->GetIndexAllocator().FreeReservation((VKRSynchronizedHeapAllocator::AllocatorReservation*)allocation.rendererInternal); +} + +void 
VulkanRenderer::indexData_uploadIndexMemory(IndexAllocation& allocation) +{ + memoryManager->GetIndexAllocator().FlushReservation((VKRSynchronizedHeapAllocator::AllocatorReservation*)allocation.rendererInternal); } float s_vkUniformData[512 * 4]; @@ -1413,14 +1415,15 @@ void VulkanRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 uint32 hostIndexCount; uint32 indexMin = 0; uint32 indexMax = 0; - uint32 indexBufferOffset = 0; - uint32 indexBufferIndex = 0; - LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexBufferOffset, indexBufferIndex); - + Renderer::IndexAllocation indexAllocation; + LatteIndices_decode(memory_getPointerFromVirtualOffset(indexDataMPTR), indexType, count, primitiveMode, indexMin, indexMax, hostIndexType, hostIndexCount, indexAllocation); + VKRSynchronizedHeapAllocator::AllocatorReservation* indexReservation = (VKRSynchronizedHeapAllocator::AllocatorReservation*)indexAllocation.rendererInternal; // update index binding bool isPrevIndexData = false; if (hostIndexType != INDEX_TYPE::NONE) { + uint32 indexBufferIndex = indexReservation->bufferIndex; + uint32 indexBufferOffset = indexReservation->bufferOffset; if (m_state.activeIndexBufferOffset != indexBufferOffset || m_state.activeIndexBufferIndex != indexBufferIndex || m_state.activeIndexType != hostIndexType) { m_state.activeIndexType = hostIndexType; @@ -1433,7 +1436,7 @@ void VulkanRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32 vkType = VK_INDEX_TYPE_UINT32; else cemu_assert(false); - vkCmdBindIndexBuffer(m_state.currentCommandBuffer, memoryManager->getIndexAllocator().GetBufferByIndex(indexBufferIndex), indexBufferOffset, vkType); + vkCmdBindIndexBuffer(m_state.currentCommandBuffer, indexReservation->vkBuffer, indexBufferOffset, vkType); } else isPrevIndexData = true; diff --git a/src/Cafe/OS/libs/nsyshid/AttachDefaultBackends.cpp 
b/src/Cafe/OS/libs/nsyshid/AttachDefaultBackends.cpp index fc8e496c..67eb0240 100644 --- a/src/Cafe/OS/libs/nsyshid/AttachDefaultBackends.cpp +++ b/src/Cafe/OS/libs/nsyshid/AttachDefaultBackends.cpp @@ -1,24 +1,12 @@ #include "nsyshid.h" #include "Backend.h" #include "BackendEmulated.h" - -#if NSYSHID_ENABLE_BACKEND_LIBUSB - #include "BackendLibusb.h" -#endif - -#if NSYSHID_ENABLE_BACKEND_WINDOWS_HID - -#include "BackendWindowsHID.h" - -#endif - namespace nsyshid::backend { void AttachDefaultBackends() { -#if NSYSHID_ENABLE_BACKEND_LIBUSB // add libusb backend { auto backendLibusb = std::make_shared(); @@ -27,17 +15,6 @@ namespace nsyshid::backend AttachBackend(backendLibusb); } } -#endif // NSYSHID_ENABLE_BACKEND_LIBUSB -#if NSYSHID_ENABLE_BACKEND_WINDOWS_HID - // add windows hid backend - { - auto backendWindowsHID = std::make_shared(); - if (backendWindowsHID->IsInitialisedOk()) - { - AttachBackend(backendWindowsHID); - } - } -#endif // NSYSHID_ENABLE_BACKEND_WINDOWS_HID // add emulated backend { auto backendEmulated = std::make_shared(); diff --git a/src/Cafe/OS/libs/nsyshid/Backend.h b/src/Cafe/OS/libs/nsyshid/Backend.h index 12362773..67dad4fe 100644 --- a/src/Cafe/OS/libs/nsyshid/Backend.h +++ b/src/Cafe/OS/libs/nsyshid/Backend.h @@ -1,5 +1,4 @@ -#ifndef CEMU_NSYSHID_BACKEND_H -#define CEMU_NSYSHID_BACKEND_H +#pragma once #include #include @@ -26,9 +25,9 @@ namespace nsyshid struct TransferCommand { uint8* data; - sint32 length; + uint32 length; - TransferCommand(uint8* data, sint32 length) + TransferCommand(uint8* data, uint32 length) : data(data), length(length) { } @@ -39,7 +38,7 @@ namespace nsyshid { sint32 bytesRead; - ReadMessage(uint8* data, sint32 length, sint32 bytesRead) + ReadMessage(uint8* data, uint32 length, sint32 bytesRead) : bytesRead(bytesRead), TransferCommand(data, length) { } @@ -50,7 +49,7 @@ namespace nsyshid { sint32 bytesWritten; - WriteMessage(uint8* data, sint32 length, sint32 bytesWritten) + WriteMessage(uint8* data, uint32 
length, sint32 bytesWritten) : bytesWritten(bytesWritten), TransferCommand(data, length) { } @@ -59,14 +58,11 @@ namespace nsyshid struct ReportMessage final : TransferCommand { - uint8* reportData; - sint32 length; - uint8* originalData; - sint32 originalLength; + uint8 reportType; + uint8 reportId; - ReportMessage(uint8* reportData, sint32 length, uint8* originalData, sint32 originalLength) - : reportData(reportData), length(length), originalData(originalData), - originalLength(originalLength), TransferCommand(reportData, length) + ReportMessage(uint8 reportType, uint8 reportId, uint8* data, uint32 length) + : reportType(reportType), reportId(reportId), TransferCommand(data, length) { } using TransferCommand::TransferCommand; @@ -77,7 +73,8 @@ namespace nsyshid static_assert(offsetof(HID_t, ifIndex) == 0xC, ""); static_assert(offsetof(HID_t, protocol) == 0xE, ""); - class Device { + class Device + { public: Device() = delete; @@ -131,16 +128,21 @@ namespace nsyshid virtual bool GetDescriptor(uint8 descType, uint8 descIndex, - uint8 lang, + uint16 lang, uint8* output, uint32 outputMaxLength) = 0; + virtual bool SetIdle(uint8 ifIndex, + uint8 reportId, + uint8 duration) = 0; + virtual bool SetProtocol(uint8 ifIndex, uint8 protocol) = 0; virtual bool SetReport(ReportMessage* message) = 0; }; - class Backend { + class Backend + { public: Backend(); @@ -188,5 +190,3 @@ namespace nsyshid void AttachDefaultBackends(); } } // namespace nsyshid - -#endif // CEMU_NSYSHID_BACKEND_H diff --git a/src/Cafe/OS/libs/nsyshid/BackendLibusb.cpp b/src/Cafe/OS/libs/nsyshid/BackendLibusb.cpp index ab355136..b5dd0e0f 100644 --- a/src/Cafe/OS/libs/nsyshid/BackendLibusb.cpp +++ b/src/Cafe/OS/libs/nsyshid/BackendLibusb.cpp @@ -1,7 +1,5 @@ #include "BackendLibusb.h" -#if NSYSHID_ENABLE_BACKEND_LIBUSB - namespace nsyshid::backend::libusb { BackendLibusb::BackendLibusb() @@ -16,7 +14,7 @@ namespace nsyshid::backend::libusb { m_ctx = nullptr; cemuLog_logDebug(LogType::Force, 
"nsyshid::BackendLibusb: failed to initialize libusb, return code: {}", - m_initReturnCode); + m_initReturnCode); return; } @@ -35,8 +33,8 @@ namespace nsyshid::backend::libusb if (ret != LIBUSB_SUCCESS) { cemuLog_logDebug(LogType::Force, - "nsyshid::BackendLibusb: failed to register hotplug callback with return code {}", - ret); + "nsyshid::BackendLibusb: failed to register hotplug callback with return code {}", + ret); } else { @@ -53,8 +51,8 @@ namespace nsyshid::backend::libusb if (ret != 0) { cemuLog_logDebug(LogType::Force, - "nsyshid::BackendLibusb: hotplug thread: error handling events: {}", - ret); + "nsyshid::BackendLibusb: hotplug thread: error handling events: {}", + ret); std::this_thread::sleep_for(std::chrono::milliseconds(1000)); } } @@ -139,8 +137,8 @@ namespace nsyshid::backend::libusb case LIBUSB_HOTPLUG_EVENT_DEVICE_ARRIVED: { cemuLog_logDebug(LogType::Force, "nsyshid::BackendLibusb::OnHotplug(): device arrived: {:04x}:{:04x}", - desc.idVendor, - desc.idProduct); + desc.idVendor, + desc.idProduct); auto device = CheckAndCreateDevice(dev); if (device != nullptr) { @@ -167,8 +165,8 @@ namespace nsyshid::backend::libusb case LIBUSB_HOTPLUG_EVENT_DEVICE_LEFT: { cemuLog_logDebug(LogType::Force, "nsyshid::BackendLibusb::OnHotplug(): device left: {:04x}:{:04x}", - desc.idVendor, - desc.idProduct); + desc.idVendor, + desc.idProduct); auto device = FindLibusbDevice(dev); if (device != nullptr) { @@ -204,7 +202,7 @@ namespace nsyshid::backend::libusb if (ret < 0) { cemuLog_logDebug(LogType::Force, - "nsyshid::BackendLibusb::FindLibusbDevice(): failed to get device descriptor"); + "nsyshid::BackendLibusb::FindLibusbDevice(): failed to get device descriptor"); return nullptr; } uint8 busNumber = libusb_get_bus_number(dev); @@ -269,12 +267,12 @@ namespace nsyshid::backend::libusb if (desc.idVendor == 0x0e6f && desc.idProduct == 0x0241) { cemuLog_logDebug(LogType::Force, - "nsyshid::BackendLibusb::CheckAndCreateDevice(): lego dimensions portal detected"); + 
"nsyshid::BackendLibusb::CheckAndCreateDevice(): lego dimensions portal detected"); } auto device = std::make_shared(m_ctx, desc.idVendor, desc.idProduct, - 1, + 0, 2, 0, libusb_get_bus_number(dev), @@ -446,12 +444,13 @@ namespace nsyshid::backend::libusb } this->m_handleInUseCounter = 0; } + + int ret = ClaimAllInterfaces(0); + + if (ret != 0) { - int ret = ClaimAllInterfaces(0); - if (ret != 0) - { - cemuLog_logDebug(LogType::Force, "nsyshid::DeviceLibusb::open(): cannot claim interface"); - } + cemuLog_log(LogType::Force, "nsyshid::DeviceLibusb::open(): cannot claim interface for config 0"); + return false; } } @@ -475,7 +474,7 @@ namespace nsyshid::backend::libusb { m_handleInUseCounterDecremented.wait(lock); } - libusb_release_interface(handle, 0); + ReleaseAllInterfacesForCurrentConfig(); libusb_close(handle); m_handleInUseCounter = -1; m_handleInUseCounterDecremented.notify_all(); @@ -493,21 +492,26 @@ namespace nsyshid::backend::libusb if (!handleLock->IsValid()) { cemuLog_logDebug(LogType::Force, - "nsyshid::DeviceLibusb::read(): cannot read from a non-opened device\n"); + "nsyshid::DeviceLibusb::read(): cannot read from a non-opened device\n"); return ReadResult::Error; } + for (int i = 0; i < m_config_descriptors.size(); i++) + { + ClaimAllInterfaces(i); + } + const unsigned int timeout = 50; int actualLength = 0; int ret = 0; do { - ret = libusb_bulk_transfer(handleLock->GetHandle(), - this->m_libusbEndpointIn, - message->data, - message->length, - &actualLength, - timeout); + ret = libusb_interrupt_transfer(handleLock->GetHandle(), + this->m_libusbEndpointIn, + message->data, + message->length, + &actualLength, + timeout); } while (ret == LIBUSB_ERROR_TIMEOUT && actualLength == 0 && IsOpened()); @@ -521,8 +525,8 @@ namespace nsyshid::backend::libusb return ReadResult::Success; } cemuLog_logDebug(LogType::Force, - "nsyshid::DeviceLibusb::read(): failed with error code: {}", - ret); + "nsyshid::DeviceLibusb::read(): failed at endpoint 0x{:02x} with error 
message: {}", this->m_libusbEndpointIn, + libusb_error_name(ret)); return ReadResult::Error; } @@ -532,18 +536,23 @@ namespace nsyshid::backend::libusb if (!handleLock->IsValid()) { cemuLog_logDebug(LogType::Force, - "nsyshid::DeviceLibusb::write(): cannot write to a non-opened device\n"); + "nsyshid::DeviceLibusb::write(): cannot write to a non-opened device\n"); return WriteResult::Error; } + for (int i = 0; i < m_config_descriptors.size(); i++) + { + ClaimAllInterfaces(i); + } + message->bytesWritten = 0; int actualLength = 0; - int ret = libusb_bulk_transfer(handleLock->GetHandle(), - this->m_libusbEndpointOut, - message->data, - message->length, - &actualLength, - 0); + int ret = libusb_interrupt_transfer(handleLock->GetHandle(), + this->m_libusbEndpointOut, + message->data, + message->length, + &actualLength, + 0); if (ret == 0) { @@ -556,14 +565,14 @@ namespace nsyshid::backend::libusb return WriteResult::Success; } cemuLog_logDebug(LogType::Force, - "nsyshid::DeviceLibusb::write(): failed with error code: {}", - ret); + "nsyshid::DeviceLibusb::write(): failed with error code: {}", + ret); return WriteResult::Error; } bool DeviceLibusb::GetDescriptor(uint8 descType, uint8 descIndex, - uint8 lang, + uint16 lang, uint8* output, uint32 outputMaxLength) { @@ -579,7 +588,6 @@ namespace nsyshid::backend::libusb struct libusb_config_descriptor* conf = nullptr; libusb_device* dev = libusb_get_device(handleLock->GetHandle()); int ret = libusb_get_active_config_descriptor(dev, &conf); - if (ret == 0) { std::vector configurationDescriptor(conf->wTotalLength); @@ -656,7 +664,6 @@ namespace nsyshid::backend::libusb extraReadPointer += bLength; } } - for (int endpointIndex = 0; endpointIndex < altsetting.bNumEndpoints; endpointIndex++) { // endpoint descriptor @@ -681,24 +688,61 @@ namespace nsyshid::backend::libusb uint32 bytesWritten = currentWritePtr - &configurationDescriptor[0]; libusb_free_config_descriptor(conf); cemu_assert_debug(bytesWritten <= 
conf->wTotalLength); - memcpy(output, &configurationDescriptor[0], std::min(outputMaxLength, bytesWritten)); return true; } - else - { - cemuLog_logDebug(LogType::Force, - "nsyshid::DeviceLibusb::getDescriptor(): failed to get config descriptor with error code: {}", - ret); - return false; - } } else { - cemu_assert_unimplemented(); + uint16 wValue = uint16(descType) << 8 | uint16(descIndex); + // HID Get_Descriptor requests are handled via libusb_control_transfer + int ret = libusb_control_transfer(handleLock->GetHandle(), + LIBUSB_REQUEST_TYPE_STANDARD | LIBUSB_ENDPOINT_IN, + LIBUSB_REQUEST_GET_DESCRIPTOR, + wValue, + lang, + output, + outputMaxLength, + 0); + if (ret != outputMaxLength) + { + cemuLog_logDebug(LogType::Force, "nsyshid::DeviceLibusb::GetDescriptor(): Control Transfer Failed: {}", libusb_error_name(ret)); + return false; + } } - return false; + return true; + } + + bool DeviceLibusb::SetIdle(uint8 ifIndex, + uint8 reportId, + uint8 duration) + { + auto handleLock = AquireHandleLock(); + if (!handleLock->IsValid()) + { + cemuLog_log(LogType::Force, "nsyshid::DeviceLibusb::SetIdle(): device is not opened"); + return false; + } + + uint16 wValue = uint16(duration) << 8 | uint16(reportId); + + // HID Set_Idle requests are handled via libusb_control_transfer + int ret = libusb_control_transfer(handleLock->GetHandle(), + LIBUSB_REQUEST_TYPE_CLASS | LIBUSB_RECIPIENT_INTERFACE | LIBUSB_ENDPOINT_OUT, + HID_CLASS_SET_IDLE, // Defined in HID Class Specific Requests (7.2) + wValue, + ifIndex, + nullptr, + 0, + 0); + + if (ret != 0) + { + cemuLog_logDebug(LogType::Force, "nsyshid::DeviceLibusb::SetIdle(): Control Transfer Failed: {}", libusb_error_name(ret)); + return false; + } + return true; } template @@ -767,18 +811,22 @@ namespace nsyshid::backend::libusb cemuLog_logDebug(LogType::Force, "nsyshid::DeviceLibusb::SetProtocol(): device is not opened"); return false; } - if (m_interfaceIndex != ifIndex) - m_interfaceIndex = ifIndex; - 
ReleaseAllInterfacesForCurrentConfig(); - int ret = libusb_set_configuration(AquireHandleLock()->GetHandle(), protocol); - if (ret == LIBUSB_SUCCESS) - ret = ClaimAllInterfaces(protocol); + int ret = libusb_control_transfer(handleLock->GetHandle(), + LIBUSB_REQUEST_TYPE_CLASS | LIBUSB_RECIPIENT_INTERFACE | LIBUSB_ENDPOINT_OUT, + HID_CLASS_SET_PROTOCOL, // Defined in HID Class Specific Requests (7.2) + protocol, + ifIndex, + nullptr, + 0, + 0); - if (ret == LIBUSB_SUCCESS) - return true; - - return false; + if (ret != 0) + { + cemuLog_logDebug(LogType::Force, "nsyshid::DeviceLibusb::SetProtocol(): Control Transfer Failed: {}", libusb_error_name(ret)); + return false; + } + return true; } bool DeviceLibusb::SetReport(ReportMessage* message) @@ -790,18 +838,20 @@ namespace nsyshid::backend::libusb return false; } + uint16 wValue = uint16(message->reportType) << 8 | uint16(message->reportId); + int ret = libusb_control_transfer(handleLock->GetHandle(), LIBUSB_REQUEST_TYPE_CLASS | LIBUSB_RECIPIENT_INTERFACE | LIBUSB_ENDPOINT_OUT, - LIBUSB_REQUEST_SET_CONFIGURATION, - 512, - 0, - message->originalData, - message->originalLength, + HID_CLASS_SET_REPORT, // Defined in HID Class Specific Requests (7.2) + wValue, + m_interfaceIndex, + message->data, + uint16(message->length & 0xFFFF), 0); - if (ret != message->originalLength) + if (ret != message->length) { - cemuLog_logDebug(LogType::Force, "nsyshid::DeviceLibusb::SetReport(): Control Transfer Failed: {}", libusb_error_name(ret)); + cemuLog_logDebug(LogType::Force, "nsyshid::DeviceLibusb::SetReport(): Control Transfer Failed at interface {} : {}", m_interfaceIndex, libusb_error_name(ret)); return false; } return true; @@ -854,5 +904,3 @@ namespace nsyshid::backend::libusb return m_handle; } } // namespace nsyshid::backend::libusb - -#endif // NSYSHID_ENABLE_BACKEND_LIBUSB diff --git a/src/Cafe/OS/libs/nsyshid/BackendLibusb.h b/src/Cafe/OS/libs/nsyshid/BackendLibusb.h index a7b23769..6b2d8e1a 100644 --- 
a/src/Cafe/OS/libs/nsyshid/BackendLibusb.h +++ b/src/Cafe/OS/libs/nsyshid/BackendLibusb.h @@ -1,15 +1,20 @@ -#ifndef CEMU_NSYSHID_BACKEND_LIBUSB_H -#define CEMU_NSYSHID_BACKEND_LIBUSB_H - #include "nsyshid.h" -#if NSYSHID_ENABLE_BACKEND_LIBUSB - #include #include "Backend.h" namespace nsyshid::backend::libusb { + enum : uint8 + { + HID_CLASS_GET_REPORT = 0x01, + HID_CLASS_GET_IDLE = 0x02, + HID_CLASS_GET_PROTOCOL = 0x03, + HID_CLASS_SET_REPORT = 0x09, + HID_CLASS_SET_IDLE = 0x0A, + HID_CLASS_SET_PROTOCOL = 0x0B + }; + class BackendLibusb : public nsyshid::Backend { public: BackendLibusb(); @@ -75,10 +80,14 @@ namespace nsyshid::backend::libusb bool GetDescriptor(uint8 descType, uint8 descIndex, - uint8 lang, + uint16 lang, uint8* output, uint32 outputMaxLength) override; + bool SetIdle(uint8 ifIndex, + uint8 reportId, + uint8 duration) override; + bool SetProtocol(uint8 ifIndex, uint8 protocol) override; int ClaimAllInterfaces(uint8 config_num); @@ -134,7 +143,3 @@ namespace nsyshid::backend::libusb std::unique_ptr AquireHandleLock(); }; } // namespace nsyshid::backend::libusb - -#endif // NSYSHID_ENABLE_BACKEND_LIBUSB - -#endif // CEMU_NSYSHID_BACKEND_LIBUSB_H diff --git a/src/Cafe/OS/libs/nsyshid/BackendWindowsHID.cpp b/src/Cafe/OS/libs/nsyshid/BackendWindowsHID.cpp deleted file mode 100644 index 267111b2..00000000 --- a/src/Cafe/OS/libs/nsyshid/BackendWindowsHID.cpp +++ /dev/null @@ -1,444 +0,0 @@ -#include "BackendWindowsHID.h" - -#if NSYSHID_ENABLE_BACKEND_WINDOWS_HID - -#include -#include -#include - -#pragma comment(lib, "Setupapi.lib") -#pragma comment(lib, "hid.lib") - -DEFINE_GUID(GUID_DEVINTERFACE_HID, - 0x4D1E55B2L, 0xF16F, 0x11CF, 0x88, 0xCB, 0x00, 0x11, 0x11, 0x00, 0x00, 0x30); - -namespace nsyshid::backend::windows -{ - BackendWindowsHID::BackendWindowsHID() - { - } - - void BackendWindowsHID::AttachVisibleDevices() - { - // add all currently connected devices - HDEVINFO hDevInfo; - SP_DEVICE_INTERFACE_DATA DevIntfData; - 
PSP_DEVICE_INTERFACE_DETAIL_DATA DevIntfDetailData; - SP_DEVINFO_DATA DevData; - - DWORD dwSize, dwMemberIdx; - - hDevInfo = SetupDiGetClassDevs(&GUID_DEVINTERFACE_HID, NULL, 0, DIGCF_DEVICEINTERFACE | DIGCF_PRESENT); - - if (hDevInfo != INVALID_HANDLE_VALUE) - { - DevIntfData.cbSize = sizeof(SP_DEVICE_INTERFACE_DATA); - dwMemberIdx = 0; - - SetupDiEnumDeviceInterfaces(hDevInfo, NULL, &GUID_DEVINTERFACE_HID, - dwMemberIdx, &DevIntfData); - - while (GetLastError() != ERROR_NO_MORE_ITEMS) - { - DevData.cbSize = sizeof(DevData); - SetupDiGetDeviceInterfaceDetail( - hDevInfo, &DevIntfData, NULL, 0, &dwSize, NULL); - - DevIntfDetailData = (PSP_DEVICE_INTERFACE_DETAIL_DATA)HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, - dwSize); - DevIntfDetailData->cbSize = sizeof(SP_DEVICE_INTERFACE_DETAIL_DATA); - - if (SetupDiGetDeviceInterfaceDetail(hDevInfo, &DevIntfData, - DevIntfDetailData, dwSize, &dwSize, &DevData)) - { - HANDLE hHIDDevice = OpenDevice(DevIntfDetailData->DevicePath); - if (hHIDDevice != INVALID_HANDLE_VALUE) - { - auto device = CheckAndCreateDevice(DevIntfDetailData->DevicePath, hHIDDevice); - if (device != nullptr) - { - if (IsDeviceWhitelisted(device->m_vendorId, device->m_productId)) - { - if (!AttachDevice(device)) - { - cemuLog_log(LogType::Force, - "nsyshid::BackendWindowsHID: failed to attach device: {:04x}:{:04x}", - device->m_vendorId, - device->m_productId); - } - } - } - CloseHandle(hHIDDevice); - } - } - HeapFree(GetProcessHeap(), 0, DevIntfDetailData); - // next - SetupDiEnumDeviceInterfaces(hDevInfo, NULL, &GUID_DEVINTERFACE_HID, ++dwMemberIdx, &DevIntfData); - } - SetupDiDestroyDeviceInfoList(hDevInfo); - } - } - - BackendWindowsHID::~BackendWindowsHID() - { - } - - bool BackendWindowsHID::IsInitialisedOk() - { - return true; - } - - std::shared_ptr BackendWindowsHID::CheckAndCreateDevice(wchar_t* devicePath, HANDLE hDevice) - { - HIDD_ATTRIBUTES hidAttr; - hidAttr.Size = sizeof(HIDD_ATTRIBUTES); - if (HidD_GetAttributes(hDevice, &hidAttr) == 
FALSE) - return nullptr; - - auto device = std::make_shared(hidAttr.VendorID, - hidAttr.ProductID, - 1, - 2, - 0, - _wcsdup(devicePath)); - // get additional device info - sint32 maxPacketInputLength = -1; - sint32 maxPacketOutputLength = -1; - PHIDP_PREPARSED_DATA ppData = nullptr; - if (HidD_GetPreparsedData(hDevice, &ppData)) - { - HIDP_CAPS caps; - if (HidP_GetCaps(ppData, &caps) == HIDP_STATUS_SUCCESS) - { - // length includes the report id byte - maxPacketInputLength = caps.InputReportByteLength - 1; - maxPacketOutputLength = caps.OutputReportByteLength - 1; - } - HidD_FreePreparsedData(ppData); - } - if (maxPacketInputLength <= 0 || maxPacketInputLength >= 0xF000) - { - cemuLog_logDebug(LogType::Force, "HID: Input packet length not available or out of range (length = {})", maxPacketInputLength); - maxPacketInputLength = 0x20; - } - if (maxPacketOutputLength <= 0 || maxPacketOutputLength >= 0xF000) - { - cemuLog_logDebug(LogType::Force, "HID: Output packet length not available or out of range (length = {})", maxPacketOutputLength); - maxPacketOutputLength = 0x20; - } - - device->m_maxPacketSizeRX = maxPacketInputLength; - device->m_maxPacketSizeTX = maxPacketOutputLength; - - return device; - } - - DeviceWindowsHID::DeviceWindowsHID(uint16 vendorId, - uint16 productId, - uint8 interfaceIndex, - uint8 interfaceSubClass, - uint8 protocol, - wchar_t* devicePath) - : Device(vendorId, - productId, - interfaceIndex, - interfaceSubClass, - protocol), - m_devicePath(devicePath), - m_hFile(INVALID_HANDLE_VALUE) - { - } - - DeviceWindowsHID::~DeviceWindowsHID() - { - if (m_hFile != INVALID_HANDLE_VALUE) - { - CloseHandle(m_hFile); - m_hFile = INVALID_HANDLE_VALUE; - } - } - - bool DeviceWindowsHID::Open() - { - if (IsOpened()) - { - return true; - } - m_hFile = OpenDevice(m_devicePath); - if (m_hFile == INVALID_HANDLE_VALUE) - { - return false; - } - HidD_SetNumInputBuffers(m_hFile, 2); // don't cache too many reports - return true; - } - - void 
DeviceWindowsHID::Close() - { - if (m_hFile != INVALID_HANDLE_VALUE) - { - CloseHandle(m_hFile); - m_hFile = INVALID_HANDLE_VALUE; - } - } - - bool DeviceWindowsHID::IsOpened() - { - return m_hFile != INVALID_HANDLE_VALUE; - } - - Device::ReadResult DeviceWindowsHID::Read(ReadMessage* message) - { - message->bytesRead = 0; - DWORD bt; - OVERLAPPED ovlp = {0}; - ovlp.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); - - uint8* tempBuffer = (uint8*)malloc(message->length + 1); - sint32 transferLength = 0; // minus report byte - - _debugPrintHex("HID_READ_BEFORE", message->data, message->length); - - cemuLog_logDebug(LogType::Force, "HidRead Begin (Length 0x{:08x})", message->length); - BOOL readResult = ReadFile(this->m_hFile, tempBuffer, message->length + 1, &bt, &ovlp); - if (readResult != FALSE) - { - // sometimes we get the result immediately - if (bt == 0) - transferLength = 0; - else - transferLength = bt - 1; - cemuLog_logDebug(LogType::Force, "HidRead Result received immediately (error 0x{:08x}) Length 0x{:08x}", - GetLastError(), transferLength); - } - else - { - // wait for result - cemuLog_logDebug(LogType::Force, "HidRead WaitForResult (error 0x{:08x})", GetLastError()); - // async hid read is never supposed to return unless there is a response? 
Lego Dimensions stops HIDRead calls as soon as one of them fails with a non-zero error (which includes time out) - DWORD r = WaitForSingleObject(ovlp.hEvent, 2000 * 100); - if (r == WAIT_TIMEOUT) - { - cemuLog_logDebug(LogType::Force, "HidRead internal timeout (error 0x{:08x})", GetLastError()); - // return -108 in case of timeout - free(tempBuffer); - CloseHandle(ovlp.hEvent); - return ReadResult::ErrorTimeout; - } - - cemuLog_logDebug(LogType::Force, "HidRead WaitHalfComplete"); - GetOverlappedResult(this->m_hFile, &ovlp, &bt, false); - if (bt == 0) - transferLength = 0; - else - transferLength = bt - 1; - cemuLog_logDebug(LogType::Force, "HidRead WaitComplete Length: 0x{:08x}", transferLength); - } - sint32 returnCode = 0; - ReadResult result = ReadResult::Success; - if (bt != 0) - { - memcpy(message->data, tempBuffer + 1, transferLength); - sint32 hidReadLength = transferLength; - - char debugOutput[1024] = {0}; - for (sint32 i = 0; i < transferLength; i++) - { - sprintf(debugOutput + i * 3, "%02x ", tempBuffer[1 + i]); - } - cemuLog_logDebug(LogType::Force, "HIDRead data: {}", debugOutput); - - message->bytesRead = transferLength; - result = ReadResult::Success; - } - else - { - cemuLog_log(LogType::Force, "Failed HID read"); - result = ReadResult::Error; - } - free(tempBuffer); - CloseHandle(ovlp.hEvent); - return result; - } - - Device::WriteResult DeviceWindowsHID::Write(WriteMessage* message) - { - message->bytesWritten = 0; - DWORD bt; - OVERLAPPED ovlp = {0}; - ovlp.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); - - uint8* tempBuffer = (uint8*)malloc(message->length + 1); - memcpy(tempBuffer + 1, message->data, message->length); - tempBuffer[0] = 0; // report byte? 
- - cemuLog_logDebug(LogType::Force, "HidWrite Begin (Length 0x{:08x})", message->length); - BOOL writeResult = WriteFile(this->m_hFile, tempBuffer, message->length + 1, &bt, &ovlp); - if (writeResult != FALSE) - { - // sometimes we get the result immediately - cemuLog_logDebug(LogType::Force, "HidWrite Result received immediately (error 0x{:08x}) Length 0x{:08x}", - GetLastError()); - } - else - { - // wait for result - cemuLog_logDebug(LogType::Force, "HidWrite WaitForResult (error 0x{:08x})", GetLastError()); - // todo - check for error type - DWORD r = WaitForSingleObject(ovlp.hEvent, 2000); - if (r == WAIT_TIMEOUT) - { - cemuLog_logDebug(LogType::Force, "HidWrite internal timeout"); - // return -108 in case of timeout - free(tempBuffer); - CloseHandle(ovlp.hEvent); - return WriteResult::ErrorTimeout; - } - - cemuLog_logDebug(LogType::Force, "HidWrite WaitHalfComplete"); - GetOverlappedResult(this->m_hFile, &ovlp, &bt, false); - cemuLog_logDebug(LogType::Force, "HidWrite WaitComplete"); - } - - free(tempBuffer); - CloseHandle(ovlp.hEvent); - - if (bt != 0) - { - message->bytesWritten = message->length; - return WriteResult::Success; - } - return WriteResult::Error; - } - - bool DeviceWindowsHID::GetDescriptor(uint8 descType, - uint8 descIndex, - uint8 lang, - uint8* output, - uint32 outputMaxLength) - { - if (!IsOpened()) - { - cemuLog_logDebug(LogType::Force, "nsyshid::DeviceWindowsHID::getDescriptor(): device is not opened"); - return false; - } - if (descType == 0x02) - { - uint8 configurationDescriptor[0x29]; - - uint8* currentWritePtr; - - // configuration descriptor - currentWritePtr = configurationDescriptor + 0; - *(uint8*)(currentWritePtr + 0) = 9; // bLength - *(uint8*)(currentWritePtr + 1) = 2; // bDescriptorType - *(uint16be*)(currentWritePtr + 2) = 0x0029; // wTotalLength - *(uint8*)(currentWritePtr + 4) = 1; // bNumInterfaces - *(uint8*)(currentWritePtr + 5) = 1; // bConfigurationValue - *(uint8*)(currentWritePtr + 6) = 0; // iConfiguration - 
*(uint8*)(currentWritePtr + 7) = 0x80; // bmAttributes - *(uint8*)(currentWritePtr + 8) = 0xFA; // MaxPower - currentWritePtr = currentWritePtr + 9; - // configuration descriptor - *(uint8*)(currentWritePtr + 0) = 9; // bLength - *(uint8*)(currentWritePtr + 1) = 0x04; // bDescriptorType - *(uint8*)(currentWritePtr + 2) = 0; // bInterfaceNumber - *(uint8*)(currentWritePtr + 3) = 0; // bAlternateSetting - *(uint8*)(currentWritePtr + 4) = 2; // bNumEndpoints - *(uint8*)(currentWritePtr + 5) = 3; // bInterfaceClass - *(uint8*)(currentWritePtr + 6) = 0; // bInterfaceSubClass - *(uint8*)(currentWritePtr + 7) = 0; // bInterfaceProtocol - *(uint8*)(currentWritePtr + 8) = 0; // iInterface - currentWritePtr = currentWritePtr + 9; - // configuration descriptor - *(uint8*)(currentWritePtr + 0) = 9; // bLength - *(uint8*)(currentWritePtr + 1) = 0x21; // bDescriptorType - *(uint16be*)(currentWritePtr + 2) = 0x0111; // bcdHID - *(uint8*)(currentWritePtr + 4) = 0x00; // bCountryCode - *(uint8*)(currentWritePtr + 5) = 0x01; // bNumDescriptors - *(uint8*)(currentWritePtr + 6) = 0x22; // bDescriptorType - *(uint16be*)(currentWritePtr + 7) = 0x001D; // wDescriptorLength - currentWritePtr = currentWritePtr + 9; - // endpoint descriptor 1 - *(uint8*)(currentWritePtr + 0) = 7; // bLength - *(uint8*)(currentWritePtr + 1) = 0x05; // bDescriptorType - *(uint8*)(currentWritePtr + 2) = 0x81; // bEndpointAddress - *(uint8*)(currentWritePtr + 3) = 0x03; // bmAttributes - *(uint16be*)(currentWritePtr + 4) = - this->m_maxPacketSizeRX; // wMaxPacketSize - *(uint8*)(currentWritePtr + 6) = 0x01; // bInterval - currentWritePtr = currentWritePtr + 7; - // endpoint descriptor 2 - *(uint8*)(currentWritePtr + 0) = 7; // bLength - *(uint8*)(currentWritePtr + 1) = 0x05; // bDescriptorType - *(uint8*)(currentWritePtr + 2) = 0x02; // bEndpointAddress - *(uint8*)(currentWritePtr + 3) = 0x03; // bmAttributes - *(uint16be*)(currentWritePtr + 4) = - this->m_maxPacketSizeTX; // wMaxPacketSize - 
*(uint8*)(currentWritePtr + 6) = 0x01; // bInterval - currentWritePtr = currentWritePtr + 7; - - cemu_assert_debug((currentWritePtr - configurationDescriptor) == 0x29); - - memcpy(output, configurationDescriptor, - std::min(outputMaxLength, sizeof(configurationDescriptor))); - return true; - } - else - { - cemu_assert_unimplemented(); - } - return false; - } - - bool DeviceWindowsHID::SetProtocol(uint8 ifIndex, uint8 protocol) - { - // ToDo: implement this - // pretend that everything is fine - return true; - } - - bool DeviceWindowsHID::SetReport(ReportMessage* message) - { - sint32 retryCount = 0; - while (true) - { - BOOL r = HidD_SetOutputReport(this->m_hFile, message->reportData, message->length); - if (r != FALSE) - break; - Sleep(20); // retry - retryCount++; - if (retryCount >= 50) - { - cemuLog_log(LogType::Force, "nsyshid::DeviceWindowsHID::SetReport(): HID SetReport failed"); - return false; - } - } - return true; - } - - HANDLE OpenDevice(wchar_t* devicePath) - { - return CreateFile(devicePath, - GENERIC_READ | GENERIC_WRITE, - FILE_SHARE_READ | - FILE_SHARE_WRITE, - NULL, - OPEN_EXISTING, - FILE_FLAG_OVERLAPPED, - NULL); - } - - void _debugPrintHex(std::string prefix, uint8* data, size_t len) - { - char debugOutput[1024] = {0}; - len = std::min(len, (size_t)100); - for (sint32 i = 0; i < len; i++) - { - sprintf(debugOutput + i * 3, "%02x ", data[i]); - } - cemuLog_logDebug(LogType::Force, "[{}] Data: {}", prefix, debugOutput); - } -} // namespace nsyshid::backend::windows - -#endif // NSYSHID_ENABLE_BACKEND_WINDOWS_HID diff --git a/src/Cafe/OS/libs/nsyshid/BackendWindowsHID.h b/src/Cafe/OS/libs/nsyshid/BackendWindowsHID.h deleted file mode 100644 index 9a8a78e9..00000000 --- a/src/Cafe/OS/libs/nsyshid/BackendWindowsHID.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef CEMU_NSYSHID_BACKEND_WINDOWS_HID_H -#define CEMU_NSYSHID_BACKEND_WINDOWS_HID_H - -#include "nsyshid.h" - -#if NSYSHID_ENABLE_BACKEND_WINDOWS_HID - -#include "Backend.h" - -namespace 
nsyshid::backend::windows -{ - class BackendWindowsHID : public nsyshid::Backend { - public: - BackendWindowsHID(); - - ~BackendWindowsHID(); - - bool IsInitialisedOk() override; - - protected: - void AttachVisibleDevices() override; - - private: - std::shared_ptr CheckAndCreateDevice(wchar_t* devicePath, HANDLE hDevice); - }; - - class DeviceWindowsHID : public nsyshid::Device { - public: - DeviceWindowsHID(uint16 vendorId, - uint16 productId, - uint8 interfaceIndex, - uint8 interfaceSubClass, - uint8 protocol, - wchar_t* devicePath); - - ~DeviceWindowsHID(); - - bool Open() override; - - void Close() override; - - bool IsOpened() override; - - ReadResult Read(ReadMessage* message) override; - - WriteResult Write(WriteMessage* message) override; - - bool GetDescriptor(uint8 descType, uint8 descIndex, uint8 lang, uint8* output, uint32 outputMaxLength) override; - - bool SetProtocol(uint8 ifIndex, uint8 protocol) override; - - bool SetReport(ReportMessage* message) override; - - private: - wchar_t* m_devicePath; - HANDLE m_hFile; - }; - - HANDLE OpenDevice(wchar_t* devicePath); - - void _debugPrintHex(std::string prefix, uint8* data, size_t len); -} // namespace nsyshid::backend::windows - -#endif // NSYSHID_ENABLE_BACKEND_WINDOWS_HID - -#endif // CEMU_NSYSHID_BACKEND_WINDOWS_HID_H diff --git a/src/Cafe/OS/libs/nsyshid/Dimensions.cpp b/src/Cafe/OS/libs/nsyshid/Dimensions.cpp index 8a2acc76..b23560f1 100644 --- a/src/Cafe/OS/libs/nsyshid/Dimensions.cpp +++ b/src/Cafe/OS/libs/nsyshid/Dimensions.cpp @@ -426,7 +426,7 @@ namespace nsyshid bool DimensionsToypadDevice::GetDescriptor(uint8 descType, uint8 descIndex, - uint8 lang, + uint16 lang, uint8* output, uint32 outputMaxLength) { @@ -489,6 +489,13 @@ namespace nsyshid return true; } + bool DimensionsToypadDevice::SetIdle(uint8 ifIndex, + uint8 reportId, + uint8 duration) + { + return true; + } + bool DimensionsToypadDevice::SetProtocol(uint8 ifIndex, uint8 protocol) { cemuLog_log(LogType::Force, "Toypad Protocol"); 
diff --git a/src/Cafe/OS/libs/nsyshid/Dimensions.h b/src/Cafe/OS/libs/nsyshid/Dimensions.h index d5a2a529..00ceff9e 100644 --- a/src/Cafe/OS/libs/nsyshid/Dimensions.h +++ b/src/Cafe/OS/libs/nsyshid/Dimensions.h @@ -25,10 +25,14 @@ namespace nsyshid bool GetDescriptor(uint8 descType, uint8 descIndex, - uint8 lang, + uint16 lang, uint8* output, uint32 outputMaxLength) override; + bool SetIdle(uint8 ifIndex, + uint8 reportId, + uint8 duration) override; + bool SetProtocol(uint8 ifIndex, uint8 protocol) override; bool SetReport(ReportMessage* message) override; diff --git a/src/Cafe/OS/libs/nsyshid/Infinity.cpp b/src/Cafe/OS/libs/nsyshid/Infinity.cpp index ac793109..94ef817e 100644 --- a/src/Cafe/OS/libs/nsyshid/Infinity.cpp +++ b/src/Cafe/OS/libs/nsyshid/Infinity.cpp @@ -387,7 +387,7 @@ namespace nsyshid bool InfinityBaseDevice::GetDescriptor(uint8 descType, uint8 descIndex, - uint8 lang, + uint16 lang, uint8* output, uint32 outputMaxLength) { @@ -450,6 +450,13 @@ namespace nsyshid return true; } + bool InfinityBaseDevice::SetIdle(uint8 ifIndex, + uint8 reportId, + uint8 duration) + { + return true; + } + bool InfinityBaseDevice::SetProtocol(uint8 ifIndex, uint8 protocol) { return true; @@ -492,7 +499,7 @@ namespace nsyshid return response; } - void InfinityUSB::SendCommand(uint8* buf, sint32 originalLength) + void InfinityUSB::SendCommand(uint8* buf, uint32 length) { const uint8 command = buf[2]; const uint8 sequence = buf[3]; diff --git a/src/Cafe/OS/libs/nsyshid/Infinity.h b/src/Cafe/OS/libs/nsyshid/Infinity.h index aa98fd15..81942abd 100644 --- a/src/Cafe/OS/libs/nsyshid/Infinity.h +++ b/src/Cafe/OS/libs/nsyshid/Infinity.h @@ -26,10 +26,14 @@ namespace nsyshid bool GetDescriptor(uint8 descType, uint8 descIndex, - uint8 lang, + uint16 lang, uint8* output, uint32 outputMaxLength) override; + bool SetIdle(uint8 ifIndex, + uint8 reportId, + uint8 duration) override; + bool SetProtocol(uint8 ifIndex, uint8 protocol) override; bool SetReport(ReportMessage* message) 
override; @@ -53,7 +57,7 @@ namespace nsyshid void Save(); }; - void SendCommand(uint8* buf, sint32 originalLength); + void SendCommand(uint8* buf, uint32 length); std::array GetStatus(); void GetBlankResponse(uint8 sequence, std::array& replyBuf); diff --git a/src/Cafe/OS/libs/nsyshid/Skylander.cpp b/src/Cafe/OS/libs/nsyshid/Skylander.cpp index 1b4515ef..9fab17b6 100644 --- a/src/Cafe/OS/libs/nsyshid/Skylander.cpp +++ b/src/Cafe/OS/libs/nsyshid/Skylander.cpp @@ -564,7 +564,7 @@ namespace nsyshid bool SkylanderPortalDevice::GetDescriptor(uint8 descType, uint8 descIndex, - uint8 lang, + uint16 lang, uint8* output, uint32 outputMaxLength) { @@ -583,7 +583,7 @@ namespace nsyshid *(uint8*)(currentWritePtr + 7) = 0x80; // bmAttributes *(uint8*)(currentWritePtr + 8) = 0xFA; // MaxPower currentWritePtr = currentWritePtr + 9; - // configuration descriptor + // interface descriptor *(uint8*)(currentWritePtr + 0) = 9; // bLength *(uint8*)(currentWritePtr + 1) = 0x04; // bDescriptorType *(uint8*)(currentWritePtr + 2) = 0; // bInterfaceNumber @@ -594,7 +594,7 @@ namespace nsyshid *(uint8*)(currentWritePtr + 7) = 0; // bInterfaceProtocol *(uint8*)(currentWritePtr + 8) = 0; // iInterface currentWritePtr = currentWritePtr + 9; - // configuration descriptor + // HID descriptor *(uint8*)(currentWritePtr + 0) = 9; // bLength *(uint8*)(currentWritePtr + 1) = 0x21; // bDescriptorType *(uint16be*)(currentWritePtr + 2) = 0x0111; // bcdHID @@ -608,7 +608,7 @@ namespace nsyshid *(uint8*)(currentWritePtr + 1) = 0x05; // bDescriptorType *(uint8*)(currentWritePtr + 2) = 0x81; // bEndpointAddress *(uint8*)(currentWritePtr + 3) = 0x03; // bmAttributes - *(uint16be*)(currentWritePtr + 4) = 0x40; // wMaxPacketSize + *(uint16be*)(currentWritePtr + 4) = 0x0040; // wMaxPacketSize *(uint8*)(currentWritePtr + 6) = 0x01; // bInterval currentWritePtr = currentWritePtr + 7; // endpoint descriptor 2 @@ -616,7 +616,7 @@ namespace nsyshid *(uint8*)(currentWritePtr + 1) = 0x05; // bDescriptorType 
*(uint8*)(currentWritePtr + 2) = 0x02; // bEndpointAddress *(uint8*)(currentWritePtr + 3) = 0x03; // bmAttributes - *(uint16be*)(currentWritePtr + 4) = 0x40; // wMaxPacketSize + *(uint16be*)(currentWritePtr + 4) = 0x0040; // wMaxPacketSize *(uint8*)(currentWritePtr + 6) = 0x01; // bInterval currentWritePtr = currentWritePtr + 7; @@ -627,6 +627,13 @@ namespace nsyshid return true; } + bool SkylanderPortalDevice::SetIdle(uint8 ifIndex, + uint8 reportId, + uint8 duration) + { + return true; + } + bool SkylanderPortalDevice::SetProtocol(uint8 ifIndex, uint8 protocol) { return true; @@ -634,12 +641,12 @@ namespace nsyshid bool SkylanderPortalDevice::SetReport(ReportMessage* message) { - g_skyportal.ControlTransfer(message->originalData, message->originalLength); + g_skyportal.ControlTransfer(message->data, message->length); std::this_thread::sleep_for(std::chrono::milliseconds(1)); return true; } - void SkylanderUSB::ControlTransfer(uint8* buf, sint32 originalLength) + void SkylanderUSB::ControlTransfer(uint8* buf, uint32 length) { std::array interruptResponse = {}; switch (buf[0]) diff --git a/src/Cafe/OS/libs/nsyshid/Skylander.h b/src/Cafe/OS/libs/nsyshid/Skylander.h index 986ef185..9b9580b0 100644 --- a/src/Cafe/OS/libs/nsyshid/Skylander.h +++ b/src/Cafe/OS/libs/nsyshid/Skylander.h @@ -26,10 +26,14 @@ namespace nsyshid bool GetDescriptor(uint8 descType, uint8 descIndex, - uint8 lang, + uint16 lang, uint8* output, uint32 outputMaxLength) override; + bool SetIdle(uint8 ifIndex, + uint8 reportId, + uint8 duration) override; + bool SetProtocol(uint8 ifIndex, uint8 protocol) override; bool SetReport(ReportMessage* message) override; @@ -70,7 +74,7 @@ namespace nsyshid uint8 blue = 0; }; - void ControlTransfer(uint8* buf, sint32 originalLength); + void ControlTransfer(uint8* buf, uint32 length); void Activate(); void Deactivate(); diff --git a/src/Cafe/OS/libs/nsyshid/nsyshid.cpp b/src/Cafe/OS/libs/nsyshid/nsyshid.cpp index 99a736d9..2fe6da07 100644 --- 
a/src/Cafe/OS/libs/nsyshid/nsyshid.cpp +++ b/src/Cafe/OS/libs/nsyshid/nsyshid.cpp @@ -305,47 +305,37 @@ namespace nsyshid osLib_returnFromFunction(hCPU, 0); } - void export_HIDGetDescriptor(PPCInterpreter_t* hCPU) + void _debugPrintHex(const std::string prefix, const uint8* data, size_t size) { - ppcDefineParamU32(hidHandle, 0); // r3 - ppcDefineParamU8(descType, 1); // r4 - ppcDefineParamU8(descIndex, 2); // r5 - ppcDefineParamU8(lang, 3); // r6 - ppcDefineParamUStr(output, 4); // r7 - ppcDefineParamU32(outputMaxLength, 5); // r8 - ppcDefineParamMPTR(cbFuncMPTR, 6); // r9 - ppcDefineParamMPTR(cbParamMPTR, 7); // r10 + constexpr size_t BYTES_PER_LINE = 16; - int returnValue = -1; - std::shared_ptr device = GetDeviceByHandle(hidHandle, true); - if (device) + std::string out; + for (size_t row_start = 0; row_start < size; row_start += BYTES_PER_LINE) { - memset(output, 0, outputMaxLength); - if (device->GetDescriptor(descType, descIndex, lang, output, outputMaxLength)) + out += fmt::format("{:06x}: ", row_start); + for (size_t i = 0; i < BYTES_PER_LINE; ++i) { - returnValue = 0; + if (row_start + i < size) + { + out += fmt::format("{:02x} ", data[row_start + i]); + } + else + { + out += " "; + } } - else + out += " "; + for (size_t i = 0; i < BYTES_PER_LINE; ++i) { - returnValue = -1; + if (row_start + i < size) + { + char c = static_cast(data[row_start + i]); + out += std::isprint(c, std::locale::classic()) ? 
c : '.'; + } } + out += "\n"; } - else - { - cemu_assert_suspicious(); - } - osLib_returnFromFunction(hCPU, returnValue); - } - - void _debugPrintHex(std::string prefix, uint8* data, size_t len) - { - char debugOutput[1024] = {0}; - len = std::min(len, (size_t)100); - for (sint32 i = 0; i < len; i++) - { - sprintf(debugOutput + i * 3, "%02x ", data[i]); - } - cemuLog_logDebug(LogType::Force, "[{}] Data: {}", prefix, debugOutput); + cemuLog_logDebug(LogType::Force, "[{}] Data: \n{}", prefix, out); } void DoHIDTransferCallback(MPTR callbackFuncMPTR, MPTR callbackParamMPTR, uint32 hidHandle, uint32 errorCode, @@ -354,26 +344,152 @@ namespace nsyshid coreinitAsyncCallback_add(callbackFuncMPTR, 5, hidHandle, errorCode, buffer, length, callbackParamMPTR); } - void export_HIDSetIdle(PPCInterpreter_t* hCPU) + void _hidGetDescriptorAsync(std::shared_ptr device, uint8 descType, uint8 descIndex, uint16 lang, uint8* output, uint32 outputMaxLength, MPTR callbackFuncMPTR, MPTR callbackParamMPTR) { - ppcDefineParamU32(hidHandle, 0); // r3 - ppcDefineParamU32(ifIndex, 1); // r4 - ppcDefineParamU32(ukn, 2); // r5 - ppcDefineParamU32(duration, 3); // r6 - ppcDefineParamMPTR(callbackFuncMPTR, 4); // r7 - ppcDefineParamMPTR(callbackParamMPTR, 5); // r8 - cemuLog_logDebug(LogType::Force, "nsyshid.HIDSetIdle(...)"); - - // todo - if (callbackFuncMPTR) + if (device->GetDescriptor(descType, descIndex, lang, output, outputMaxLength)) { - DoHIDTransferCallback(callbackFuncMPTR, callbackParamMPTR, hidHandle, 0, MPTR_NULL, 0); + DoHIDTransferCallback(callbackFuncMPTR, + callbackParamMPTR, + device->m_hid->handle, + 0, + 0, + 0); } else { - cemu_assert_unimplemented(); + DoHIDTransferCallback(callbackFuncMPTR, + callbackParamMPTR, + device->m_hid->handle, + -1, + 0, + 0); + } + } + + void export_HIDGetDescriptor(PPCInterpreter_t* hCPU) + { + ppcDefineParamU32(hidHandle, 0); // r3 + ppcDefineParamU8(descType, 1); // r4 + ppcDefineParamU8(descIndex, 2); // r5 + ppcDefineParamU16(lang, 3); // r6 
+ ppcDefineParamUStr(output, 4); // r7 + ppcDefineParamU32(outputMaxLength, 5); // r8 + ppcDefineParamMPTR(cbFuncMPTR, 6); // r9 + ppcDefineParamMPTR(cbParamMPTR, 7); // r10 + cemuLog_logDebug(LogType::Force, "nsyshid.HIDGetDescriptor(0x{:08x}, 0x{:02x}, 0x{:02x}, 0x{:04x}, 0x{:x}, 0x{:08x}, 0x{:08x}, 0x{:08x})", + hCPU->gpr[3], hCPU->gpr[4], hCPU->gpr[5], hCPU->gpr[6], hCPU->gpr[7], hCPU->gpr[8], hCPU->gpr[9], hCPU->gpr[10]); + + std::shared_ptr device = GetDeviceByHandle(hidHandle, true); + if (device == nullptr) + { + cemuLog_log(LogType::Force, "nsyshid.HIDGetDescriptor(): Unable to find device with hid handle {}", hidHandle); + osLib_returnFromFunction(hCPU, -1); + return; + } + + // issue request (synchronous or asynchronous) + sint32 returnCode = 0; + if (cbFuncMPTR == MPTR_NULL) + { + // synchronous + returnCode = -1; + if (device->GetDescriptor(descType, descIndex, lang, output, outputMaxLength)) + { + returnCode = outputMaxLength; + } + } + else + { + // asynchronous + std::thread(&_hidGetDescriptorAsync, device, descType, descIndex, lang, output, outputMaxLength, cbFuncMPTR, cbParamMPTR) + .detach(); + returnCode = 0; + } + osLib_returnFromFunction(hCPU, returnCode); + } + + void _hidSetIdleAsync(std::shared_ptr device, uint8 ifIndex, uint8 reportId, uint8 duration, MPTR callbackFuncMPTR, MPTR callbackParamMPTR) + { + if (device->SetIdle(ifIndex, reportId, duration)) + { + DoHIDTransferCallback(callbackFuncMPTR, + callbackParamMPTR, + device->m_hid->handle, + 0, + 0, + 0); + } + else + { + DoHIDTransferCallback(callbackFuncMPTR, + callbackParamMPTR, + device->m_hid->handle, + -1, + 0, + 0); + } + } + + void export_HIDSetIdle(PPCInterpreter_t* hCPU) + { + ppcDefineParamU32(hidHandle, 0); // r3 + ppcDefineParamU8(ifIndex, 1); // r4 + ppcDefineParamU8(reportId, 2); // r5 + ppcDefineParamU8(duration, 3); // r6 + ppcDefineParamMPTR(callbackFuncMPTR, 4); // r7 + ppcDefineParamMPTR(callbackParamMPTR, 5); // r8 + cemuLog_logDebug(LogType::Force, 
"nsyshid.HIDSetIdle(0x{:08x}, 0x{:02x}, 0x{:02x}, 0x{:02x}, 0x{:08x}, 0x{:08x})", hCPU->gpr[3], + hCPU->gpr[4], hCPU->gpr[5], hCPU->gpr[6], hCPU->gpr[7], hCPU->gpr[8]); + + std::shared_ptr device = GetDeviceByHandle(hidHandle, true); + if (device == nullptr) + { + cemuLog_log(LogType::Force, "nsyshid.HIDSetIdle(): Unable to find device with hid handle {}", hidHandle); + osLib_returnFromFunction(hCPU, -1); + return; + } + + // issue request (synchronous or asynchronous) + sint32 returnCode = 0; + if (callbackFuncMPTR == MPTR_NULL) + { + // synchronous + returnCode = -1; + if (device->SetIdle(ifIndex, reportId, duration)) + { + returnCode = 0; + } + } + else + { + // asynchronous + std::thread(&_hidSetIdleAsync, device, ifIndex, reportId, duration, callbackFuncMPTR, callbackParamMPTR) + .detach(); + returnCode = 0; + } + osLib_returnFromFunction(hCPU, returnCode); + } + + void _hidSetProtocolAsync(std::shared_ptr device, uint8 ifIndex, uint8 protocol, MPTR callbackFuncMPTR, MPTR callbackParamMPTR) + { + if (device->SetProtocol(ifIndex, protocol)) + { + DoHIDTransferCallback(callbackFuncMPTR, + callbackParamMPTR, + device->m_hid->handle, + 0, + 0, + 0); + } + else + { + DoHIDTransferCallback(callbackFuncMPTR, + callbackParamMPTR, + device->m_hid->handle, + -1, + 0, + 0); } - osLib_returnFromFunction(hCPU, 0); // for non-async version, return number of bytes transferred } void export_HIDSetProtocol(PPCInterpreter_t* hCPU) @@ -383,51 +499,51 @@ namespace nsyshid ppcDefineParamU8(protocol, 2); // r5 ppcDefineParamMPTR(callbackFuncMPTR, 3); // r6 ppcDefineParamMPTR(callbackParamMPTR, 4); // r7 - cemuLog_logDebug(LogType::Force, "nsyshid.HIDSetProtocol(...)"); + cemuLog_logDebug(LogType::Force, "nsyshid.HIDSetProtocol(0x{:08x}, 0x{:02x}, 0x{:02x}, 0x{:08x}, 0x{:08x})", hCPU->gpr[3], + hCPU->gpr[4], hCPU->gpr[5], hCPU->gpr[6], hCPU->gpr[7]); std::shared_ptr device = GetDeviceByHandle(hidHandle, true); - sint32 returnCode = -1; - if (device) + if (device == nullptr) { - if 
(!device->IsOpened()) + cemuLog_log(LogType::Force, "nsyshid.HIDSetProtocol(): Unable to find device with hid handle {}", hidHandle); + osLib_returnFromFunction(hCPU, -1); + return; + } + // issue request (synchronous or asynchronous) + sint32 returnCode = 0; + if (callbackFuncMPTR == MPTR_NULL) + { + // synchronous + returnCode = -1; + if (device->SetProtocol(ifIndex, protocol)) { - cemuLog_logDebug(LogType::Force, "nsyshid.HIDSetProtocol(): error: device is not opened"); - } - else - { - if (device->SetProtocol(ifIndex, protocol)) - { - returnCode = 0; - } + returnCode = 0; } } else { - cemu_assert_suspicious(); - } - - if (callbackFuncMPTR) - { - DoHIDTransferCallback(callbackFuncMPTR, callbackParamMPTR, hidHandle, 0, MPTR_NULL, 0); + // asynchronous + std::thread(&_hidSetProtocolAsync, device, ifIndex, protocol, callbackFuncMPTR, callbackParamMPTR) + .detach(); + returnCode = 0; } osLib_returnFromFunction(hCPU, returnCode); } // handler for async HIDSetReport transfers - void _hidSetReportAsync(std::shared_ptr device, uint8* reportData, sint32 length, - uint8* originalData, - sint32 originalLength, MPTR callbackFuncMPTR, MPTR callbackParamMPTR) + void _hidSetReportAsync(std::shared_ptr device, uint8 reportType, uint8 reportId, uint8* data, uint32 length, + MPTR callbackFuncMPTR, MPTR callbackParamMPTR) { cemuLog_logDebug(LogType::Force, "_hidSetReportAsync begin"); - ReportMessage message(reportData, length, originalData, originalLength); + ReportMessage message(reportType, reportId, data, length); if (device->SetReport(&message)) { DoHIDTransferCallback(callbackFuncMPTR, callbackParamMPTR, device->m_hid->handle, 0, - memory_getVirtualOffsetFromPointer(originalData), - originalLength); + memory_getVirtualOffsetFromPointer(data), + length); } else { @@ -435,24 +551,22 @@ namespace nsyshid callbackParamMPTR, device->m_hid->handle, -1, - memory_getVirtualOffsetFromPointer(originalData), - 0); + memory_getVirtualOffsetFromPointer(data), + length); } - 
free(reportData); } // handler for synchronous HIDSetReport transfers - sint32 _hidSetReportSync(std::shared_ptr device, uint8* reportData, sint32 length, - uint8* originalData, sint32 originalLength, coreinit::OSEvent* event) + sint32 _hidSetReportSync(std::shared_ptr device, uint8 reportType, uint8 reportId, + uint8* data, uint32 length, coreinit::OSEvent* event) { - _debugPrintHex("_hidSetReportSync Begin", reportData, length); + _debugPrintHex("_hidSetReportSync Begin", data, length); sint32 returnCode = 0; - ReportMessage message(reportData, length, originalData, originalLength); + ReportMessage message(reportType, reportId, data, length); if (device->SetReport(&message)) { - returnCode = originalLength; + returnCode = length; } - free(reportData); cemuLog_logDebug(LogType::Force, "_hidSetReportSync end. returnCode: {}", returnCode); coreinit::OSSignalEvent(event); return returnCode; @@ -461,19 +575,19 @@ namespace nsyshid void export_HIDSetReport(PPCInterpreter_t* hCPU) { ppcDefineParamU32(hidHandle, 0); // r3 - ppcDefineParamU32(reportRelatedUkn, 1); // r4 - ppcDefineParamU32(reportId, 2); // r5 + ppcDefineParamU8(reportType, 1); // r4 + ppcDefineParamU8(reportId, 2); // r5 ppcDefineParamUStr(data, 3); // r6 ppcDefineParamU32(dataLength, 4); // r7 ppcDefineParamMPTR(callbackFuncMPTR, 5); // r8 ppcDefineParamMPTR(callbackParamMPTR, 6); // r9 - cemuLog_logDebug(LogType::Force, "nsyshid.HIDSetReport({},0x{:02x},0x{:02x},...)", hidHandle, reportRelatedUkn, - reportId); + cemuLog_logDebug(LogType::Force, "nsyshid.HIDSetReport(0x{:08x}, 0x{:02x}, 0x{:02x}, 0x{:08x}, 0x{:08x}, 0x{:08x}, 0x{:08x})", hCPU->gpr[3], + hCPU->gpr[4], hCPU->gpr[5], hCPU->gpr[6], hCPU->gpr[7], hCPU->gpr[8], hCPU->gpr[9]); _debugPrintHex("HIDSetReport", data, dataLength); #ifdef CEMU_DEBUG_ASSERT - if (reportRelatedUkn != 2 || reportId != 0) + if (reportType != 2 || reportId != 0) assert_dbg(); #endif @@ -485,15 +599,6 @@ namespace nsyshid return; } - // prepare report data - // note: 
Currently we need to pad the data to 0x20 bytes for it to work (plus one extra byte for HidD_SetOutputReport) - // Does IOSU pad data to 0x20 byte? Also check if this is specific to Skylanders portal - sint32 paddedLength = (dataLength + 0x1F) & ~0x1F; - uint8* reportData = (uint8*)malloc(paddedLength + 1); - memset(reportData, 0, paddedLength + 1); - reportData[0] = 0; - memcpy(reportData + 1, data, dataLength); - // issue request (synchronous or asynchronous) sint32 returnCode = 0; if (callbackFuncMPTR == MPTR_NULL) @@ -501,15 +606,14 @@ namespace nsyshid // synchronous StackAllocator event; coreinit::OSInitEvent(&event, coreinit::OSEvent::EVENT_STATE::STATE_NOT_SIGNALED, coreinit::OSEvent::EVENT_MODE::MODE_AUTO); - std::future res = std::async(std::launch::async, &_hidSetReportSync, device, reportData, - paddedLength + 1, data, dataLength, &event); + std::future res = std::async(std::launch::async, &_hidSetReportSync, device, reportType, reportId, data, dataLength, &event); coreinit::OSWaitEvent(&event); returnCode = res.get(); } else { // asynchronous - std::thread(&_hidSetReportAsync, device, reportData, paddedLength + 1, data, dataLength, + std::thread(&_hidSetReportAsync, device, reportType, reportId, data, dataLength, callbackFuncMPTR, callbackParamMPTR) .detach(); returnCode = 0; @@ -586,7 +690,7 @@ namespace nsyshid ppcDefineParamMPTR(callbackFuncMPTR, 3); // r6 ppcDefineParamMPTR(callbackParamMPTR, 4); // r7 cemuLog_logDebug(LogType::Force, "nsyshid.HIDRead(0x{:x},0x{:08x},0x{:08x},0x{:08x},0x{:08x})", hCPU->gpr[3], - hCPU->gpr[4], hCPU->gpr[5], hCPU->gpr[6], hCPU->gpr[7]); + hCPU->gpr[4], hCPU->gpr[5], hCPU->gpr[6], hCPU->gpr[7]); std::shared_ptr device = GetDeviceByHandle(hidHandle, true); if (device == nullptr) @@ -683,7 +787,7 @@ namespace nsyshid ppcDefineParamMPTR(callbackFuncMPTR, 3); // r6 ppcDefineParamMPTR(callbackParamMPTR, 4); // r7 cemuLog_logDebug(LogType::Force, "nsyshid.HIDWrite(0x{:x},0x{:08x},0x{:08x},0x{:08x},0x{:08x})", hCPU->gpr[3], 
- hCPU->gpr[4], hCPU->gpr[5], hCPU->gpr[6], hCPU->gpr[7]); + hCPU->gpr[4], hCPU->gpr[5], hCPU->gpr[6], hCPU->gpr[7]); std::shared_ptr device = GetDeviceByHandle(hidHandle, true); if (device == nullptr) @@ -718,7 +822,7 @@ namespace nsyshid ppcDefineParamTypePtr(ukn0, uint32be, 1); ppcDefineParamTypePtr(ukn1, uint32be, 2); cemuLog_logDebug(LogType::Force, "nsyshid.HIDDecodeError(0x{:08x},0x{:08x},0x{:08x})", hCPU->gpr[3], - hCPU->gpr[4], hCPU->gpr[5]); + hCPU->gpr[4], hCPU->gpr[5]); // todo *ukn0 = 0x3FF; diff --git a/src/Common/precompiled.h b/src/Common/precompiled.h index d4df4343..bda75cef 100644 --- a/src/Common/precompiled.h +++ b/src/Common/precompiled.h @@ -274,6 +274,25 @@ inline uint64 _udiv128(uint64 highDividend, uint64 lowDividend, uint64 divisor, #define NOEXPORT __attribute__ ((visibility ("hidden"))) #endif +#if defined(_MSC_VER) +#define FORCE_INLINE __forceinline +#elif defined(__GNUC__) || defined(__clang__) +#define FORCE_INLINE inline __attribute__((always_inline)) +#else +#define FORCE_INLINE inline +#endif + +FORCE_INLINE int BSF(uint32 v) // returns index of first bit set, counting from LSB. If v is 0 then result is undefined +{ +#if defined(_MSC_VER) + return _tzcnt_u32(v); // TZCNT requires BMI1. 
But if not supported it will execute as BSF +#elif defined(__GNUC__) || defined(__clang__) + return __builtin_ctz(v); +#else + return std::countr_zero(v); +#endif +} + // On aarch64 we handle some of the x86 intrinsics by implementing them as wrappers #if defined(__aarch64__) @@ -597,4 +616,4 @@ namespace stdx scope_exit& operator=(scope_exit) = delete; void release() { m_released = true;} }; -} \ No newline at end of file +} diff --git a/src/util/ChunkedHeap/ChunkedHeap.h b/src/util/ChunkedHeap/ChunkedHeap.h index abc45429..21a1b868 100644 --- a/src/util/ChunkedHeap/ChunkedHeap.h +++ b/src/util/ChunkedHeap/ChunkedHeap.h @@ -1,35 +1,39 @@ #pragma once +#include + struct CHAddr { uint32 offset; uint32 chunkIndex; + void* internal; // AllocRange - CHAddr(uint32 _offset, uint32 _chunkIndex) : offset(_offset), chunkIndex(_chunkIndex) {}; + CHAddr(uint32 _offset, uint32 _chunkIndex, void* internal = nullptr) : offset(_offset), chunkIndex(_chunkIndex), internal(internal) {}; CHAddr() : offset(0xFFFFFFFF), chunkIndex(0xFFFFFFFF) {}; bool isValid() { return chunkIndex != 0xFFFFFFFF; }; static CHAddr getInvalid() { return CHAddr(0xFFFFFFFF, 0xFFFFFFFF); }; }; +template class ChunkedHeap { - struct allocRange_t + struct AllocRange { - allocRange_t* nextFree{}; - allocRange_t* prevFree{}; - allocRange_t* prevOrdered{}; - allocRange_t* nextOrdered{}; + AllocRange* nextFree{}; + AllocRange* prevFree{}; + AllocRange* prevOrdered{}; + AllocRange* nextOrdered{}; uint32 offset; uint32 chunkIndex; uint32 size; bool isFree; - allocRange_t(uint32 _offset, uint32 _chunkIndex, uint32 _size, bool _isFree) : offset(_offset), chunkIndex(_chunkIndex), size(_size), isFree(_isFree), nextFree(nullptr) {}; + AllocRange(uint32 _offset, uint32 _chunkIndex, uint32 _size, bool _isFree) : offset(_offset), chunkIndex(_chunkIndex), size(_size), isFree(_isFree), nextFree(nullptr) {}; }; - struct chunk_t + struct Chunk { - std::unordered_map map_allocatedRange; + uint32 size; }; public: @@ -47,45 +51,32 
@@ public: _free(addr); } - virtual uint32 allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) - { - return 0; - } + virtual uint32 allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize) = 0; private: unsigned ulog2(uint32 v) { - static const unsigned MUL_DE_BRUIJN_BIT[] = - { - 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 - }; - - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - - return MUL_DE_BRUIJN_BIT[(v * 0x07C4ACDDu) >> 27]; + cemu_assert_debug(v != 0); + return 31 - std::countl_zero(v); } - void trackFreeRange(allocRange_t* range) + void trackFreeRange(AllocRange* range) { // get index of msb cemu_assert_debug(range->size != 0); // size of zero is not allowed uint32 bucketIndex = ulog2(range->size); - range->nextFree = bucketFreeRange[bucketIndex]; - if (bucketFreeRange[bucketIndex]) - bucketFreeRange[bucketIndex]->prevFree = range; + range->nextFree = m_bucketFreeRange[bucketIndex]; + if (m_bucketFreeRange[bucketIndex]) + m_bucketFreeRange[bucketIndex]->prevFree = range; range->prevFree = nullptr; - bucketFreeRange[bucketIndex] = range; + m_bucketFreeRange[bucketIndex] = range; + m_bucketUseMask |= (1u << bucketIndex); } - void forgetFreeRange(allocRange_t* range, uint32 bucketIndex) + void forgetFreeRange(AllocRange* range, uint32 bucketIndex) { - allocRange_t* prevRange = range->prevFree; - allocRange_t* nextRange = range->nextFree; + AllocRange* prevRange = range->prevFree; + AllocRange* nextRange = range->nextFree; if (prevRange) { prevRange->nextFree = nextRange; @@ -94,36 +85,42 @@ private: } else { - if (bucketFreeRange[bucketIndex] != range) - assert_dbg(); - bucketFreeRange[bucketIndex] = nextRange; + cemu_assert_debug(m_bucketFreeRange[bucketIndex] == range); + m_bucketFreeRange[bucketIndex] = nextRange; if (nextRange) nextRange->prevFree = nullptr; + else + m_bucketUseMask &= ~(1u << bucketIndex); } } bool 
allocateChunk(uint32 minimumAllocationSize) { - uint32 chunkIndex = (uint32)list_chunks.size(); - list_chunks.emplace_back(new chunk_t()); + uint32 chunkIndex = (uint32)m_chunks.size(); + m_chunks.emplace_back(); uint32 chunkSize = allocateNewChunk(chunkIndex, minimumAllocationSize); + cemu_assert_debug((chunkSize%TMinimumAlignment) == 0); // chunk size should be a multiple of the minimum alignment if (chunkSize == 0) return false; - allocRange_t* range = new allocRange_t(0, chunkIndex, chunkSize, true); + cemu_assert_debug(chunkSize < 0x80000000u); // chunk size must be below 2GB + AllocRange* range = m_allocEntriesPool.allocObj(0, chunkIndex, chunkSize, true); trackFreeRange(range); - numHeapBytes += chunkSize; + m_numHeapBytes += chunkSize; return true; } - void _allocFrom(allocRange_t* range, uint32 bucketIndex, uint32 allocOffset, uint32 allocSize) + void _allocFrom(AllocRange* range, uint32 bucketIndex, uint32 allocOffset, uint32 allocSize) { + cemu_assert_debug(allocSize > 0); // remove the range from the chain of free ranges forgetFreeRange(range, bucketIndex); // split head, allocation and tail into separate ranges - if (allocOffset > range->offset) + uint32 headBytes = allocOffset - range->offset; + if (headBytes > 0) { // alignment padding -> create free range - allocRange_t* head = new allocRange_t(range->offset, range->chunkIndex, allocOffset - range->offset, true); + cemu_assert_debug(headBytes >= TMinimumAlignment); + AllocRange* head = m_allocEntriesPool.allocObj(range->offset, range->chunkIndex, headBytes, true); trackFreeRange(head); if (range->prevOrdered) range->prevOrdered->nextOrdered = head; @@ -131,10 +128,12 @@ private: head->nextOrdered = range; range->prevOrdered = head; } - if ((allocOffset + allocSize) < (range->offset + range->size)) // todo - create only if it's more than a couple of bytes? 
+ uint32 tailBytes = (range->offset + range->size) - (allocOffset + allocSize); + if (tailBytes > 0) { // tail -> create free range - allocRange_t* tail = new allocRange_t((allocOffset + allocSize), range->chunkIndex, (range->offset + range->size) - (allocOffset + allocSize), true); + cemu_assert_debug(tailBytes >= TMinimumAlignment); + AllocRange* tail = m_allocEntriesPool.allocObj((allocOffset + allocSize), range->chunkIndex, tailBytes, true); trackFreeRange(tail); if (range->nextOrdered) range->nextOrdered->prevOrdered = tail; @@ -149,36 +148,51 @@ private: CHAddr _alloc(uint32 size, uint32 alignment) { + cemu_assert_debug(size <= (0x7FFFFFFFu-TMinimumAlignment)); + // make sure size is not zero and align it + if(size == 0) [[unlikely]] + size = TMinimumAlignment; + else + size = (size + (TMinimumAlignment - 1)) & ~(TMinimumAlignment - 1); // find smallest bucket to scan uint32 alignmentM1 = alignment - 1; uint32 bucketIndex = ulog2(size); - while (bucketIndex < 32) + // check if the bucket is available + if( !(m_bucketUseMask & (1u << bucketIndex)) ) { - allocRange_t* range = bucketFreeRange[bucketIndex]; + // skip to next non-empty bucket + uint32 nextIndex = BSF(m_bucketUseMask>>bucketIndex); + bucketIndex += nextIndex; + } + while (bucketIndex < 31) + { + AllocRange* range = m_bucketFreeRange[bucketIndex]; while (range) { if (range->size >= size) { // verify if aligned allocation fits uint32 alignedOffset = (range->offset + alignmentM1) & ~alignmentM1; - uint32 alignmentLoss = alignedOffset - range->offset; - if (alignmentLoss < range->size && (range->size - alignmentLoss) >= size) + uint32 endOffset = alignedOffset + size; + if((range->offset+range->size) >= endOffset) { _allocFrom(range, bucketIndex, alignedOffset, size); - list_chunks[range->chunkIndex]->map_allocatedRange.emplace(alignedOffset, range); - numAllocatedBytes += size; - return CHAddr(alignedOffset, range->chunkIndex); + m_numAllocatedBytes += size; + return CHAddr(alignedOffset, 
range->chunkIndex, range); } } range = range->nextFree; } - bucketIndex++; // try higher bucket + // check next non-empty bucket or skip to end + bucketIndex++; + uint32 emptyBuckets = BSF(m_bucketUseMask>>bucketIndex); + bucketIndex += emptyBuckets; } - if(allocationLimitReached) + if(m_allocationLimitReached) return CHAddr(0xFFFFFFFF, 0xFFFFFFFF); if (!allocateChunk(size)) { - allocationLimitReached = true; + m_allocationLimitReached = true; return CHAddr(0xFFFFFFFF, 0xFFFFFFFF); } return _alloc(size, alignment); @@ -186,24 +200,16 @@ private: void _free(CHAddr addr) { - auto it = list_chunks[addr.chunkIndex]->map_allocatedRange.find(addr.offset); - if (it == list_chunks[addr.chunkIndex]->map_allocatedRange.end()) + if(!addr.internal) { cemuLog_log(LogType::Force, "Internal heap error. {:08x} {:08x}", addr.chunkIndex, addr.offset); - cemuLog_log(LogType::Force, "Debug info:"); - for (auto& rangeItr : list_chunks[addr.chunkIndex]->map_allocatedRange) - { - cemuLog_log(LogType::Force, "{:08x} {:08x}", rangeItr.second->offset, rangeItr.second->size); - } return; } - - allocRange_t* range = it->second; - numAllocatedBytes -= it->second->size; - list_chunks[range->chunkIndex]->map_allocatedRange.erase(it); + AllocRange* range = (AllocRange*)addr.internal; + m_numAllocatedBytes -= range->size; // try merge left or right - allocRange_t* prevRange = range->prevOrdered; - allocRange_t* nextRange = range->nextOrdered; + AllocRange* prevRange = range->prevOrdered; + AllocRange* nextRange = range->nextOrdered; if (prevRange && prevRange->isFree) { if (nextRange && nextRange->isFree) @@ -216,8 +222,8 @@ private: forgetFreeRange(prevRange, ulog2(prevRange->size)); prevRange->size = newSize; trackFreeRange(prevRange); - delete range; - delete nextRange; + m_allocEntriesPool.freeObj(range); + m_allocEntriesPool.freeObj(nextRange); } else { @@ -228,7 +234,7 @@ private: forgetFreeRange(prevRange, ulog2(prevRange->size)); prevRange->size = newSize; trackFreeRange(prevRange); - 
delete range; + m_allocEntriesPool.freeObj(range); } } else if (nextRange && nextRange->isFree) @@ -242,7 +248,7 @@ private: range->prevOrdered->nextOrdered = nextRange; nextRange->prevOrdered = range->prevOrdered; trackFreeRange(nextRange); - delete range; + m_allocEntriesPool.freeObj(range); } else { @@ -265,7 +271,7 @@ private: for (uint32 i = 0; i < 32; i++) { - allocRange_t* ar = bucketFreeRange[i]; + AllocRange* ar = m_bucketFreeRange[i]; while (ar) { availableRange_t dbgRange; @@ -278,7 +284,7 @@ private: if (itr.chunkIndex != dbgRange.chunkIndex) continue; if (itr.offset < (dbgRange.offset + dbgRange.size) && (itr.offset + itr.size) >(dbgRange.offset)) - assert_dbg(); + cemu_assert_error(); } availRanges.emplace_back(dbgRange); @@ -290,14 +296,16 @@ private: } private: - std::vector list_chunks; - allocRange_t* bucketFreeRange[32]{}; - bool allocationLimitReached = false; + std::vector m_chunks; + uint32 m_bucketUseMask{0x80000000}; // bitmask indicating non-empty buckets. MSB always set to provide an upper bound for BSF instruction + AllocRange* m_bucketFreeRange[32]{}; // we are only using 31 entries since the MSB is reserved (thus chunks equal or larger than 2^31 are not allowed) + bool m_allocationLimitReached = false; + MemoryPool m_allocEntriesPool{64}; public: // statistics - uint32 numHeapBytes{}; // total size of the heap - uint32 numAllocatedBytes{}; + uint32 m_numHeapBytes{}; // total size of the heap + uint32 m_numAllocatedBytes{}; }; class VGenericHeap @@ -633,7 +641,7 @@ public: uint32 getCurrentBlockOffset() const { return m_currentBlockOffset; } uint8* getCurrentBlockPtr() const { return m_currentBlockPtr; } - + private: void allocateAdditionalChunk() {