mirror of
https://github.com/cemu-project/Cemu.git
synced 2025-07-03 13:31:18 +12:00
General aarch64 improvements & Apple Silicon support (#1255)
This commit is contained in:
parent
c8ffff8f41
commit
00ff5549d9
18 changed files with 405 additions and 32 deletions
|
@ -101,13 +101,21 @@ if (MACOS_BUNDLE)
|
|||
endforeach(folder)
|
||||
|
||||
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/debug/lib/libusb-1.0.0.dylib")
|
||||
set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/debug/lib/libusb-1.0.0.dylib")
|
||||
else()
|
||||
set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/x64-osx/lib/libusb-1.0.0.dylib")
|
||||
set(LIBUSB_PATH "${CMAKE_BINARY_DIR}/vcpkg_installed/${VCPKG_TARGET_TRIPLET}/lib/libusb-1.0.0.dylib")
|
||||
endif()
|
||||
|
||||
if (EXISTS "/usr/local/lib/libMoltenVK.dylib")
|
||||
set(MOLTENVK_PATH "/usr/local/lib/libMoltenVK.dylib")
|
||||
elseif (EXISTS "/opt/homebrew/lib/libMoltenVK.dylib")
|
||||
set(MOLTENVK_PATH "/opt/homebrew/lib/libMoltenVK.dylib")
|
||||
else()
|
||||
message(FATAL_ERROR "failed to find libMoltenVK.dylib")
|
||||
endif ()
|
||||
|
||||
add_custom_command (TARGET CemuBin POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} ARGS -E copy "/usr/local/lib/libMoltenVK.dylib" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib"
|
||||
COMMAND ${CMAKE_COMMAND} ARGS -E copy "${MOLTENVK_PATH}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libMoltenVK.dylib"
|
||||
COMMAND ${CMAKE_COMMAND} ARGS -E copy "${LIBUSB_PATH}" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/Frameworks/libusb-1.0.0.dylib"
|
||||
COMMAND ${CMAKE_COMMAND} ARGS -E copy "${CMAKE_SOURCE_DIR}/src/resource/update.sh" "${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/update.sh"
|
||||
COMMAND bash -c "install_name_tool -add_rpath @executable_path/../Frameworks ${CMAKE_SOURCE_DIR}/bin/${OUTPUT_NAME}.app/Contents/MacOS/${OUTPUT_NAME}"
|
||||
|
|
|
@ -537,7 +537,7 @@ if(APPLE)
|
|||
target_sources(CemuCafe PRIVATE "HW/Latte/Renderer/Vulkan/CocoaSurface.mm")
|
||||
endif()
|
||||
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(AARCH64)")
|
||||
if(CEMU_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)")
|
||||
target_sources(CemuCafe PRIVATE
|
||||
HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.cpp
|
||||
HW/Espresso/Recompiler/BackendAArch64/BackendAArch64.h
|
||||
|
|
|
@ -169,8 +169,10 @@ struct AArch64GenContext_t : CodeGenerator
|
|||
|
||||
bool processAllJumps()
|
||||
{
|
||||
for (auto&& [jumpStart, jumpInfo] : jumps)
|
||||
for (auto jump : jumps)
|
||||
{
|
||||
auto jumpStart = jump.first;
|
||||
auto jumpInfo = jump.second;
|
||||
bool success = std::visit(
|
||||
[&, this](const auto& jump) {
|
||||
setSize(jumpStart);
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
#if defined(ARCH_X86_64) && defined(__GNUC__)
|
||||
#include <immintrin.h>
|
||||
#elif defined(__aarch64__)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
struct
|
||||
|
@ -502,6 +504,114 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat
|
|||
indexMax = std::max(indexMax, _maxIndex);
|
||||
indexMin = std::min(indexMin, _minIndex);
|
||||
}
|
||||
#elif defined(__aarch64__)
|
||||
|
||||
// Converts `count` big-endian 16-bit indices from indexDataInput into host byte order,
// writes them to indexDataOutput and folds the smallest/largest converted index into
// indexMin/indexMax (both are only ever tightened, never reset, so the caller must
// initialize them). Groups of 8 indices are processed with NEON, the remaining 0-7
// indices with the scalar _swapEndianU16 helper.
void LatteIndices_fastConvertU16_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{
	const uint16* indicesU16BE = (const uint16*)indexDataInput;
	uint16* indexOutput = (uint16*)indexDataOutput;
	const uint32 vectorBlockCount = count >> 3;
	const uint32 countRemaining = count & 7;

	if (vectorBlockCount != 0)
	{
		uint16x8_t mMin = vdupq_n_u16(0xFFFF);
		uint16x8_t mMax = vdupq_n_u16(0x0000);

		for (uint32 block = 0; block < vectorBlockCount; block++)
		{
			const uint16x8_t mRaw = vld1q_u16(indicesU16BE);
			// vrev16q_u8 works on byte lanes (uint8x16_t). Reinterpret explicitly instead of
			// relying on implicit vector conversions, which not every compiler accepts.
			const uint16x8_t mSwapped = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(mRaw)));
			mMin = vminq_u16(mMin, mSwapped);
			mMax = vmaxq_u16(mMax, mSwapped);
			vst1q_u16(indexOutput, mSwapped);
			indicesU16BE += 8;
			indexOutput += 8;
		}

		// horizontal reduction across the 8 lanes (AArch64 across-vector min/max)
		indexMax = std::max(indexMax, (uint32)vmaxvq_u16(mMax));
		indexMin = std::min(indexMin, (uint32)vminvq_u16(mMin));
	}

	// process remaining indices
	for (uint32 i = 0; i < countRemaining; i++)
	{
		const uint16 idx = _swapEndianU16(indicesU16BE[i]);
		indexOutput[i] = idx;
		indexMax = std::max(indexMax, (uint32)idx);
		indexMin = std::min(indexMin, (uint32)idx);
	}
}
|
||||
|
||||
// Converts `count` big-endian 32-bit indices from indexDataInput into host byte order,
// writes them to indexDataOutput and folds the smallest/largest converted index into
// indexMin/indexMax (both are only ever tightened, never reset, so the caller must
// initialize them). Groups of 4 indices are processed with NEON, the remaining 0-3
// indices with the scalar _swapEndianU32 helper.
void LatteIndices_fastConvertU32_NEON(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{
	const uint32* indicesU32BE = (const uint32*)indexDataInput;
	uint32* indexOutput = (uint32*)indexDataOutput;
	const uint32 vectorBlockCount = count >> 2; // one 128-bit vector holds 4 indices
	const uint32 countRemaining = count & 3;

	if (vectorBlockCount != 0)
	{
		uint32x4_t mMin = vdupq_n_u32(0xFFFFFFFF);
		uint32x4_t mMax = vdupq_n_u32(0x00000000);

		for (uint32 block = 0; block < vectorBlockCount; block++)
		{
			const uint32x4_t mRaw = vld1q_u32(indicesU32BE);
			// vrev32q_u8 works on byte lanes (uint8x16_t). Reinterpret explicitly instead of
			// relying on implicit vector conversions, which not every compiler accepts.
			const uint32x4_t mSwapped = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(mRaw)));
			mMin = vminq_u32(mMin, mSwapped);
			mMax = vmaxq_u32(mMax, mSwapped);
			vst1q_u32(indexOutput, mSwapped);
			indicesU32BE += 4;
			indexOutput += 4;
		}

		// horizontal reduction across the 4 lanes (AArch64 across-vector min/max)
		indexMax = std::max(indexMax, (uint32)vmaxvq_u32(mMax));
		indexMin = std::min(indexMin, (uint32)vminvq_u32(mMin));
	}

	// process remaining indices
	for (uint32 i = 0; i < countRemaining; i++)
	{
		const uint32 idx = _swapEndianU32(indicesU32BE[i]);
		indexOutput[i] = idx;
		indexMax = std::max(indexMax, idx);
		indexMin = std::min(indexMin, idx);
	}
}
|
||||
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
|
@ -688,27 +798,31 @@ void LatteIndices_decode(const void* indexData, LatteIndexType indexType, uint32
|
|||
{
|
||||
if (indexType == LatteIndexType::U16_BE)
|
||||
{
|
||||
#if defined(ARCH_X86_64)
|
||||
#if defined(ARCH_X86_64)
|
||||
if (g_CPUFeatures.x86.avx2)
|
||||
LatteIndices_fastConvertU16_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
else if (g_CPUFeatures.x86.sse4_1 && g_CPUFeatures.x86.ssse3)
|
||||
LatteIndices_fastConvertU16_SSE41(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
else
|
||||
LatteIndices_convertBE<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#else
|
||||
#elif defined(__aarch64__)
|
||||
LatteIndices_fastConvertU16_NEON(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#else
|
||||
LatteIndices_convertBE<uint16>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
else if (indexType == LatteIndexType::U32_BE)
|
||||
{
|
||||
#if defined(ARCH_X86_64)
|
||||
#if defined(ARCH_X86_64)
|
||||
if (g_CPUFeatures.x86.avx2)
|
||||
LatteIndices_fastConvertU32_AVX2(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
else
|
||||
LatteIndices_convertBE<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#else
|
||||
#elif defined(__aarch64__)
|
||||
LatteIndices_fastConvertU32_NEON(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#else
|
||||
LatteIndices_convertBE<uint32>(indexData, indexOutputPtr, count, indexMin, indexMax);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
else if (indexType == LatteIndexType::U16_LE)
|
||||
{
|
||||
|
|
|
@ -25,7 +25,11 @@ void nnNfp_update();
|
|||
|
||||
// Forward declarations of coreinit thread helpers used further down in this file.
namespace coreinit
|
||||
{
|
||||
// The fiber entry point has a different signature when __arm64__ is defined: it takes
// two uint32 values instead of a single pointer. The definition in this file rebuilds a
// 64-bit value from them as (high << 32) | low and casts it to OSHostThread*.
#ifdef __arm64__
|
||||
void __OSFiberThreadEntry(uint32, uint32);
|
||||
#else
|
||||
void __OSFiberThreadEntry(void* thread);
|
||||
#endif
|
||||
// NOTE(review): presumably these insert/remove `thread` into/from the scheduler run
// queues; the implementations are not visible here, confirm against their definitions.
void __OSAddReadyThreadToRunQueue(OSThread_t* thread);
|
||||
void __OSRemoveThreadFromRunQueues(OSThread_t* thread);
|
||||
};
|
||||
|
@ -49,7 +53,7 @@ namespace coreinit
|
|||
|
||||
struct OSHostThread
|
||||
{
|
||||
OSHostThread(OSThread_t* thread) : m_thread(thread), m_fiber(__OSFiberThreadEntry, this, this)
|
||||
OSHostThread(OSThread_t* thread) : m_thread(thread), m_fiber((void(*)(void*))__OSFiberThreadEntry, this, this)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -1304,8 +1308,14 @@ namespace coreinit
|
|||
__OSThreadStartTimeslice(hostThread->m_thread, &hostThread->ppcInstance);
|
||||
}
|
||||
|
||||
#ifdef __arm64__
|
||||
void __OSFiberThreadEntry(uint32 _high, uint32 _low)
|
||||
{
|
||||
uint64 _thread = (uint64) _high << 32 | _low;
|
||||
#else
|
||||
void __OSFiberThreadEntry(void* _thread)
|
||||
{
|
||||
#endif
|
||||
OSHostThread* hostThread = (OSHostThread*)_thread;
|
||||
|
||||
#if defined(ARCH_X86_64)
|
||||
|
|
|
@ -310,7 +310,8 @@ inline uint64 __rdtsc()
|
|||
|
||||
// Memory fence with the same name as the x86 intrinsic.
// NOTE(review): presumably only compiled where the real _mm_mfence intrinsic is not
// available; the enclosing preprocessor guard is not visible here, confirm.
inline void _mm_mfence()
|
||||
{
|
||||
|
||||
// Empty asm statement with a "memory" clobber: emits no instructions but stops the
// compiler from reordering or caching memory accesses across this point.
asm volatile("" ::: "memory");
|
||||
// Sequentially consistent fence from <atomic>.
std::atomic_thread_fence(std::memory_order_seq_cst);
|
||||
}
|
||||
|
||||
inline unsigned char _addcarry_u64(unsigned char carry, unsigned long long a, unsigned long long b, unsigned long long *result)
|
||||
|
|
|
@ -140,6 +140,7 @@ enum
|
|||
MAINFRAME_MENU_ID_DEBUG_VK_ACCURATE_BARRIERS,
|
||||
|
||||
// debug->logging
|
||||
MAINFRAME_MENU_ID_DEBUG_LOGGING_MESSAGE = 21499,
|
||||
MAINFRAME_MENU_ID_DEBUG_LOGGING0 = 21500,
|
||||
MAINFRAME_MENU_ID_DEBUG_ADVANCED_PPC_INFO = 21599,
|
||||
// debug->dump
|
||||
|
@ -2234,7 +2235,7 @@ void MainWindow::RecreateMenu()
|
|||
debugLoggingMenu->AppendSeparator();
|
||||
|
||||
wxMenu* logCosModulesMenu = new wxMenu();
|
||||
logCosModulesMenu->AppendCheckItem(0, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false);
|
||||
logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING_MESSAGE, _("&Options below are for experts. Leave off if unsure"), wxEmptyString)->Enable(false);
|
||||
logCosModulesMenu->AppendSeparator();
|
||||
logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitFile), _("coreinit File-Access API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitFile));
|
||||
logCosModulesMenu->AppendCheckItem(MAINFRAME_MENU_ID_DEBUG_LOGGING0 + stdx::to_underlying(LogType::CoreinitThreadSync), _("coreinit Thread-Synchronization API"), wxEmptyString)->Check(cemuLog_isLoggingEnabled(LogType::CoreinitThreadSync));
|
||||
|
|
|
@ -15,7 +15,12 @@ Fiber::Fiber(void(*FiberEntryPoint)(void* userParam), void* userParam, void* pri
|
|||
ctx->uc_stack.ss_sp = m_stackPtr;
|
||||
ctx->uc_stack.ss_size = stackSize;
|
||||
ctx->uc_link = &ctx[0];
|
||||
#ifdef __arm64__
|
||||
// https://www.man7.org/linux/man-pages/man3/makecontext.3.html#NOTES
|
||||
makecontext(ctx, (void(*)())FiberEntryPoint, 2, (uint64) userParam >> 32, userParam);
|
||||
#else
|
||||
makecontext(ctx, (void(*)())FiberEntryPoint, 1, userParam);
|
||||
#endif
|
||||
this->m_implData = (void*)ctx;
|
||||
}
|
||||
|
||||
|
|
|
@ -45,7 +45,11 @@ namespace MemMapper
|
|||
void* r;
|
||||
if(fromReservation)
|
||||
{
|
||||
if( mprotect(baseAddr, size, GetProt(permissionFlags)) == 0 )
|
||||
uint64 page_size = sysconf(_SC_PAGESIZE);
|
||||
void* page = baseAddr;
|
||||
if ( (uint64) baseAddr % page_size != 0 )
|
||||
page = (void*) ((uint64)baseAddr & ~(page_size - 1));
|
||||
if( mprotect(page, size, GetProt(permissionFlags)) == 0 )
|
||||
r = baseAddr;
|
||||
else
|
||||
r = nullptr;
|
||||
|
|
|
@ -27,6 +27,8 @@ uint64 HighResolutionTimer::m_freq = []() -> uint64 {
|
|||
LARGE_INTEGER freq;
|
||||
QueryPerformanceFrequency(&freq);
|
||||
return (uint64)(freq.QuadPart);
|
||||
#elif BOOST_OS_MACOS
|
||||
return 1000000000;
|
||||
#else
|
||||
timespec pc;
|
||||
clock_getres(CLOCK_MONOTONIC_RAW, &pc);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue