Add all the files

This commit is contained in:
Exzap 2022-08-22 22:21:23 +02:00
parent e3db07a16a
commit d60742f52b
1445 changed files with 430238 additions and 0 deletions

15
src/util/CMakeLists.txt Normal file
View file

@ -0,0 +1,15 @@
# Build script for the CemuUtil library (everything below src/util).
project(CemuUtil)
# Make headers in this directory reachable from the sources of this directory.
include_directories(".")
# Collect all sources and headers recursively.
# NOTE(review): globbing happens at configure time only; newly added files require a CMake re-run.
file(GLOB_RECURSE CPP_FILES *.cpp)
file(GLOB_RECURSE H_FILES *.h)
add_library(CemuUtil ${CPP_FILES} ${H_FILES})
# On MSVC select the statically linked runtime ("MultiThreaded", or "MultiThreadedDebug" for Debug configs).
set_property(TARGET CemuUtil PROPERTY MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
# Use the project-wide precompiled header from the Common directory.
target_precompile_headers(CemuUtil PRIVATE ../Common/precompiled.h)
target_link_libraries(CemuUtil CemuCommon CemuConfig)
# Allow includes relative to the parent source directory (e.g. "util/...").
target_include_directories(CemuUtil PRIVATE ../)

View file

@ -0,0 +1,641 @@
#pragma once
// Address of an allocation handed out by ChunkedHeap: an offset inside the chunk with the given index.
// A chunkIndex of 0xFFFFFFFF marks the address as invalid.
struct CHAddr
{
	uint32 offset;
	uint32 chunkIndex;

	CHAddr(uint32 _offset, uint32 _chunkIndex) : offset(_offset), chunkIndex(_chunkIndex) {}

	// default-constructed addresses are invalid
	CHAddr() : offset(0xFFFFFFFF), chunkIndex(0xFFFFFFFF) {}

	// const so that validity can also be checked on const objects and references
	bool isValid() const { return chunkIndex != 0xFFFFFFFF; }

	// returns the same value as the default constructor
	static CHAddr getInvalid() { return CHAddr(0xFFFFFFFF, 0xFFFFFFFF); }
};
class ChunkedHeap
{
struct allocRange_t
{
allocRange_t* nextFree{};
allocRange_t* prevFree{};
allocRange_t* prevOrdered{};
allocRange_t* nextOrdered{};
uint32 offset;
uint32 chunkIndex;
uint32 size;
bool isFree;
allocRange_t(uint32 _offset, uint32 _chunkIndex, uint32 _size, bool _isFree) : offset(_offset), chunkIndex(_chunkIndex), size(_size), isFree(_isFree), nextFree(nullptr) {};
};
struct chunk_t
{
std::unordered_map<uint32, allocRange_t*> map_allocatedRange;
};
public:
ChunkedHeap()
{
}
CHAddr alloc(uint32 size, uint32 alignment = 4)
{
return _alloc(size, alignment);
}
void free(CHAddr addr)
{
_free(addr);
}
virtual uint32 allocateNewChunk(uint32 chunkIndex, uint32 minimumAllocationSize)
{
return 0;
}
private:
unsigned ulog2(uint32 v)
{
static const unsigned MUL_DE_BRUIJN_BIT[] =
{
0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
};
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
return MUL_DE_BRUIJN_BIT[(v * 0x07C4ACDDu) >> 27];
}
void trackFreeRange(allocRange_t* range)
{
// get index of msb
cemu_assert_debug(range->size != 0); // size of zero is not allowed
uint32 bucketIndex = ulog2(range->size);
range->nextFree = bucketFreeRange[bucketIndex];
if (bucketFreeRange[bucketIndex])
bucketFreeRange[bucketIndex]->prevFree = range;
range->prevFree = nullptr;
bucketFreeRange[bucketIndex] = range;
}
void forgetFreeRange(allocRange_t* range, uint32 bucketIndex)
{
allocRange_t* prevRange = range->prevFree;
allocRange_t* nextRange = range->nextFree;
if (prevRange)
{
prevRange->nextFree = nextRange;
if (nextRange)
nextRange->prevFree = prevRange;
}
else
{
if (bucketFreeRange[bucketIndex] != range)
assert_dbg();
bucketFreeRange[bucketIndex] = nextRange;
if (nextRange)
nextRange->prevFree = nullptr;
}
}
bool allocateChunk(uint32 minimumAllocationSize)
{
uint32 chunkIndex = (uint32)list_chunks.size();
list_chunks.emplace_back(new chunk_t());
uint32 chunkSize = allocateNewChunk(chunkIndex, minimumAllocationSize);
if (chunkSize == 0)
return false;
allocRange_t* range = new allocRange_t(0, chunkIndex, chunkSize, true);
trackFreeRange(range);
numHeapBytes += chunkSize;
return true;
}
void _allocFrom(allocRange_t* range, uint32 bucketIndex, uint32 allocOffset, uint32 allocSize)
{
// remove the range from the chain of free ranges
forgetFreeRange(range, bucketIndex);
// split head, allocation and tail into separate ranges
if (allocOffset > range->offset)
{
// alignment padding -> create free range
allocRange_t* head = new allocRange_t(range->offset, range->chunkIndex, allocOffset - range->offset, true);
trackFreeRange(head);
if (range->prevOrdered)
range->prevOrdered->nextOrdered = head;
head->prevOrdered = range->prevOrdered;
head->nextOrdered = range;
range->prevOrdered = head;
}
if ((allocOffset + allocSize) < (range->offset + range->size)) // todo - create only if it's more than a couple of bytes?
{
// tail -> create free range
allocRange_t* tail = new allocRange_t((allocOffset + allocSize), range->chunkIndex, (range->offset + range->size) - (allocOffset + allocSize), true);
trackFreeRange(tail);
if (range->nextOrdered)
range->nextOrdered->prevOrdered = tail;
tail->prevOrdered = range;
tail->nextOrdered = range->nextOrdered;
range->nextOrdered = tail;
}
range->offset = allocOffset;
range->size = allocSize;
range->isFree = false;
}
CHAddr _alloc(uint32 size, uint32 alignment)
{
// find smallest bucket to scan
uint32 alignmentM1 = alignment - 1;
uint32 bucketIndex = ulog2(size);
while (bucketIndex < 32)
{
allocRange_t* range = bucketFreeRange[bucketIndex];
while (range)
{
if (range->size >= size)
{
// verify if aligned allocation fits
uint32 alignedOffset = (range->offset + alignmentM1) & ~alignmentM1;
uint32 alignmentLoss = alignedOffset - range->offset;
if (alignmentLoss < range->size && (range->size - alignmentLoss) >= size)
{
_allocFrom(range, bucketIndex, alignedOffset, size);
list_chunks[range->chunkIndex]->map_allocatedRange.emplace(alignedOffset, range);
numAllocatedBytes += size;
return CHAddr(alignedOffset, range->chunkIndex);
}
}
range = range->nextFree;
}
bucketIndex++; // try higher bucket
}
if(allocationLimitReached)
return CHAddr(0xFFFFFFFF, 0xFFFFFFFF);
if (!allocateChunk(size))
{
allocationLimitReached = true;
return CHAddr(0xFFFFFFFF, 0xFFFFFFFF);
}
return _alloc(size, alignment);
}
void _free(CHAddr addr)
{
auto it = list_chunks[addr.chunkIndex]->map_allocatedRange.find(addr.offset);
if (it == list_chunks[addr.chunkIndex]->map_allocatedRange.end())
{
forceLog_printf("Internal heap error. %08x %08x", addr.chunkIndex, addr.offset);
forceLog_printf("Debug info:");
for (auto& rangeItr : list_chunks[addr.chunkIndex]->map_allocatedRange)
{
forceLog_printf("%08x %08x", rangeItr.second->offset, rangeItr.second->size);
}
return;
}
allocRange_t* range = it->second;
numAllocatedBytes -= it->second->size;
list_chunks[range->chunkIndex]->map_allocatedRange.erase(it);
// try merge left or right
allocRange_t* prevRange = range->prevOrdered;
allocRange_t* nextRange = range->nextOrdered;
if (prevRange && prevRange->isFree)
{
if (nextRange && nextRange->isFree)
{
forgetFreeRange(nextRange, ulog2(nextRange->size));
uint32 newSize = (nextRange->offset + nextRange->size) - prevRange->offset;
prevRange->nextOrdered = nextRange->nextOrdered;
if (nextRange->nextOrdered)
nextRange->nextOrdered->prevOrdered = prevRange;
forgetFreeRange(prevRange, ulog2(prevRange->size));
prevRange->size = newSize;
trackFreeRange(prevRange);
delete range;
delete nextRange;
}
else
{
uint32 newSize = (range->offset + range->size) - prevRange->offset;
prevRange->nextOrdered = nextRange;
if (nextRange)
nextRange->prevOrdered = prevRange;
forgetFreeRange(prevRange, ulog2(prevRange->size));
prevRange->size = newSize;
trackFreeRange(prevRange);
delete range;
}
}
else if (nextRange && nextRange->isFree)
{
uint32 newOffset = range->offset;
uint32 newSize = (nextRange->offset + nextRange->size) - newOffset;
forgetFreeRange(nextRange, ulog2(nextRange->size));
nextRange->offset = newOffset;
nextRange->size = newSize;
if (range->prevOrdered)
range->prevOrdered->nextOrdered = nextRange;
nextRange->prevOrdered = range->prevOrdered;
trackFreeRange(nextRange);
delete range;
}
else
{
range->isFree = true;
trackFreeRange(range);
}
}
void verifyHeap()
{
// check for collisions within bucketFreeRange
struct availableRange_t
{
uint32 chunkIndex;
uint32 offset;
uint32 size;
};
std::vector<availableRange_t> availRanges;
for (uint32 i = 0; i < 32; i++)
{
allocRange_t* ar = bucketFreeRange[i];
while (ar)
{
availableRange_t dbgRange;
dbgRange.chunkIndex = ar->chunkIndex;
dbgRange.offset = ar->offset;
dbgRange.size = ar->size;
for (auto& itr : availRanges)
{
if (itr.chunkIndex != dbgRange.chunkIndex)
continue;
if (itr.offset < (dbgRange.offset + dbgRange.size) && (itr.offset + itr.size) >(dbgRange.offset))
assert_dbg();
}
availRanges.emplace_back(dbgRange);
ar = ar->nextFree;
}
}
}
private:
std::vector<chunk_t*> list_chunks;
allocRange_t* bucketFreeRange[32]{};
bool allocationLimitReached = false;
public:
// statistics
uint32 numHeapBytes{}; // total size of the heap
uint32 numAllocatedBytes{};
};
// Abstract interface of a heap which hands out raw pointers.
class VGenericHeap
{
public:
	// polymorphic base class, make destruction through a base pointer well-defined
	virtual ~VGenericHeap() = default;

	// allocate size bytes with the given alignment
	virtual void* alloc(uint32 size, uint32 alignment) = 0;
	// release a pointer previously returned by alloc()
	virtual void free(void* addr) = 0;
};
// Heap which manages a single contiguous region of heapSize bytes starting at heapBase.
// Only offsets are tracked, the managed memory itself is never touched by the heap.
//
// All ranges form a doubly linked list ordered by offset (prevOrdered/nextOrdered).
// Free ranges are additionally linked into one of 32 buckets, selected by the index of the
// most significant set bit of their size (prevFree/nextFree).
class VHeap : public VGenericHeap
{
	struct allocRange_t
	{
		allocRange_t* nextFree{};
		allocRange_t* prevFree{};
		allocRange_t* prevOrdered{};
		allocRange_t* nextOrdered{};
		uint32 offset;
		uint32 size;
		bool isFree;

		// the link pointers are covered by their default member initializers
		allocRange_t(uint32 _offset, uint32 _size, bool _isFree) : offset(_offset), size(_size), isFree(_isFree) {}
	};

public:
	VHeap(void* heapBase, uint32 heapSize) : m_heapBase((uint8*)heapBase), m_heapSize(heapSize)
	{
		// initially the whole heap is one free range
		allocRange_t* range = new allocRange_t(0, heapSize, true);
		trackFreeRange(range);
	}

	// the heap owns raw pointers to its bookkeeping nodes, copying would lead to double deletion
	VHeap(const VHeap&) = delete;
	VHeap& operator=(const VHeap&) = delete;

	~VHeap()
	{
		for (auto freeRange : bucketFreeRange)
		{
			while (freeRange)
			{
				auto temp = freeRange;
				freeRange = freeRange->nextFree;
				delete temp;
			}
		}
		// ranges which are still allocated are not part of any bucket, release them as well
		for (auto& entry : map_allocatedRange)
			delete entry.second;
	}

	void setHeapBase(void* heapBase)
	{
		cemu_assert_debug(map_allocatedRange.empty()); // heap base can only be changed when there are no active allocations
		m_heapBase = (uint8*)heapBase;
	}

	// returns a pointer into the managed region or nullptr if the allocation failed
	void* alloc(uint32 size, uint32 alignment = 4) override
	{
		cemu_assert_debug(m_heapBase != nullptr); // if this is null, we cant use alloc() == nullptr to determine if an allocation failed
		uint32 allocOffset = 0;
		bool r = _alloc(size, alignment, allocOffset);
		if (!r)
			return nullptr;
		return m_heapBase + allocOffset;
	}

	void free(void* addr) override
	{
		_free((uint32)((uint8*)addr - (uint8*)m_heapBase));
	}

	// same as alloc() but reports the offset relative to the heap base; offsetOut is only written on success
	bool allocOffset(uint32 size, uint32 alignment, uint32& offsetOut)
	{
		uint32 allocOffset = 0;
		bool r = _alloc(size, alignment, allocOffset);
		if (!r)
			return false;
		offsetOut = allocOffset;
		return true;
	}

	void freeOffset(uint32 offset)
	{
		_free((uint32)offset);
	}

	// returns the size of the allocation starting at addr, or 0 if addr is not the start of an active allocation
	uint32 getAllocationSizeFromAddr(void* addr)
	{
		uint32 addrOffset = (uint32)((uint8*)addr - m_heapBase);
		auto it = map_allocatedRange.find(addrOffset);
		if (it == map_allocatedRange.end())
		{
			assert_dbg();
			return 0; // never dereference the end iterator
		}
		return it->second->size;
	}

	bool hasAllocations()
	{
		return !map_allocatedRange.empty();
	}

	void getStats(uint32& heapSize, uint32& allocationSize, uint32& allocNum)
	{
		heapSize = m_heapSize;
		allocationSize = m_statsMemAllocated;
		allocNum = (uint32)map_allocatedRange.size();
	}

private:
	// returns the index of the most significant set bit (0 for an input of 0)
	unsigned ulog2(uint32 v)
	{
		static const unsigned MUL_DE_BRUIJN_BIT[] =
		{
			0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
			8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
		};
		// smear the highest set bit into all lower bits
		v |= v >> 1;
		v |= v >> 2;
		v |= v >> 4;
		v |= v >> 8;
		v |= v >> 16;
		return MUL_DE_BRUIJN_BIT[(v * 0x07C4ACDDu) >> 27];
	}

	// inserts the range at the head of the free list of its size bucket
	void trackFreeRange(allocRange_t* range)
	{
		// get index of msb
		if (range->size == 0)
			assert_dbg(); // not allowed
		uint32 bucketIndex = ulog2(range->size);
		range->nextFree = bucketFreeRange[bucketIndex];
		if (bucketFreeRange[bucketIndex])
			bucketFreeRange[bucketIndex]->prevFree = range;
		range->prevFree = nullptr;
		bucketFreeRange[bucketIndex] = range;
	}

	// unlinks the range from the free list of the given bucket
	void forgetFreeRange(allocRange_t* range, uint32 bucketIndex)
	{
		allocRange_t* prevRange = range->prevFree;
		allocRange_t* nextRange = range->nextFree;
		if (prevRange)
		{
			prevRange->nextFree = nextRange;
			if (nextRange)
				nextRange->prevFree = prevRange;
		}
		else
		{
			// no predecessor, so the range has to be the head of the bucket
			if (bucketFreeRange[bucketIndex] != range)
				assert_dbg();
			bucketFreeRange[bucketIndex] = nextRange;
			if (nextRange)
				nextRange->prevFree = nullptr;
		}
	}

	// carves [allocOffset, allocOffset + allocSize) out of the free range and turns the range into the allocation
	void _allocFrom(allocRange_t* range, uint32 bucketIndex, uint32 allocOffset, uint32 allocSize)
	{
		// remove the range from the chain of free ranges
		forgetFreeRange(range, bucketIndex);
		// split head, allocation and tail into separate ranges
		if (allocOffset > range->offset)
		{
			// alignment padding -> create free range
			allocRange_t* head = new allocRange_t(range->offset, allocOffset - range->offset, true);
			trackFreeRange(head);
			if (range->prevOrdered)
				range->prevOrdered->nextOrdered = head;
			head->prevOrdered = range->prevOrdered;
			head->nextOrdered = range;
			range->prevOrdered = head;
		}
		if ((allocOffset + allocSize) < (range->offset + range->size)) // todo - create only if it's more than a couple of bytes?
		{
			// tail -> create free range
			allocRange_t* tail = new allocRange_t((allocOffset + allocSize), (range->offset + range->size) - (allocOffset + allocSize), true);
			trackFreeRange(tail);
			if (range->nextOrdered)
				range->nextOrdered->prevOrdered = tail;
			tail->prevOrdered = range;
			tail->nextOrdered = range->nextOrdered;
			range->nextOrdered = tail;
		}
		range->offset = allocOffset;
		range->size = allocSize;
		range->isFree = false;
		m_statsMemAllocated += allocSize;
	}

	bool _alloc(uint32 size, uint32 alignment, uint32& allocOffsetOut)
	{
		// find smallest bucket to scan
		uint32 alignmentM1 = alignment - 1;
		uint32 bucketIndex = ulog2(size);
		while (bucketIndex < 32)
		{
			allocRange_t* range = bucketFreeRange[bucketIndex];
			while (range)
			{
				if (range->size >= size)
				{
					// verify if aligned allocation fits
					uint32 alignedOffset = (range->offset + alignmentM1) & ~alignmentM1;
					uint32 alignmentLoss = alignedOffset - range->offset;
					if (alignmentLoss < range->size && (range->size - alignmentLoss) >= size)
					{
						_allocFrom(range, bucketIndex, alignedOffset, size);
						map_allocatedRange.emplace(alignedOffset, range);
						allocOffsetOut = alignedOffset;
						return true;
					}
				}
				range = range->nextFree;
			}
			bucketIndex++; // try higher bucket
		}
		return false;
	}

	void _free(uint32 addrOffset)
	{
		auto it = map_allocatedRange.find(addrOffset);
		if (it == map_allocatedRange.end())
		{
			assert_dbg();
			return; // unknown offset, never dereference the end iterator
		}
		allocRange_t* range = it->second;
		map_allocatedRange.erase(it);
		m_statsMemAllocated -= range->size;
		// try merge left or right
		allocRange_t* prevRange = range->prevOrdered;
		allocRange_t* nextRange = range->nextOrdered;
		if (prevRange && prevRange->isFree)
		{
			if (nextRange && nextRange->isFree)
			{
				// both neighbours are free -> prevRange absorbs range and nextRange
				forgetFreeRange(nextRange, ulog2(nextRange->size));
				uint32 newSize = (nextRange->offset + nextRange->size) - prevRange->offset;
				prevRange->nextOrdered = nextRange->nextOrdered;
				if (nextRange->nextOrdered)
					nextRange->nextOrdered->prevOrdered = prevRange;
				// the size change may move prevRange to a different bucket
				forgetFreeRange(prevRange, ulog2(prevRange->size));
				prevRange->size = newSize;
				trackFreeRange(prevRange);
				delete range;
				delete nextRange;
			}
			else
			{
				// only the left neighbour is free -> prevRange absorbs range
				uint32 newSize = (range->offset + range->size) - prevRange->offset;
				prevRange->nextOrdered = nextRange;
				if (nextRange)
					nextRange->prevOrdered = prevRange;
				forgetFreeRange(prevRange, ulog2(prevRange->size));
				prevRange->size = newSize;
				trackFreeRange(prevRange);
				delete range;
			}
		}
		else if (nextRange && nextRange->isFree)
		{
			// only the right neighbour is free -> nextRange is extended to the left
			uint32 newOffset = range->offset;
			uint32 newSize = (nextRange->offset + nextRange->size) - newOffset;
			forgetFreeRange(nextRange, ulog2(nextRange->size));
			nextRange->offset = newOffset;
			nextRange->size = newSize;
			if (range->prevOrdered)
				range->prevOrdered->nextOrdered = nextRange;
			nextRange->prevOrdered = range->prevOrdered;
			trackFreeRange(nextRange);
			delete range;
		}
		else
		{
			// no free neighbour, the range itself becomes a free range
			range->isFree = true;
			trackFreeRange(range);
		}
	}

private:
	allocRange_t* bucketFreeRange[32]{}; // heads of the free lists, indexed by ulog2(size)
	std::unordered_map<uint32, allocRange_t*> map_allocatedRange; // active allocations keyed by offset
	uint8* m_heapBase;
	const uint32 m_heapSize;
	uint32 m_statsMemAllocated{ 0 };
};
// Bump allocator on top of a VGenericHeap. Memory is taken from the base heap in blocks of TChunkSize bytes
// and handed out linearly. Individual allocations cannot be freed, only releaseAll() returns memory.
template<uint32 TChunkSize>
class ChunkedFlatAllocator
{
public:
	void setBaseAllocator(VGenericHeap* baseHeap)
	{
		m_currentBaseAllocator = baseHeap;
	}

	// Returns size bytes from the current block, starting a new block if the request does not fit.
	// Returns nullptr if the request is larger than a block or if no new block could be allocated
	// (previously both cases ended in unbounded recursion).
	void* alloc(uint32 size, uint32 alignment = 4)
	{
		if (size > TChunkSize)
			return nullptr; // can never fit, not even into an empty block
		if (m_currentBlockPtr)
		{
			if (void* allocPtr = allocFromCurrentBlock(size, alignment))
				return allocPtr;
		}
		if (!allocateAdditionalChunk())
			return nullptr;
		return allocFromCurrentBlock(size, alignment);
	}

	// returns all blocks to the base allocator, every pointer handed out so far becomes invalid
	void releaseAll()
	{
		for (auto it : m_allocatedBlocks)
			m_currentBaseAllocator->free(it);
		m_allocatedBlocks.clear();
		m_currentBlockPtr = nullptr;
		m_currentBlockOffset = 0;
	}

	// invokes funcCb once per allocated block with the block pointer and TChunkSize
	void forEachBlock(void(*funcCb)(void* mem, uint32 size))
	{
		for (auto it : m_allocatedBlocks)
			funcCb(it, TChunkSize);
	}

	uint32 getCurrentBlockOffset() const { return m_currentBlockOffset; }
	uint8* getCurrentBlockPtr() const { return m_currentBlockPtr; }

private:
	// tries to place the allocation into the current block, returns nullptr if it does not fit
	void* allocFromCurrentBlock(uint32 size, uint32 alignment)
	{
		const uint32 alignedOffset = (m_currentBlockOffset + alignment - 1) & ~(alignment - 1);
		// written as subtraction so that the comparison cannot overflow
		if (alignedOffset > TChunkSize || size > (TChunkSize - alignedOffset))
			return nullptr;
		void* allocPtr = m_currentBlockPtr + alignedOffset;
		m_currentBlockOffset = alignedOffset + size;
		return allocPtr;
	}

	// makes a fresh block the current block; on failure the previous block stays current
	bool allocateAdditionalChunk()
	{
		if (!m_currentBaseAllocator)
			return false;
		uint8* newBlock = (uint8*)m_currentBaseAllocator->alloc(TChunkSize, 256);
		if (!newBlock)
			return false; // do not record failed allocations
		m_currentBlockPtr = newBlock;
		m_currentBlockOffset = 0;
		m_allocatedBlocks.emplace_back(newBlock);
		return true;
	}

	VGenericHeap* m_currentBaseAllocator{};
	uint8* m_currentBlockPtr{};
	uint32 m_currentBlockOffset{};
	std::vector<void*> m_allocatedBlocks;
};

View file

@ -0,0 +1,84 @@
#pragma once
#include <dxgi1_4.h>
//#include <atlbase.h>
// Loads dxgi.dll at runtime and holds an IDXGIAdapter3 which is used to query video memory information.
// The constructors throw std::runtime_error if the module, the factory or the adapter cannot be obtained.
class DXGIWrapper
{
public:
	// uses the first adapter reported by the factory
	DXGIWrapper()
		: DXGIWrapper(nullptr)
	{}

	// deviceLUID: pointer to sizeof(LUID) bytes identifying the adapter to use, or nullptr for the first adapter
	DXGIWrapper(uint8* deviceLUID)
	{
		m_moduleHandle = LoadLibraryA("dxgi.dll");
		if (!m_moduleHandle)
			throw std::runtime_error("can't load dxgi module");

		const auto pCreateDXGIFactory1 = (decltype(&CreateDXGIFactory1))GetProcAddress(m_moduleHandle, "CreateDXGIFactory1");
		if (!pCreateDXGIFactory1)
		{
			// the destructor does not run when the constructor throws, so clean up manually
			Cleanup();
			throw std::runtime_error("can't find CreateDXGIFactory1 in dxgi module");
		}

		IDXGIFactory1* dxgiFactory = nullptr;
		if (FAILED(pCreateDXGIFactory1(IID_PPV_ARGS(&dxgiFactory))) || !dxgiFactory)
		{
			Cleanup();
			throw std::runtime_error("can't create dxgi factory");
		}

		IDXGIAdapter1* tmpDxgiAdapter = nullptr;
		UINT adapterIndex = 0;
		// stop on DXGI_ERROR_NOT_FOUND (end of list) as well as on any other error
		while (SUCCEEDED(dxgiFactory->EnumAdapters1(adapterIndex, &tmpDxgiAdapter)) && tmpDxgiAdapter)
		{
			DXGI_ADAPTER_DESC1 desc;
			const bool hasDesc = SUCCEEDED(tmpDxgiAdapter->GetDesc1(&desc));
			if (deviceLUID == nullptr || (hasDesc && memcmp(&desc.AdapterLuid, deviceLUID, sizeof(LUID)) == 0))
			{
				// m_dxgiAdapter stays null if the adapter does not offer IDXGIAdapter3
				tmpDxgiAdapter->QueryInterface(IID_PPV_ARGS(&m_dxgiAdapter));
				tmpDxgiAdapter->Release();
				break;
			}
			tmpDxgiAdapter->Release();
			tmpDxgiAdapter = nullptr;
			++adapterIndex;
		}
		dxgiFactory->Release();

		if (!m_dxgiAdapter)
		{
			Cleanup();
			throw std::runtime_error("can't create dxgi adapter");
		}
	}

	// owns a module handle and a COM reference, copying would release both twice
	DXGIWrapper(const DXGIWrapper&) = delete;
	DXGIWrapper& operator=(const DXGIWrapper&) = delete;

	~DXGIWrapper()
	{
		Cleanup();
	}

	// queries node 0 / local segment group, returns true if the call returned S_OK
	bool QueryVideoMemoryInfo(DXGI_QUERY_VIDEO_MEMORY_INFO& info) const
	{
		return m_dxgiAdapter->QueryVideoMemoryInfo(0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info) == S_OK;
	}

private:
	HMODULE m_moduleHandle = nullptr;
	IDXGIAdapter3* m_dxgiAdapter = nullptr;

	// releases the adapter before unloading the module which implements it
	void Cleanup()
	{
		if (m_dxgiAdapter)
		{
			m_dxgiAdapter->Release();
			m_dxgiAdapter = nullptr;
		}
		if (m_moduleHandle)
		{
			FreeLibrary(m_moduleHandle);
			m_moduleHandle = nullptr;
		}
	}
};

66
src/util/EventService.h Normal file
View file

@ -0,0 +1,66 @@
#pragma once
#include "util/helpers/Singleton.h"
#include <boost/signals2.hpp>
#include <boost/bind/placeholders.hpp>
// Identifiers of the events which can be dispatched through EventService.
enum class Events : int32_t
{
ControllerChanged,
};
// Signature of the slots connected to Events::ControllerChanged (no arguments, no return value).
using ControllerChangedFunc = void(void);
// Singleton which distributes events (see Events) to connected member functions via boost::signals2.
class EventService : public Singleton<EventService>
{
	friend class Singleton<EventService>;
	EventService() = default;

public:
	// Binds the member function `function` to `thisptr` and connects it to the signal of `event`.
	// The returned connection can be passed to disconnect().
	template <Events event, typename TFunc, typename TClass>
	boost::signals2::connection connect(TFunc function, TClass thisptr)
	{
		using namespace boost::placeholders;
		if constexpr (event != Events::ControllerChanged)
		{
			cemu_assert_suspicious();
		}
		else
		{
			return m_controller_changed_signal.connect(boost::bind(function, thisptr));
		}
	}

	// Removes a slot which was previously connected to the signal of `event`.
	template <Events event>
	void disconnect(const boost::signals2::connection& slot)
	{
		using namespace boost::placeholders;
		if constexpr (event != Events::ControllerChanged)
		{
			cemu_assert_suspicious();
		}
		else
		{
			m_controller_changed_signal.disconnect(slot);
		}
	}

	// Invokes all slots connected to `event`. Exceptions derived from std::exception are logged and swallowed.
	template <Events event, typename ... TArgs>
	void signal(TArgs&&... args)
	{
		try
		{
			if constexpr (event != Events::ControllerChanged)
			{
				cemu_assert_suspicious();
			}
			else
			{
				m_controller_changed_signal(std::forward<TArgs>(args)...);
			}
		}
		catch (const std::exception& ex)
		{
			cemuLog_force("error when signaling {}: {}", event, ex.what());
		}
	}

private:
	boost::signals2::signal<ControllerChangedFunc> m_controller_changed_signal;
};

22
src/util/Fiber/Fiber.h Normal file
View file

@ -0,0 +1,22 @@
#pragma once
#if BOOST_OS_WINDOWS > 0
#endif
// Platform independent wrapper around a fiber (cooperatively scheduled execution context).
// The member functions are implemented once per platform.
class Fiber
{
public:
	// Creates a fiber which runs FiberEntryPoint(userParam) once it is switched to.
	// privateData is returned by GetFiberPrivateData() while this fiber is the current one.
	Fiber(void(*FiberEntryPoint)(void* userParam), void* userParam, void* privateData);
	~Fiber();

	// a fiber owns its platform context (and stack), copying would release them twice
	Fiber(const Fiber&) = delete;
	Fiber& operator=(const Fiber&) = delete;

	// Creates the Fiber object which represents the calling thread. Has to be called once per thread before Switch() is used on it.
	static Fiber* PrepareCurrentThread(void* privateData = nullptr);
	// Suspends the current fiber and continues execution in targetFiber.
	static void Switch(Fiber& targetFiber);
	// Returns the privateData of the fiber which is current on the calling thread.
	static void* GetFiberPrivateData();

private:
	Fiber(void* privateData); // fiber from current thread

	void* m_implData{nullptr}; // platform specific context/handle
	void* m_privateData{nullptr};
	void* m_stackPtr{ nullptr }; // stack owned by the fiber, null if the platform implementation does not allocate one
};

View file

@ -0,0 +1,56 @@
#include "Fiber.h"
#if BOOST_OS_LINUX
#include <ucontext.h>
// Fiber which is currently executing on this thread. Null until PrepareCurrentThread() or Switch() assigns it.
thread_local Fiber* sCurrentFiber{};
// Creates a fiber with its own 2MiB stack which starts in FiberEntryPoint(userParam) on the first switch.
Fiber::Fiber(void(*FiberEntryPoint)(void* userParam), void* userParam, void* privateData) : m_privateData(privateData)
{
	const size_t kFiberStackSize = 2 * 1024 * 1024;
	ucontext_t* context = (ucontext_t*)malloc(sizeof(ucontext_t));
	m_stackPtr = malloc(kFiberStackSize);
	// the context has to be initialized by getcontext before it is modified for makecontext
	getcontext(context);
	context->uc_stack.ss_sp = m_stackPtr;
	context->uc_stack.ss_size = kFiberStackSize;
	// successor context is the fiber's own context
	context->uc_link = context;
	makecontext(context, (void(*)())FiberEntryPoint, 1, userParam);
	m_implData = (void*)context;
}
// Creates the fiber object for the calling thread. No separate stack is allocated since the thread's stack is used.
Fiber::Fiber(void* privateData) : m_privateData(privateData)
{
	ucontext_t* threadContext = (ucontext_t*)malloc(sizeof(ucontext_t));
	getcontext(threadContext);
	m_stackPtr = nullptr;
	m_implData = (void*)threadContext;
}
// Releases the context and, if this fiber owns one, its stack.
Fiber::~Fiber()
{
	// free() accepts null, which covers fibers created from a thread (no own stack)
	free(m_stackPtr);
	free(m_implData);
}
// Registers the calling thread as a fiber and makes it the current fiber of the thread.
Fiber* Fiber::PrepareCurrentThread(void* privateData)
{
	cemu_assert_debug(sCurrentFiber == nullptr);
	Fiber* threadFiber = new Fiber(privateData);
	sCurrentFiber = threadFiber;
	return threadFiber;
}
// Saves the state of the current fiber and resumes targetFiber.
void Fiber::Switch(Fiber& targetFiber)
{
	ucontext_t* fromContext = (ucontext_t*)(sCurrentFiber->m_implData);
	ucontext_t* toContext = (ucontext_t*)(targetFiber.m_implData);
	// the bookkeeping has to be updated before the switch since swapcontext only returns once this fiber is resumed
	sCurrentFiber = &targetFiber;
	swapcontext(fromContext, toContext);
}
void* Fiber::GetFiberPrivateData()
{
return sCurrentFiber->m_privateData;
}
#endif

View file

@ -0,0 +1,43 @@
#include "Fiber.h"
#if BOOST_OS_WINDOWS
#include <Windows.h>
// Fiber which is currently executing on this thread. Null until PrepareCurrentThread() or Switch() assigns it.
thread_local Fiber* sCurrentFiber{};
// Creates a fiber with a 2MiB stack which starts in FiberEntryPoint(userParam) on the first switch.
Fiber::Fiber(void(*FiberEntryPoint)(void* userParam), void* userParam, void* privateData) : m_privateData(privateData)
{
	const SIZE_T kFiberStackSize = 2 * 1024 * 1024;
	m_implData = (void*)CreateFiber(kFiberStackSize, (LPFIBER_START_ROUTINE)FiberEntryPoint, userParam);
}
// Converts the calling thread into a fiber and wraps it.
Fiber::Fiber(void* privateData) : m_privateData(privateData)
{
	m_stackPtr = nullptr;
	m_implData = (void*)ConvertThreadToFiber(nullptr);
}
// Destroys the underlying Windows fiber.
// NOTE(review): this is also used for fibers created from a thread via PrepareCurrentThread - confirm that
// such a fiber is never destroyed while it is the running fiber.
Fiber::~Fiber()
{
	HANDLE fiberHandle = (HANDLE)m_implData;
	DeleteFiber(fiberHandle);
}
// Registers the calling thread as a fiber and makes it the current fiber of the thread.
Fiber* Fiber::PrepareCurrentThread(void* privateData)
{
	cemu_assert_debug(sCurrentFiber == nullptr); // thread already prepared
	sCurrentFiber = new Fiber(privateData);
	return sCurrentFiber;
}
// Suspends the running fiber and continues execution in targetFiber.
void Fiber::Switch(Fiber& targetFiber)
{
	HANDLE targetHandle = (HANDLE)targetFiber.m_implData;
	// update the bookkeeping first, SwitchToFiber only returns once this fiber is resumed
	sCurrentFiber = &targetFiber;
	SwitchToFiber(targetHandle);
}
void* Fiber::GetFiberPrivateData()
{
return sCurrentFiber->m_privateData;
}
#endif

127
src/util/ImageWriter/bmp.h Normal file
View file

@ -0,0 +1,127 @@
#include "Common/filestream.h"
// Writes an uncompressed BMP file (file header, info header, pixel rows) to fs.
// pixelData holds `height` tightly packed rows of width*bits/8 bytes each, first row = top of the image.
// Pixel rows are only emitted for 24 and 32 bits per pixel; for other depths just the headers are written.
static void _bmp_write(FileStream* fs, sint32 width, sint32 height, uint32 bits, void* pixelData)
{
	// bytes per row in the input buffer (no padding)
	const sint32 rowBytes = width * bits / 8;
	// rows in the file are padded to a multiple of 4 bytes
	const sint32 rowPitch = (rowBytes + 3) & ~3;
	const sint32 paddingLength = rowPitch - rowBytes;
	uint8 padding[4] = { 0 };

	BITMAPFILEHEADER bmp_fh;
	bmp_fh.bfType = 0x4d42; // "BM"
	bmp_fh.bfSize = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER) + rowPitch * height;
	bmp_fh.bfReserved1 = 0;
	bmp_fh.bfReserved2 = 0;
	bmp_fh.bfOffBits = sizeof(BITMAPFILEHEADER) + sizeof(BITMAPINFOHEADER);

	BITMAPINFOHEADER bmp_ih;
	bmp_ih.biSize = sizeof(BITMAPINFOHEADER);
	bmp_ih.biWidth = width;
	bmp_ih.biHeight = height;
	bmp_ih.biPlanes = 1;
	bmp_ih.biBitCount = bits;
	bmp_ih.biCompression = 0;
	bmp_ih.biSizeImage = 0;
	bmp_ih.biXPelsPerMeter = 0;
	bmp_ih.biYPelsPerMeter = 0;
	bmp_ih.biClrUsed = 0;
	bmp_ih.biClrImportant = 0;

	fs->writeData(&bmp_fh, sizeof(BITMAPFILEHEADER));
	fs->writeData(&bmp_ih, sizeof(BITMAPINFOHEADER));
	if (bits == 24 || bits == 32)
	{
		// a positive biHeight means the file stores the bottom row first, so emit the rows in reverse order
		for (sint32 y = 0; y < height; y++)
		{
			// the input stride is the unpadded row size; using the padded pitch here would skew the rows
			// and read past the end of the buffer whenever rowBytes is not a multiple of 4
			void* rowInput = ((uint8*)pixelData) + (size_t)rowBytes * (size_t)(height - y - 1);
			fs->writeData(rowInput, rowBytes);
			// write padding
			if (paddingLength > 0)
				fs->writeData(padding, paddingLength);
		}
	}
}
static bool bmp_store8BitAs24(wchar_t* path, sint32 width, sint32 height, sint32 bytesPerRow, void* pixelData)
{
FileStream* fs = FileStream::createFile(path);
if (fs == nullptr)
return false;
uint8* pixelI = (uint8*)pixelData;
uint8* pixelRGB = (uint8*)malloc(width * height * 3);
for (sint32 y = 0; y < height; y++)
{
sint32 srcIdx = y * bytesPerRow;
for (sint32 x = 0; x < width; x++)
{
sint32 dstIdx = x + y * width;
pixelRGB[dstIdx * 3 + 0] = pixelI[srcIdx];
pixelRGB[dstIdx * 3 + 1] = pixelI[srcIdx];
pixelRGB[dstIdx * 3 + 2] = pixelI[srcIdx];
srcIdx++;
}
}
_bmp_write(fs, width, height, 24, pixelRGB);
free(pixelRGB);
delete fs;
return true;
}
static bool bmp_store16BitAs24(wchar_t* path, sint32 width, sint32 height, sint32 bytesPerRow, void* pixelData)
{
FileStream* fs = FileStream::createFile(path);
if (fs == nullptr)
return false;
uint8* pixelI = (uint8*)pixelData;
uint8* pixelRGB = (uint8*)malloc(width * height * 3);
for (sint32 y = 0; y < height; y++)
{
sint32 srcIdx = y * bytesPerRow;
for (sint32 x = 0; x < width; x++)
{
sint32 dstIdx = x + y * width;
pixelRGB[dstIdx * 3 + 0] = pixelI[srcIdx + 0];
pixelRGB[dstIdx * 3 + 1] = pixelI[srcIdx + 1];
pixelRGB[dstIdx * 3 + 2] = 0;
srcIdx += 2;
}
}
_bmp_write(fs, width, height, 24, pixelRGB);
free(pixelRGB);
delete fs;
return true;
}
static bool bmp_store24BitAs24(wchar_t* path, sint32 width, sint32 height, sint32 bytesPerRow, void* pixelData)
{
FileStream* fs = FileStream::createFile(path);
if (fs == nullptr)
return false;
uint8* pixelI = (uint8*)pixelData;
uint8* pixelRGB = (uint8*)malloc(width * height * 3);
for (sint32 y = 0; y < height; y++)
{
sint32 srcIdx = y * bytesPerRow;
for (sint32 x = 0; x < width; x++)
{
sint32 dstIdx = x + y * width;
pixelRGB[dstIdx * 3 + 0] = pixelI[srcIdx + 0];
pixelRGB[dstIdx * 3 + 1] = pixelI[srcIdx + 1];
pixelRGB[dstIdx * 3 + 2] = pixelI[srcIdx + 2];
srcIdx += 3;
}
}
_bmp_write(fs, width, height, 24, pixelRGB);
free(pixelRGB);
delete fs;
return true;
}

View file

@ -0,0 +1,34 @@
#include "Common/filestream.h"
#include <vector>
static bool tga_write_rgba(wchar_t* path, sint32 width, sint32 height, uint8* pixelData)
{
FileStream* fs = FileStream::createFile(path);
if (fs == nullptr)
return false;
uint8_t header[18] = {0,0,2,0,0,0,0,0,0,0,0,0, (uint8)(width % 256), (uint8)(width / 256), (uint8)(height % 256), (uint8)(height / 256), 32, 0x20};
fs->writeData(&header, sizeof(header));
std::vector<uint8> tempPixelData;
tempPixelData.resize(width * height * 4);
// write one row at a time
uint8* pOut = tempPixelData.data();
for (sint32 y = 0; y < height; y++)
{
const uint8* rowIn = pixelData + y * width*4;
for (sint32 x = 0; x < width; x++)
{
pOut[0] = rowIn[2];
pOut[1] = rowIn[1];
pOut[2] = rowIn[0];
pOut[3] = rowIn[3];
pOut += 4;
rowIn += 4;
}
}
fs->writeData(tempPixelData.data(), width * height * 4);
delete fs;
return true;
}

View file

@ -0,0 +1,263 @@
#include "util/IniParser/IniParser.h"
// Copies the ini text into the parser and parses it right away. The result of parse() is stored in m_isValid.
IniParser::IniParser(std::span<char> iniContents, std::string_view name) : m_name(name)
{
	// we dont support utf8 but still skip the byte order mark in case the user saved the document with the wrong encoding
	const bool startsWithBOM = iniContents.size() >= 3 &&
		(uint8)iniContents[0] == 0xEF &&
		(uint8)iniContents[1] == 0xBB &&
		(uint8)iniContents[2] == 0xBF;
	if (startsWithBOM)
		iniContents = iniContents.subspan(3);
	m_iniFileData.assign(iniContents.begin(), iniContents.end());
	m_isValid = parse();
}
// Advances m_parseOffset to the end of the next non-empty line and returns that line (without line break characters).
// Returns false once the end of the data is reached. Empty lines are skipped and never returned.
bool IniParser::ReadNextLine(std::string_view& lineString)
{
	auto isLineBreak = [](char c) { return c == '\r' || c == '\n'; };
	// skip \r and \n
	while (m_parseOffset < m_iniFileData.size() && isLineBreak(m_iniFileData[m_parseOffset]))
		m_parseOffset++;
	if (m_parseOffset >= m_iniFileData.size())
		return false;
	const size_t lineStart = m_parseOffset;
	// parse until end of line/file
	while (m_parseOffset < m_iniFileData.size() && !isLineBreak(m_iniFileData[m_parseOffset]))
		m_parseOffset++;
	const size_t lineLength = m_parseOffset - lineStart;
	lineString = { m_iniFileData.data() + lineStart, lineLength };
	return true;
}
// Removes spaces and tabs from both ends of the view. The referenced characters are not modified.
void IniParser::TrimWhitespaces(std::string_view& str)
{
	const size_t firstKept = str.find_first_not_of(" \t");
	if (firstKept == std::string_view::npos)
	{
		// only whitespace (or already empty)
		str.remove_prefix(str.size());
		return;
	}
	str.remove_prefix(firstKept);
	// at least one non-whitespace character is left, so this search always succeeds
	const size_t lastKept = str.find_last_not_of(" \t");
	str.remove_suffix(str.size() - 1 - lastKept);
}
// Splits the ini text into sections and name/value pairs and stores them in m_sectionList.
// Malformed lines are reported via PrintWarning() and skipped. Always returns true.
// Note that lineNumber counts the lines returned by ReadNextLine(), which skips empty lines.
bool IniParser::parse()
{
	sint32 lineNumber = 0;
	std::string_view lineView;
	while (ReadNextLine(lineView))
	{
		lineNumber++;
		// skip whitespaces
		while (!lineView.empty() && (lineView.front() == ' ' || lineView.front() == '\t'))
			lineView.remove_prefix(1);
		if (lineView.empty())
			continue;
		// cut off comments (starting with # or ;), comment characters inside quotes are kept
		bool isInQuote = false;
		for (size_t i = 0; i < lineView.size(); i++)
		{
			const char c = lineView[i];
			if (c == '\"')
				isInQuote = !isInQuote;
			else if ((c == '#' || c == ';') && !isInQuote)
			{
				lineView.remove_suffix(lineView.size() - i);
				break;
			}
		}
		if (lineView.empty())
			continue;
		// handle section headers
		if (lineView[0] == '[')
		{
			const size_t closingBracket = lineView.find(']', 1);
			if (closingBracket != std::string_view::npos)
				lineView = lineView.substr(1, closingBracket - 1); // keep only the text between [ and ]
			else
				PrintWarning(lineNumber, "Section doesn't end with a ]", lineView);
			// an unterminated header still starts a section (the name then includes the leading '[')
			StartSection(lineView, lineNumber);
			continue;
		}
		// otherwise try to parse it as an option in the form name = value
		// find and split at = character
		const size_t separator = lineView.find('=');
		if (separator == std::string_view::npos)
		{
			TrimWhitespaces(lineView);
			if (!lineView.empty())
				PrintWarning(lineNumber, "Not a valid section header or name-value pair", lineView);
			continue;
		}
		std::string_view option_name = lineView.substr(0, separator);
		std::string_view option_value = lineView.substr(separator + 1);
		// validate
		TrimWhitespaces(option_name);
		TrimWhitespaces(option_value);
		if (option_name.empty())
		{
			PrintWarning(lineNumber, "Empty option name is not allowed", lineView);
			continue;
		}
		bool invalidCharacter = false;
		for (auto& _c : option_name)
		{
			uint8 c = (uint8)_c;
			if (c == ']' || c == '[')
			{
				PrintWarning(lineNumber, "Option name may not contain [ or ]", lineView);
				invalidCharacter = true;
				break;
			}
			else if (c < 32 || c >= 127 || c == ' ') // only printable 7-bit characters, DEL and 0x80 are rejected as well
			{
				PrintWarning(lineNumber, "Option name may only contain ANSI characters and no spaces", lineView);
				invalidCharacter = true;
				break;
			}
		}
		if (invalidCharacter)
			continue;
		// remove quotes from value
		if (!option_value.empty() && option_value.front() == '\"')
		{
			option_value.remove_prefix(1);
			// after the opening quote is removed a single remaining quote character is the closing one,
			// this makes the empty quoted value "" valid
			if (option_value.empty() || option_value.back() != '\"')
			{
				PrintWarning(lineNumber, "Option value starts with a quote character \" but does not end with one", lineView);
				continue;
			}
			option_value.remove_suffix(1);
		}
		if (m_sectionList.empty())
		{
			// no current section
			PrintWarning(lineNumber, "Option defined without first defining a section", lineView);
			continue;
		}
		// names are stored as written, FindOption() compares them case-insensitively
		m_sectionList.back().m_optionPairs.emplace_back(option_name, option_value);
	}
	return true;
}
// Appends a new section to m_sectionList. Options parsed afterwards are added to this section.
void IniParser::StartSection(std::string_view sectionName, size_t lineNumber)
{
m_sectionList.emplace_back(sectionName, lineNumber);
}
// Moves the section cursor forward. The first call selects the first section.
// Returns true if the cursor points at a section afterwards, false once all sections have been visited.
bool IniParser::NextSection()
{
	const size_t sectionCount = m_sectionList.size();
	if (m_currentSectionIndex == std::numeric_limits<size_t>::max())
		m_currentSectionIndex = 0; // iteration has not started yet
	else if (m_currentSectionIndex < sectionCount)
		m_currentSectionIndex++;
	else
		return false; // already past the last section
	return m_currentSectionIndex < sectionCount;
}
// Returns the name of the section selected by NextSection(), or an empty string if no section is selected.
std::string_view IniParser::GetCurrentSectionName()
{
	// the "not started" marker (max value of size_t) is never a valid index, so one range check covers both cases
	const bool hasCurrentSection = m_currentSectionIndex < m_sectionList.size();
	if (!hasCurrentSection)
		return "";
	return m_sectionList[m_currentSectionIndex].m_sectionName;
}
// Returns the line number recorded for the section selected by NextSection(), or 0 if no section is selected.
size_t IniParser::GetCurrentSectionLineNumber()
{
	// the "not started" marker (max value of size_t) is never a valid index, so one range check covers both cases
	const bool hasCurrentSection = m_currentSectionIndex < m_sectionList.size();
	if (!hasCurrentSection)
		return 0;
	return m_sectionList[m_currentSectionIndex].m_lineNumber;
}
// Looks up an option of the current section by name (A-Z and a-z are treated as equal).
// Returns the value of the first matching option, or std::nullopt if there is no match or no section is selected.
std::optional<std::string_view> IniParser::FindOption(std::string_view optionName)
{
	if (m_currentSectionIndex >= m_sectionList.size())
		return std::nullopt;
	auto toLowerAnsi = [](char c) -> char
	{
		if (c >= 'A' && c <= 'Z')
			c -= ('A' - 'a');
		return c;
	};
	for (auto& optionPair : m_sectionList[m_currentSectionIndex].m_optionPairs)
	{
		const std::string_view candidate = optionPair.first;
		if (candidate.size() != optionName.size())
			continue;
		// case insensitive ANSI string comparison
		size_t matchedLength = 0;
		while (matchedLength < candidate.size() && toLowerAnsi(candidate[matchedLength]) == toLowerAnsi(optionName[matchedLength]))
			matchedLength++;
		if (matchedLength == candidate.size())
			return optionPair.second;
	}
	return std::nullopt;
}
// Returns a view over all (name, value) pairs of the current section, or an empty
// span when iteration has not started or has run past the last section.
// The span refers to storage owned by this parser.
std::span<std::pair<std::string_view, std::string_view>> IniParser::GetAllOptions()
{
	// the "not started" sentinel is the maximum size_t, so it also fails this range check
	if (m_currentSectionIndex < m_sectionList.size())
		return m_sectionList[m_currentSectionIndex].m_optionPairs;
	return {};
}
// Reports a problem found while parsing the INI data.
// Currently a no-op: the only statement (the log call below) is commented out,
// so lineNumber, msg and lineView are all unused.
void IniParser::PrintWarning(int lineNumber, std::string_view msg, std::string_view lineView)
{
// INI logging is silenced
// cemuLog_force("File: {} Line {}: {}", m_name, lineNumber, msg);
}

View file

@ -0,0 +1,45 @@
#pragma once
#include <vector>
#include <span>
#include <string>
#include <optional>
// Parser for INI-style text made of sections and name/value options.
// After construction the parsed sections are walked with NextSection() and
// queried with the GetCurrentSection*/FindOption/GetAllOptions accessors.
// NOTE(review): section names and options are stored as std::string_view;
// presumably they point into m_iniFileData, in which case copying a parser
// would leave the copy's views pointing at the original's buffer - confirm
// against the constructor/parse implementation.
class IniParser
{
private:
// One parsed section: its name, the line it was declared on and its options.
class IniSection
{
public:
IniSection(std::string_view sectionName, size_t lineNumber) : m_sectionName(sectionName), m_lineNumber(lineNumber) {}
std::string_view m_sectionName;
size_t m_lineNumber;
// (name, value) pairs in the order they were added by the parser
std::vector<std::pair<std::string_view, std::string_view>> m_optionPairs;
};
public:
// name is an optional label for the data (the silenced log call in PrintWarning refers to it as the file)
IniParser(std::span<char> iniContents, std::string_view name = {});
// convenience overload: reinterprets the unsigned char buffer as char and delegates to the constructor above
IniParser(std::span<unsigned char> iniContents, std::string_view name = {}) : IniParser(std::span<char>((char*)iniContents.data(), iniContents.size()), name) {};
// section and option iterating
// advances the cursor; the first call selects the first section. Returns false once there are no more sections
bool NextSection();
// name of the current section, empty if there is none
std::string_view GetCurrentSectionName();
// line number of the current section, 0 if there is none
size_t GetCurrentSectionLineNumber();
// value of the named option in the current section (name match ignores case for A-Z), std::nullopt if absent
std::optional<std::string_view> FindOption(std::string_view optionName);
// all (name, value) pairs of the current section, empty span if there is none
std::span<std::pair<std::string_view, std::string_view>> GetAllOptions();
private:
// parsing
bool parse();
bool ReadNextLine(std::string_view& lineString);
void TrimWhitespaces(std::string_view& str);
// appends a new section; later options are attached to the last section added
void StartSection(std::string_view sectionName, size_t lineNumber);
// warning hook; the implementation is currently silenced
void PrintWarning(int lineNumber, std::string_view msg, std::string_view lineView);
std::vector<char> m_iniFileData;
std::string m_name;
bool m_isValid{ false };
size_t m_parseOffset{ 0 };
std::vector<IniSection> m_sectionList;
// section cursor; the maximum size_t value means "iteration not started yet"
size_t m_currentSectionIndex{std::numeric_limits<size_t>::max()};
};

View file

@ -0,0 +1,24 @@
#pragma once
// Platform-neutral interface for reserving, committing and releasing pages of
// virtual memory. Implemented once per operating system.
namespace MemMapper
{
// Access rights for a memory range. Values are bit flags and can be combined.
enum class PAGE_PERMISSION : uint32
{
P_READ = (0x01),
P_WRITE = (0x02),
P_EXECUTE = (0x04),
// combined
P_NONE = 0,
P_RW = (0x03), // P_READ | P_WRITE
P_RWX = (0x07) // P_READ | P_WRITE | P_EXECUTE
};
DEFINE_ENUM_FLAG_OPERATORS(PAGE_PERMISSION);
// size of a memory page on the host, in bytes
size_t GetPageSize();
// reserves a range of address space; baseAddr is the requested address
void* ReserveMemory(void* baseAddr, size_t size, PAGE_PERMISSION permissionFlags);
// releases a range obtained from ReserveMemory
void FreeReservation(void* baseAddr, size_t size);
// makes memory usable with the given permissions. With fromReservation = true the range
// must lie inside an existing reservation, otherwise a fresh mapping is created
void* AllocateMemory(void* baseAddr, size_t size, PAGE_PERMISSION permissionFlags, bool fromReservation = false);
// counterpart of AllocateMemory; fromReservation must match the value used for the allocation
void FreeMemory(void* baseAddr, size_t size, bool fromReservation = false);
};

View file

@ -0,0 +1,69 @@
#include "util/MemMapper/MemMapper.h"
#if BOOST_OS_LINUX > 0
#include <unistd.h>
#include <sys/mman.h>
// mmap/mprotect based implementation of the MemMapper interface.
namespace MemMapper
{
	// Host page size, queried once via getpagesize() during static initialization.
	const size_t sPageSize{ []()
		{
			return (size_t)getpagesize();
		}()
	};

	// Returns the cached host page size in bytes.
	size_t GetPageSize()
	{
		return sPageSize;
	}

	// Translates PAGE_PERMISSION flags into PROT_* bits for mmap/mprotect.
	// Only the combinations R, RW and RWX are handled. Anything else triggers
	// cemu_assert_unimplemented() and, if that returns, yields 0.
	int GetProt(PAGE_PERMISSION permissionFlags)
	{
		int p = 0;
		if (HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_READ) && HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_WRITE) && HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_EXECUTE))
			p = PROT_READ | PROT_WRITE | PROT_EXEC;
		else if (HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_READ) && HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_WRITE) && !HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_EXECUTE))
			p = PROT_READ | PROT_WRITE;
		else if (HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_READ) && !HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_WRITE) && !HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_EXECUTE))
			p = PROT_READ;
		else
			cemu_assert_unimplemented();
		return p;
	}

	// Reserves address space as an inaccessible (PROT_NONE) private anonymous mapping.
	// baseAddr is passed to mmap as a hint; permissionFlags is not used here because
	// access rights are applied later by AllocateMemory.
	// Returns the mapped address, or nullptr on failure.
	void* ReserveMemory(void* baseAddr, size_t size, PAGE_PERMISSION permissionFlags)
	{
		void* r = mmap(baseAddr, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		// mmap signals failure with MAP_FAILED rather than a null pointer; normalize it so
		// all paths of this interface report failure as nullptr
		return (r == MAP_FAILED) ? nullptr : r;
	}

	// Unmaps a range previously obtained from ReserveMemory.
	void FreeReservation(void* baseAddr, size_t size)
	{
		munmap(baseAddr, size);
	}

	// Makes memory accessible with the requested permissions.
	// fromReservation = true: changes the protection of an already mapped range.
	// fromReservation = false: creates a new private anonymous mapping (baseAddr is a hint).
	// Returns the address of the memory, or nullptr on failure.
	void* AllocateMemory(void* baseAddr, size_t size, PAGE_PERMISSION permissionFlags, bool fromReservation)
	{
		if (fromReservation)
		{
			if (mprotect(baseAddr, size, GetProt(permissionFlags)) == 0)
				return baseAddr;
			return nullptr;
		}
		void* r = mmap(baseAddr, size, GetProt(permissionFlags), MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		// see ReserveMemory: translate MAP_FAILED into nullptr
		return (r == MAP_FAILED) ? nullptr : r;
	}

	// Counterpart of AllocateMemory.
	// fromReservation = true: the range stays mapped but becomes inaccessible again.
	// fromReservation = false: the mapping is removed.
	void FreeMemory(void* baseAddr, size_t size, bool fromReservation)
	{
		if (fromReservation)
			mprotect(baseAddr, size, PROT_NONE);
		else
			munmap(baseAddr, size);
	}
};
#endif

View file

@ -0,0 +1,67 @@
#include "util/MemMapper/MemMapper.h"
#if BOOST_OS_WINDOWS > 0
#include <Windows.h>
namespace MemMapper
{
const size_t sPageSize{ []()
{
SYSTEM_INFO si;
GetSystemInfo(&si);
return (size_t)si.dwPageSize;
}()
};
size_t GetPageSize()
{
return sPageSize;
}
DWORD GetPageProtection(PAGE_PERMISSION permissionFlags)
{
DWORD p = 0;
if (HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_READ) && HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_WRITE) && HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_EXECUTE))
p = PAGE_EXECUTE_READWRITE;
else if (HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_READ) && HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_WRITE) && !HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_EXECUTE))
p = PAGE_READWRITE;
else if (HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_READ) && !HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_WRITE) && !HAS_FLAG(permissionFlags, PAGE_PERMISSION::P_EXECUTE))
p = PAGE_READONLY;
else
cemu_assert_unimplemented();
return p;
}
void* ReserveMemory(void* baseAddr, size_t size, PAGE_PERMISSION permissionFlags)
{
void* r = VirtualAlloc(baseAddr, size, MEM_RESERVE, GetPageProtection(permissionFlags));
return r;
}
void FreeReservation(void* baseAddr, size_t size)
{
VirtualFree(baseAddr, size, MEM_RELEASE);
}
void* AllocateMemory(void* baseAddr, size_t size, PAGE_PERMISSION permissionFlags, bool fromReservation)
{
void* r;
if(fromReservation)
r = VirtualAlloc(baseAddr, size, MEM_COMMIT, GetPageProtection(permissionFlags));
else
r = VirtualAlloc(baseAddr, size, MEM_RESERVE | MEM_COMMIT, GetPageProtection(permissionFlags));
return r;
}
void FreeMemory(void* baseAddr, size_t size, bool fromReservation)
{
if(fromReservation)
VirtualFree(baseAddr, size, MEM_DECOMMIT);
else
VirtualFree(baseAddr, size, MEM_RELEASE);
}
};
#endif

View file

View file

@ -0,0 +1,15 @@
#include <thread>
// Helper for launching background work that nobody waits for.
class ThreadPool
{
public:
	// Runs f(args...) on a newly created thread and detaches it immediately.
	// The callable and its arguments are forwarded to std::thread. The caller gets
	// no handle, so the work can be neither joined nor cancelled.
	template<class TFunction, class... TArgs>
	static void FireAndForget(TFunction&& f, TArgs&&... args)
	{
		// todo - find a way to use std::async here so we can utilize thread pooling?
		std::thread(std::forward<TFunction>(f), std::forward<TArgs>(args)...).detach();
	}
};

View file

@ -0,0 +1,160 @@
#include "VirtualHeap.h"
// Creates a virtual heap that manages the offset range [0, virtualHeapSize).
// The heap only does bookkeeping; it never touches the memory at baseAddr.
//   virtualHeapSize  size of the managed range, rounded up to a multiple of 32
//   baseAddr         base pointer used by the _allocateAddr/_freeAddr helpers
// Returns the new heap, or nullptr if the bookkeeping memory could not be allocated.
VirtualBufferHeap_t* virtualBufferHeap_create(uint32 virtualHeapSize, void* baseAddr)
{
	// fixed number of bookkeeping entries; this bounds the number of simultaneous allocations
	const sint32 unusedEntryPoolSize = 1024 * 16;

	VirtualBufferHeap_t* bufferHeap = (VirtualBufferHeap_t*)malloc(sizeof(VirtualBufferHeap_t));
	VirtualBufferHeapEntry_t* unusedEntryPool = (VirtualBufferHeapEntry_t*)malloc(sizeof(VirtualBufferHeapEntry_t) * unusedEntryPoolSize);
	if (bufferHeap == nullptr || unusedEntryPool == nullptr)
	{
		// out of host memory: do not touch the null pointers, release whatever was obtained
		free(unusedEntryPool);
		free(bufferHeap);
		return nullptr;
	}
	// zero everything, including the stats counters
	memset(bufferHeap, 0, sizeof(VirtualBufferHeap_t));
	bufferHeap->firstEntry = nullptr;
	virtualHeapSize = (virtualHeapSize + 31) & ~31;
	bufferHeap->virtualSize = virtualHeapSize;
	bufferHeap->baseAddress = baseAddr;
	bufferHeap->updateTrackIndex = 0;
	// create pool of unused entries: chain them into a singly linked free list
	for (sint32 i = 0; i < unusedEntryPoolSize - 1; i++)
	{
		unusedEntryPool[i].next = unusedEntryPool + i + 1;
	}
	unusedEntryPool[unusedEntryPoolSize - 1].next = nullptr;
	bufferHeap->firstUnusedEntry = unusedEntryPool + 0;
	return bufferHeap;
}
// Takes one bookkeeping entry from the heap's pool of unused entries.
// The returned entry has its next/previous links cleared; the offsets are left
// for the caller to fill in.
// Returns nullptr if the pool is exhausted (after logging and raising the assert).
VirtualBufferHeapEntry_t* virtualBufferHeap_createEntry(VirtualBufferHeap_t* bufferHeap)
{
	VirtualBufferHeapEntry_t* newEntry = bufferHeap->firstUnusedEntry;
	if (newEntry == nullptr)
	{
		forceLog_printf("virtualBufferHeap_createEntry: Pool empty");
		cemu_assert_suspicious();
		// must not fall through: the code below would dereference the null pointer
		return nullptr;
	}
	// unlink from the free list
	bufferHeap->firstUnusedEntry = newEntry->next;
	newEntry->previous = nullptr;
	newEntry->next = nullptr;
	return newEntry;
}
// Returns a bookkeeping entry to the heap's pool of unused entries and updates
// the heap statistics. The entry must already be unlinked from the list of
// active allocations by the caller.
void virtualBufferHeap_releaseEntry(VirtualBufferHeap_t* bufferHeap, VirtualBufferHeapEntry_t* entry)
{
	// account for the range that is going away
	const uint32 releasedSize = entry->endOffset - entry->startOffset;
	bufferHeap->stats.allocatedMemory -= releasedSize;
	bufferHeap->stats.numActiveAllocs--;
	// push the entry onto the front of the unused list
	VirtualBufferHeapEntry_t* const previousHead = bufferHeap->firstUnusedEntry;
	entry->next = previousHead;
	bufferHeap->firstUnusedEntry = entry;
}
// Allocate memory region from virtual heap. Always allocates memory at the lowest possible address
// (first fit, scanning the active allocations in ascending offset order).
//   size  requested size in bytes, rounded up to a multiple of 256. Start offsets are
//         256-byte aligned as well.
// Returns the entry describing the new range, or nullptr if the request does not fit
// into the heap or no bookkeeping entry is available.
VirtualBufferHeapEntry_t* virtualBufferHeap_allocate(VirtualBufferHeap_t* bufferHeap, uint32 size)
{
	// align size; done in 64 bit so that sizes close to the 32-bit limit cannot wrap around to a small value
	const uint64 alignedSize = ((uint64)size + 255) & ~(uint64)255;
	if (alignedSize > bufferHeap->virtualSize)
		return nullptr; // can never fit, not even into an empty heap

	// iterate already allocated entries and try to find free space between them
	uint64 currentAllocationOffset = 0;
	VirtualBufferHeapEntry_t* entryPrev = nullptr;
	VirtualBufferHeapEntry_t* entryItr = bufferHeap->firstEntry;
	while (entryItr && (currentAllocationOffset + alignedSize) > entryItr->startOffset)
	{
		// space occupied, continue behind this entry
		currentAllocationOffset = ((uint64)entryItr->endOffset + 255) & ~(uint64)255;
		entryPrev = entryItr;
		entryItr = entryItr->next;
	}
	// entryItr is now the first entry located behind the candidate range, or null if the
	// candidate range lies behind all existing entries (or the heap is empty)
	if ((currentAllocationOffset + alignedSize) > bufferHeap->virtualSize)
		return nullptr; // out of heap memory

	VirtualBufferHeapEntry_t* newEntry = virtualBufferHeap_createEntry(bufferHeap);
	if (newEntry == nullptr)
		return nullptr; // no bookkeeping entry left
	// both values fit into 32 bit because they were checked against virtualSize above
	newEntry->startOffset = (uint32)currentAllocationOffset;
	newEntry->endOffset = (uint32)(currentAllocationOffset + alignedSize);
	// insert between entryPrev and entryItr
	newEntry->previous = entryPrev;
	newEntry->next = entryItr;
	if (entryPrev)
		entryPrev->next = newEntry;
	else
		bufferHeap->firstEntry = newEntry;
	if (entryItr)
		entryItr->previous = newEntry;
	bufferHeap->stats.allocatedMemory += (uint32)alignedSize;
	bufferHeap->stats.numActiveAllocs++;
	return newEntry;
}
// Frees an allocation: unlinks the entry from the list of active allocations
// and hands it back to the pool of unused entries.
void virtualBufferHeap_free(VirtualBufferHeap_t* bufferHeap, VirtualBufferHeapEntry_t* entry)
{
	VirtualBufferHeapEntry_t* const before = entry->previous;
	VirtualBufferHeapEntry_t* const after = entry->next;
	if (before)
		before->next = after;
	else
		bufferHeap->firstEntry = after; // entry was the head, its successor becomes the first one
	if (after)
		after->previous = before;
	// release entry
	virtualBufferHeap_releaseEntry(bufferHeap, entry);
}
// Allocates a range from the heap and returns it as a pointer relative to the
// heap's base address.
// Returns nullptr if the allocation failed.
void* virtualBufferHeap_allocateAddr(VirtualBufferHeap_t* bufferHeap, uint32 size)
{
	VirtualBufferHeapEntry_t* heapEntry = virtualBufferHeap_allocate(bufferHeap, size);
	if (heapEntry == nullptr)
		return nullptr; // virtualBufferHeap_allocate reports an exhausted heap with a null entry
	return ((uint8*)bufferHeap->baseAddress + heapEntry->startOffset);
}
// Frees the allocation that starts at addr. addr must be a pointer previously
// returned by virtualBufferHeap_allocateAddr for this heap.
// Performs a linear search over the active allocations; if no allocation starts
// at addr, cemu_assert_suspicious() is raised.
void virtualBufferHeap_freeAddr(VirtualBufferHeap_t* bufferHeap, void* addr)
{
	for (auto candidate = bufferHeap->firstEntry; candidate; candidate = candidate->next)
	{
		const auto candidateAddress = (uint8*)bufferHeap->baseAddress + candidate->startOffset;
		if (candidateAddress != (uint8*)addr)
			continue;
		virtualBufferHeap_free(bufferHeap, candidate);
		return;
	}
	cemu_assert_suspicious();
}

View file

@ -0,0 +1,35 @@
#pragma once
// virtual heap
// Bookkeeping record for one allocated range [startOffset, endOffset) of a virtual heap.
// Active entries form a doubly linked list; entries sitting in the heap's unused pool
// are chained through next only.
struct VirtualBufferHeapEntry_t
{
uint32 startOffset; // offset of the first byte, relative to the start of the heap
uint32 endOffset; // offset one past the last byte
VirtualBufferHeapEntry_t* next;
VirtualBufferHeapEntry_t* previous;
};
// State of one virtual heap. The heap only tracks offsets; the memory itself is
// owned by whoever provided baseAddress.
struct VirtualBufferHeap_t
{
uint32 virtualSize; // size of the managed offset range in bytes
void* baseAddress; // base address for _allocateAddr and _freeAddr
VirtualBufferHeapEntry_t* firstEntry; // head of the list of active allocations, null if there are none
// unused entries
VirtualBufferHeapEntry_t* firstUnusedEntry; // head of the pool of entries available for new allocations
// update tracking
uint32 updateTrackIndex;
// stats
struct
{
uint32 numActiveAllocs; // number of allocations currently alive
uint32 allocatedMemory; // sum of the sizes of all live allocations, in bytes
}stats;
};
// creates a heap managing virtualHeapSize bytes of offset space; baseAddr is only used by the *Addr helpers
VirtualBufferHeap_t* virtualBufferHeap_create(uint32 virtualHeapSize, void* baseAddr = nullptr);
// allocates a range and returns its bookkeeping entry
VirtualBufferHeapEntry_t* virtualBufferHeap_allocate(VirtualBufferHeap_t* bufferHeap, uint32 size);
// frees a range previously returned by virtualBufferHeap_allocate
void virtualBufferHeap_free(VirtualBufferHeap_t* bufferHeap, VirtualBufferHeapEntry_t* entry);
// like virtualBufferHeap_allocate, but returns baseAddress + start offset of the range
void* virtualBufferHeap_allocateAddr(VirtualBufferHeap_t* bufferHeap, uint32 size);
// frees the range that starts at addr (a pointer obtained from virtualBufferHeap_allocateAddr)
void virtualBufferHeap_freeAddr(VirtualBufferHeap_t* bufferHeap, void* addr);

236
src/util/Zir/Core/IR.cpp Normal file
View file

@ -0,0 +1,236 @@
#include "util/Zir/Core/IR.h"
#include "util/Zir/Core/ZpIRDebug.h"
namespace ZpIR
{
// Returns the mnemonic used when printing an instruction with the given opcode.
// Opcodes without a mnemonic (including UNDEF) raise a debug assert and yield "UKN".
const char* getOpcodeName(IR::OpCode opcode)
{
	switch (opcode)
	{
	case IR::OpCode::MOV:
		return "MOV";
	case IR::OpCode::ADD:
		return "ADD";
	case IR::OpCode::SUB:
		return "SUB";
	case IR::OpCode::MUL:
		return "MUL";
	case IR::OpCode::DIV:
		return "DIV";
	case IR::OpCode::BITCAST:
		return "BITCAST";
	case IR::OpCode::SWAP_ENDIAN:
		return "SWAP_ENDIAN";
	case IR::OpCode::CONVERT_INT_TO_FLOAT:
		return "CONV_I2F";
	case IR::OpCode::CONVERT_FLOAT_TO_INT:
		return "CONV_F2I";
	case IR::OpCode::IMPORT_SINGLE:
		return "IMPORT_S";
	case IR::OpCode::IMPORT:
		return "IMPORT";
	case IR::OpCode::EXPORT:
		return "EXPORT";
	case IR::OpCode::EXTRACT_ELEMENT:
		return "EXTRACT_ELEM";
	default:
		cemu_assert_debug(false);
		return "UKN";
	}
}
// Returns a short lower-case name for a data type.
// Types without a name (e.g. DataType::NONE) yield an empty string.
const char* getTypeName(DataType t)
{
	switch (t)
	{
	case DataType::S64:
		return "s64";
	case DataType::U64:
		return "u64";
	case DataType::S32:
		return "s32";
	case DataType::U32:
		return "u32";
	case DataType::S16:
		return "s16";
	case DataType::U16:
		return "u16";
	case DataType::S8:
		return "s8";
	case DataType::U8:
		return "u8";
	case DataType::F32:
		return "f32";
	case DataType::F64:
		return "f64";
	case DataType::BOOL:
		return "bool";
	case DataType::POINTER:
		return "ptr";
	default:
		break;
	}
	return "";
}
// Builds the printable name of an IR operand.
// Operand ids below 0x8000 refer to block->m_regs, ids from 0x8000 upwards refer
// to block->m_consts (index = id & 0x7FFF).
//  - registers, physical mode enabled: name from m_getPhysicalRegisterNameCustom,
//    or "UNASSIGNED" if no physical register was assigned
//  - registers, custom naming callback set: name from m_getRegisterNameCustom
//  - registers otherwise: a type prefix followed by the register index, e.g. "ud5"
//  - constants: "<type>:<value>", or "ukn_const_type" for unsupported types
std::string DebugPrinter::getRegisterName(ZpIRBasicBlock* block, IRReg r)
{
	const uint16 regId = (uint16)r;
	const bool isConstant = regId >= 0x8000;

	if (!isConstant && m_showPhysicalRegisters)
	{
		auto& reg = block->m_regs[regId];
		if (!reg.hasAssignedPhysicalRegister())
			return "UNASSIGNED";
		return m_getPhysicalRegisterNameCustom(block, reg.physicalRegister);
	}
	if (!isConstant && m_getRegisterNameCustom)
	{
		return m_getRegisterNameCustom(block, r);
	}
	if (isConstant)
	{
		auto& reg = block->m_consts[regId & 0x7FFF];
		switch (reg.type)
		{
		case DataType::POINTER:
			return fmt::format("ptr:{}", reg.value_ptr);
		case DataType::U64:
		{
			// large values are easier to read in hex
			if (reg.value_u64 >= 0x1000)
				return fmt::format("u64:0x{0:x}", reg.value_u64);
			return fmt::format("u64:{}", reg.value_u64);
		}
		case DataType::U32:
			return fmt::format("u32:{}", reg.value_u32);
		case DataType::S32:
			// print through the signed member so that negative constants show up as negative numbers
			return fmt::format("s32:{}", reg.value_s32);
		case DataType::F32:
			return fmt::format("f32:{}", reg.value_f32);
		default:
			break;
		}
		return "ukn_const_type";
	}

	// plain virtual register: derive a prefix from its type
	auto& reg = block->m_regs[regId];
	const char* regLetter = "r";
	switch (reg.type)
	{
	case DataType::U64:
		regLetter = "uq"; // quad-word
		break;
	case DataType::U32:
		regLetter = "ud"; // double-word
		break;
	case DataType::U16:
		regLetter = "uw"; // word
		break;
	case DataType::U8:
		regLetter = "uc"; // char
		break;
	case DataType::S64:
		regLetter = "sq"; // signed quad-word
		break;
	case DataType::S32:
		regLetter = "sd"; // signed double-word
		break;
	case DataType::S16:
		regLetter = "sw"; // signed word
		break;
	case DataType::S8:
		regLetter = "sc"; // signed char
		break;
	case DataType::F32:
		regLetter = "fv"; // 32bit float
		break;
	case DataType::POINTER:
		regLetter = "ptr";
		break;
	default:
		assert_dbg();
	}
	// vector registers have no printable form yet
	if (reg.elementCount != 1)
		assert_dbg();
	return fmt::format("{}{}", regLetter, regId);
}
// Renders one instruction in human readable form: the opcode mnemonic padded to
// 10 characters, followed by the comma separated operands. IMPORT/EXPORT
// instructions additionally end with "loc: <location>".
// Unknown instruction forms and IMPORT/EXPORT instructions whose register count
// is not 1..4 raise assert_dbg() and produce an empty string.
std::string DebugPrinter::getInstructionHRF(ZpIRBasicBlock* block, IR::__InsBase* cmd)
{
	// shared by IMPORT and EXPORT: "<opcode> r0, r1, ..., loc: <location>"
	const auto formatWithLocation = [&](const IRReg* regs, size_t regCount, const auto& location) -> std::string
	{
		std::string text = fmt::format("{:<10} ", getOpcodeName(cmd->opcode));
		for (size_t i = 0; i < regCount; i++)
		{
			text += getRegisterName(block, regs[i]);
			text += ", ";
		}
		text += fmt::format("loc: {}", location);
		return text;
	};

	if (auto insRR = IR::InsRR::getIfForm(cmd))
	{
		return fmt::format("{:<10} {}, {}", getOpcodeName(cmd->opcode), getRegisterName(block, insRR->rA), getRegisterName(block, insRR->rB));
	}
	if (auto insRRR = IR::InsRRR::getIfForm(cmd))
	{
		return fmt::format("{:<10} {}, {}, {}", getOpcodeName(cmd->opcode), getRegisterName(block, insRRR->rA), getRegisterName(block, insRRR->rB), getRegisterName(block, insRRR->rC));
	}
	if (auto insExport = IR::InsEXPORT::getIfForm(cmd))
	{
		// the export location is printed as its raw symbol value
		if (insExport->count >= 1 && insExport->count <= 4)
			return formatWithLocation(insExport->regArray, insExport->count, insExport->exportSymbol);
		assert_dbg();
		return "";
	}
	if (auto insImport = IR::InsIMPORT::getIfForm(cmd))
	{
		// the import location is decoded into a readable name
		ShaderSubset::ShaderImportLocation importLocation = insImport->importSymbol;
		std::string locDebugName = importLocation.GetDebugName();
		if (insImport->count >= 1 && insImport->count <= 4)
			return formatWithLocation(insImport->regArray, insImport->count, locDebugName);
		assert_dbg();
		return "";
	}
	assert_dbg();
	return "";
}
// Prints one basic block to stdout: its address, its import and export lists and
// a numbered listing of its instructions.
void DebugPrinter::debugPrintBlock(ZpIRBasicBlock* block)
{
	// %llx expects an unsigned long long argument. uintptr_t and the 64-bit symbol type are
	// not guaranteed to be that exact type on every platform, so convert explicitly.
	// print name
	printf("IRBasicBlock %016llx\n", (unsigned long long)(uintptr_t)block);
	// print imports
	printf("Imports:\n");
	for (const auto& itr : block->m_imports)
		printf(" reg: %s sym:0x%llx\n", getRegisterName(block, itr.reg).c_str(), (unsigned long long)itr.name);
	// print exports
	printf("Exports:\n");
	for (const auto& itr : block->m_exports)
		printf(" reg: %s sym:0x%llx\n", getRegisterName(block, itr.reg).c_str(), (unsigned long long)itr.name);
	// print instructions
	printf("Assembly:\n");
	size_t i = 0;
	for (IR::__InsBase* instruction = block->m_instructionFirst; instruction; instruction = instruction->next)
	{
		const std::string s = getInstructionHRF(block, instruction);
		printf("%04x %s\n", (unsigned int)i, s.c_str());
		i++;
	}
}
// Prints every basic block of the function to stdout, in the order in which they
// are stored, each followed by an empty line.
void DebugPrinter::debugPrint(ZpIRFunction* irFunction)
{
	printf("--- Print IR function assembly ---\n");
	auto& blocks = irFunction->m_basicBlocks;
	for (size_t blockIndex = 0; blockIndex < blocks.size(); blockIndex++)
	{
		debugPrintBlock(blocks[blockIndex]);
		printf("\n");
	}
}
}

672
src/util/Zir/Core/IR.h Normal file
View file

@ -0,0 +1,672 @@
#pragma once
#include <optional>
using f32 = float;
using f64 = double;
// Assertion helper for the IR code: calls assert_dbg() when the condition is false.
inline void zpir_debug_assert(bool _cond)
{
	if (_cond)
		return;
	assert_dbg();
}
namespace ZpIR
{
//enum class ZpIRCmdForm : uint8
//{
// FORM_VOID, // no-op
// FORM_ZERO, // opcode without operands
// FORM_1OP, // op0
// FORM_2OP, // op0, op1
// FORM_3OP, // op0, op1, op2
// FORM_4OP, // op0, op1, op2, op3
// // todo - memory read + memory store
// FORM_MEM, // op0, opEA, offset, type
// // todo - function calls
//};
//enum class ZpIROpcodeDepr : uint8
//{
// OP_VOID,
// // FORM_1OP
// OP_CALL,
// // FORM_2OP
// OP_ASSIGN, // copy/assignment
// OP_CAST_ZEROEXT, // cast type to another. If broadening then zero-extend (unsigned cast)
// OP_CAST_SIGNEXT, // cast type to another. If broadening then sign-extend (signed cast)
// // FORM_3OP
// OP_ADD, // op0 = op1 + op2
// OP_SUB, // op0 = op1 - op2
// OP_MUL, // op0 = op1 * op2
// OP_DIV, // op0 = op1 / op2
// // memory
// OP_MEM_READ,
// OP_MEM_WRITE,
//};
// Element type of an IR register or constant.
// The values are grouped by category: integers start at 1, floating-point types at 0x10,
// POINTER is 0x20 and BOOL is 0x30.
enum class DataType : uint8
{
NONE = 0x00,
// integer
U8 = 1,
S8 = 2,
U16 = 3,
S16 = 4,
U32 = 5,
S32 = 6,
U64 = 7,
S64 = 8,
// floating-point
F32 = 0x10 + 0,
F64 = 0x10 + 1,
// special
POINTER = 0x20, // dynamic width based on pointer width of target architecture
// boolean
BOOL = 0x30, // can hold false/true. Size depends on target architecture
};
// Operand id used by IR instructions. Ids below 0x8000 name a register of the
// basic block, ids from 0x8000 upwards name one of its constants.
using IRReg = uint16;
// Opaque 64-bit name of an import/export location.
using LocationSymbolName = uint64;
// Id of a physical register assigned to an IR register.
using ZpIRPhysicalReg = uint32;

// True if the operand id refers to a register (as opposed to a constant).
inline bool isRegVar(IRReg r)
{
	return r < 0x8000;
}

// True if the operand id refers to a constant.
inline bool isConstVar(IRReg r)
{
	return r >= 0x8000;
}

// Strips the register/constant marker bit, leaving the index into the block's
// register or constant list.
inline uint16 getRegIndex(IRReg r)
{
	return (uint16)r & 0x7FFF;
}
namespace IR
{
// Operation performed by an IR instruction. The operand layout is described
// separately by OpForm.
enum class OpCode : uint8
{
UNDEF = 0, // undefined
// basic opcodes
MOV,
// basic arithmetic opcodes
ADD, // addition
SUB, // subtraction
MUL, // multiplication
DIV, // division
// conversion
BITCAST, // like MOV, but allows registers of different types. No value conversion happens, raw bit copy
SWAP_ENDIAN, // swap endianness
CONVERT_INT_TO_FLOAT,
CONVERT_FLOAT_TO_INT,
// misc
IMPORT_SINGLE, // import into a single IRReg. Depr: Make this like EXPORT where there is a 1-4 regs variant and one for more
IMPORT, // import from external/custom resource into 1-4 IRReg
EXPORT, // export 1-4 registers to external/custom resource
// EXPORT_MANY // for when more than 4 registers are needed
// vector
EXTRACT_ELEMENT, // extract a scalar type from a vector type
// some notes: We need this for texture read instructions. Where the result is a vec4 (f32x4) and this is how we can extract individual registers from that
// update -> We may also instead just let the texture sample instruction specify 4 output registers
};
// Operand layout of an instruction. It identifies which instruction class an
// instruction object actually is; getIfForm() compares against it before casting.
enum class OpForm : uint8
{
NONE = 0,
RR = 1, // two register operands (InsRR)
RRR = 2, // three register operands (InsRRR)
IMPORT_SINGLE = 3, // deprecated
IMPORT = 4, // 1-4 registers plus an import location (InsIMPORT)
EXPORT = 5, // 1-4 registers plus an export location (InsEXPORT)
};
// instruction base class
// Common header of every IR instruction. Instructions of a basic block are
// chained into a singly linked list through `next`.
class __InsBase
{
public:
	OpCode opcode; // operation to perform
	OpForm opform; // operand layout, identifies the concrete instruction class
	// next instruction in the block. Initialized to null so that an instruction that
	// has not been appended to a block yet never carries a dangling link
	__InsBase* next{ nullptr };
protected:
	// only the concrete instruction forms may construct the base
	__InsBase(OpCode opcode, OpForm opform) : opcode(opcode), opform(opform) { }
};
// adapted base class, instruction forms inherit from this
// Intermediate base that ties an instruction class to its operand form.
//   TInstr   the concrete instruction class deriving from this template
//   TOpForm  the form value stored in every instance of TInstr
template<typename TInstr, OpForm TOpForm>
class __InsBaseWithForm : public __InsBase
{
public:
	// Form value shared by all instances of TInstr.
	static const OpForm getForm()
	{
		return TOpForm;
	}

	// Checked downcast: returns the instruction as TInstr if its form matches,
	// otherwise nullptr.
	static TInstr* getIfForm(__InsBase* instructionBase)
	{
		const bool formMatches = (instructionBase->opform == TOpForm);
		return formMatches ? (TInstr*)instructionBase : nullptr;
	}

protected:
	// forwards the opcode and fixes the form to TOpForm
	__InsBaseWithForm(OpCode opcode) : __InsBase(opcode, TOpForm) { }
};
// Instruction with two register operands.
class InsRR : public __InsBaseWithForm<InsRR, OpForm::RR>
{
public:
	IRReg rA;
	IRReg rB;

	InsRR(OpCode opcode, IRReg rA, IRReg rB)
		: __InsBaseWithForm(opcode), rA(rA), rB(rB)
	{
	}
};
// Instruction with three register operands.
class InsRRR : public __InsBaseWithForm<InsRRR, OpForm::RRR>
{
public:
	IRReg rA;
	IRReg rB;
	IRReg rC;

	InsRRR(OpCode opcode, IRReg rA, IRReg rB, IRReg rC)
		: __InsBaseWithForm(opcode), rA(rA), rB(rB), rC(rC)
	{
	}
};
// should we support RRI format with 32bit signed integer as a way to avoid having to generate dozens of IR const regs for stuff like shift and other logical instructions with constant rhs?
// and if we do, should it be a 32bit signed integer or should the type match the instruction type?
// EXPORT instruction: writes 1-4 registers to the location named by exportSymbol.
class InsEXPORT : public __InsBaseWithForm<InsEXPORT, OpForm::EXPORT>
{
public:
	InsEXPORT(LocationSymbolName exportSymbol, IRReg r) : __InsBaseWithForm(OpCode::EXPORT), exportSymbol(exportSymbol)
	{
		regArray[0] = r;
		count = 1;
	}
	InsEXPORT(LocationSymbolName exportSymbol, IRReg r0, IRReg r1) : __InsBaseWithForm(OpCode::EXPORT), exportSymbol(exportSymbol)
	{
		regArray[0] = r0;
		regArray[1] = r1;
		count = 2;
	}
	InsEXPORT(LocationSymbolName exportSymbol, IRReg r0, IRReg r1, IRReg r2) : __InsBaseWithForm(OpCode::EXPORT), exportSymbol(exportSymbol)
	{
		regArray[0] = r0;
		regArray[1] = r1;
		regArray[2] = r2;
		count = 3;
	}
	InsEXPORT(LocationSymbolName exportSymbol, IRReg r0, IRReg r1, IRReg r2, IRReg r3) : __InsBaseWithForm(OpCode::EXPORT), exportSymbol(exportSymbol)
	{
		regArray[0] = r0;
		regArray[1] = r1;
		regArray[2] = r2;
		regArray[3] = r3;
		count = 4;
	}
	// Takes the registers from a span. At most 4 registers are supported; passing more is
	// a usage error (debug assert) and the excess registers are dropped.
	InsEXPORT(LocationSymbolName exportSymbol, std::span<IRReg> regs) : __InsBaseWithForm(OpCode::EXPORT), exportSymbol(exportSymbol)
	{
		zpir_debug_assert(regs.size() <= 4);
		// never write past regArray, even if the assert above does not stop execution
		const size_t usedCount = regs.size() < 4 ? regs.size() : 4;
		for (size_t i = 0; i < usedCount; i++)
			regArray[i] = regs[i];
		count = (uint16)usedCount;
	}
	uint16 count{ 0 }; // number of valid entries in regArray
	IRReg regArray[4]{}; // up to 4 registers, unused slots are zero
	LocationSymbolName exportSymbol;
};
// IMPORT instruction: reads from the location named by importSymbol into 1-4 registers.
class InsIMPORT : public __InsBaseWithForm<InsIMPORT, OpForm::IMPORT>
{
public:
	InsIMPORT(LocationSymbolName importSymbol, IRReg r) : __InsBaseWithForm(OpCode::IMPORT), importSymbol(importSymbol)
	{
		regArray[0] = r;
		count = 1;
	}
	InsIMPORT(LocationSymbolName importSymbol, IRReg r0, IRReg r1) : __InsBaseWithForm(OpCode::IMPORT), importSymbol(importSymbol)
	{
		regArray[0] = r0;
		regArray[1] = r1;
		count = 2;
	}
	InsIMPORT(LocationSymbolName importSymbol, IRReg r0, IRReg r1, IRReg r2) : __InsBaseWithForm(OpCode::IMPORT), importSymbol(importSymbol)
	{
		regArray[0] = r0;
		regArray[1] = r1;
		regArray[2] = r2;
		count = 3;
	}
	InsIMPORT(LocationSymbolName importSymbol, IRReg r0, IRReg r1, IRReg r2, IRReg r3) : __InsBaseWithForm(OpCode::IMPORT), importSymbol(importSymbol)
	{
		regArray[0] = r0;
		regArray[1] = r1;
		regArray[2] = r2;
		regArray[3] = r3;
		count = 4;
	}
	// Takes the registers from a span. At most 4 registers are supported; passing more is
	// a usage error (debug assert) and the excess registers are dropped.
	InsIMPORT(LocationSymbolName importSymbol, std::span<IRReg> regs) : __InsBaseWithForm(OpCode::IMPORT), importSymbol(importSymbol)
	{
		zpir_debug_assert(regs.size() <= 4);
		// never write past regArray, even if the assert above does not stop execution
		const size_t usedCount = regs.size() < 4 ? regs.size() : 4;
		for (size_t i = 0; i < usedCount; i++)
			regArray[i] = regs[i];
		count = (uint16)usedCount;
	}
	uint16 count{ 0 }; // number of valid entries in regArray
	IRReg regArray[4]{}; // up to 4 registers, unused slots are zero
	LocationSymbolName importSymbol;
};
};
// IR register definition stored in basic block
struct IRRegDef
{
IRRegDef(DataType type, uint8 elementCount) : type(type), elementCount(elementCount) {};
DataType type;
uint8 elementCount; // 1 = scalar
ZpIRPhysicalReg physicalRegister{ std::numeric_limits<ZpIRPhysicalReg>::max()};
// todo - information about spilling location? (it depends on the architecture so we should keep this out of the core IR)
bool hasAssignedPhysicalRegister() const
{
return physicalRegister != std::numeric_limits<ZpIRPhysicalReg>::max();
}
void assignPhysicalRegister(ZpIRPhysicalReg physReg)
{
physicalRegister = physReg;
}
};
// IR register constant definition stored in basic block
struct IRRegConstDef
{
IRRegConstDef() {};
// todo - support for constants with more than one element?
IRRegConstDef& setU32(uint32 v) { value_u32 = v; type = DataType::U32; return *this; };
IRRegConstDef& setS32(sint32 v) { value_s32 = v; type = DataType::S32; return *this; };
IRRegConstDef& setF32(f32 v) { value_f32 = v; type = DataType::F32; return *this; };
IRRegConstDef& setPtr(void* v) { value_ptr = v; type = DataType::POINTER; return *this; };
IRRegConstDef& setRaw(uint32 v, DataType regType) { value_u32 = v; type = regType; return *this; };
DataType type{ DataType::NONE };
union
{
uint32 value_u32;
sint32 value_s32;
sint64 value_s64;
uint64 value_u64;
void* value_ptr;
f32 value_f32;
f64 value_f64;
};
};
struct ZpIRBasicBlock
{
friend class ZpIRBuilder;
struct IRBBImport
{
IRBBImport(IRReg reg, LocationSymbolName name) : reg(reg), name(name) {};
IRReg reg;
LocationSymbolName name;
};
struct IRBBExport
{
IRBBExport(IRReg reg, LocationSymbolName name) : reg(reg), name(name) {};
IRReg reg;
LocationSymbolName name;
};
IR::__InsBase* m_instructionFirst{};
IR::__InsBase* m_instructionLast{};
std::vector<IRRegDef> m_regs;
std::vector<IRRegConstDef> m_consts;
std::vector<IRBBImport> m_imports;
std::vector<IRBBExport> m_exports;
ZpIRBasicBlock* m_branchNotTaken{ nullptr }; // next block if branch not taken or no branch present
ZpIRBasicBlock* m_branchTaken{ nullptr }; // next block if branch is taken
void* m_workbuffer{}; // can be used as temporary storage for information
void appendInstruction(IR::__InsBase* ins)
{
if (m_instructionFirst == nullptr)
{
m_instructionFirst = ins;
m_instructionLast = ins;
ins->next = nullptr;
return;
}
m_instructionLast->next = ins;
m_instructionLast = ins;
ins->next = nullptr;
}
IRReg createRegister(DataType type, uint8 elementCount = 1)
{
uint32 index = (uint32)m_regs.size();
cemu_assert_debug(index < 0x8000);
m_regs.emplace_back(type, elementCount);
return (IRReg)index;
}
IRReg createConstantU32(uint32 value)
{
uint32 index = (uint32)m_consts.size();
cemu_assert_debug(index < 0x8000);
m_consts.emplace_back().setU32(value);
return (IRReg)((uint16)index + 0x8000);
}
IRReg createTypedConstant(uint32 value, DataType type)
{
uint32 index = (uint32)m_consts.size();
cemu_assert_debug(index < 0x8000);
m_consts.emplace_back().setRaw(value, type);
return (IRReg)((uint16)index + 0x8000);
}
IRReg createConstantS32(uint32 value)
{
uint32 index = (uint32)m_consts.size();
cemu_assert_debug(index < 0x8000);
m_consts.emplace_back().setS32(value);
return (IRReg)((uint16)index + 0x8000);
}
IRReg createConstantF32(f32 value)
{
uint32 index = (uint32)m_consts.size();
cemu_assert_debug(index < 0x8000);
m_consts.emplace_back().setF32(value);
return (IRReg)((uint16)index + 0x8000);
}
IRReg createConstantPointer(void* value)
{
uint32 index = (uint32)m_consts.size();
cemu_assert_debug(index < 0x8000);
m_consts.emplace_back().setPtr(value);
return (IRReg)((uint16)index + 0x8000);
}
void addImport(IRReg reg, LocationSymbolName importName)
{
m_imports.emplace_back(reg, importName);
}
void addExport(IRReg reg, LocationSymbolName importName)
{
m_exports.emplace_back(reg, importName);
}
void setWorkbuffer(void* buffer)
{
if (buffer != nullptr)
{
if (m_workbuffer)
assert_dbg();
}
m_workbuffer = buffer;
}
void* getWorkbuffer()
{
return m_workbuffer;
}
DataType getRegType(IRReg reg)
{
uint32 index = (uint32)reg;
if (index >= 0x8000)
{
index -= 0x8000;
cemu_assert_debug(index < m_consts.size());
return m_consts[index].type;
}
return m_regs[index].type;
}
IRRegConstDef* getConstant(IRReg reg)
{
uint32 index = (uint32)reg;
if (index < 0x8000)
return nullptr;
index -= 0x8000;
if (index >= m_consts.size())
return nullptr;
return m_consts.data() + index;
}
std::optional<sint32> getConstantS32(IRReg reg)
{
uint32 index = (uint32)reg;
if (index < 0x8000)
return std::nullopt;
index -= 0x8000;
if (index >= m_consts.size())
return std::nullopt;
if (m_consts[index].type == DataType::U32)
return (sint32)m_consts[index].value_u32;
else if (m_consts[index].type == DataType::POINTER)
assert_dbg();
else if (m_consts[index].type == DataType::U64)
{
if (m_consts[index].value_u64 >= 0x80000000ull)
assert_dbg();
return (sint32)m_consts[index].value_u64;
}
else
assert_dbg();
return std::nullopt;
}
std::optional<uint64> getConstantU64(IRReg reg)
{
auto constReg = getConstant(reg);
if (!constReg)
return std::nullopt;
if (constReg->type == DataType::U64)
return constReg->value_u64;
else
assert_dbg();
return std::nullopt;
}
};
// An IR function: a collection of basic blocks plus processing state.
struct ZpIRFunction
{
std::vector<ZpIRBasicBlock*> m_basicBlocks; // all blocks; the debug printer walks them in this order
std::vector<ZpIRBasicBlock*> m_entryBlocks;
std::vector<ZpIRBasicBlock*> m_exitBlocks;
struct
{
bool registersAllocated{false};
}state;
};
// helpers for shader code
namespace ShaderSubset
{
// Typed view of a LocationSymbolName used for shader imports.
// Encoding of the 64-bit symbol: bits 56-63 hold the location type, bits 0-15 hold
// index A and bits 16-31 hold index B.
class ShaderImportLocation
{
	enum LOC_TYPE : uint8
	{
		LOC_TYPE_UNIFORM_REGISTER = 1,
		LOC_TYPE_UNIFORM_BUFFER = 2,
		LOC_TYPE_ATTRIBUTE = 3,
	};
public:
	ShaderImportLocation() {}

	// Decodes a symbol name. Deliberately implicit so that a LocationSymbolName can be
	// assigned directly.
	ShaderImportLocation(LocationSymbolName loc)
	{
		uint64 v = (uint64)loc;
		m_locType = (LOC_TYPE)(v >> 56);
		m_indexA = (uint16)(v >> 0);
		m_indexB = (uint16)(v >> 16);
	}

	// Makes this location refer to a uniform register. GetDebugName() interprets the
	// index as register number * 4 + component (0-3 = x, y, z, w).
	ShaderImportLocation& SetUniformRegister(uint16 index)
	{
		m_locType = LOC_TYPE_UNIFORM_REGISTER;
		m_indexA = index;
		m_indexB = 0;
		return *this;
	}

	// Makes this location refer to one channel (0-3 = x, y, z, w) of a vertex attribute.
	ShaderImportLocation& SetVertexAttribute(uint16 attributeIndex, uint16 channelIndex)
	{
		m_locType = LOC_TYPE_ATTRIBUTE;
		m_indexA = attributeIndex;
		m_indexB = channelIndex;
		return *this;
	}

	bool IsUniformRegister() const
	{
		return m_locType == LOC_TYPE_UNIFORM_REGISTER;
	}

	bool IsVertexAttribute() const
	{
		return m_locType == LOC_TYPE_ATTRIBUTE;
	}

	// Outputs the index stored by SetUniformRegister(). Only meaningful if IsUniformRegister().
	void GetUniformRegister(uint16& index) const
	{
		index = m_indexA;
	}

	// Outputs the indices stored by SetVertexAttribute(). Only meaningful if IsVertexAttribute().
	void GetVertexAttribute(uint16& attributeIndex, uint16& channelIndex) const
	{
		attributeIndex = m_indexA;
		channelIndex = m_indexB;
	}

	// Encodes this location back into a symbol name (inverse of the decoding constructor).
	operator LocationSymbolName() const
	{
		uint64 v = 0;
		v |= ((uint64)m_locType << 56);
		v |= ((uint64)m_indexA << 0);
		v |= ((uint64)m_indexB << 16);
		return (LocationSymbolName)v;
	}

	// Returns a readable description such as "UniformReg[3].y" or "VertexAttribute[0].x",
	// or "Unknown" for any other location type.
	std::string GetDebugName() const
	{
		const char elementTable[] = { 'x' , 'y', 'z', 'w' };
		if (m_locType == LOC_TYPE_UNIFORM_REGISTER)
			return fmt::format("UniformReg[{0}].{1}", m_indexA >> 2, elementTable[m_indexA & 3]);
		if (m_locType == LOC_TYPE_ATTRIBUTE)
		{
			// the channel index comes straight from the symbol and is not validated anywhere,
			// so guard the table lookup against values outside 0-3
			const char channelName = (m_indexB < 4) ? elementTable[m_indexB] : '?';
			return fmt::format("VertexAttribute[{0}].{1}", m_indexA, channelName);
		}
		return "Unknown";
	}
private:
	LOC_TYPE m_locType{};
	uint16 m_indexA{};
	uint16 m_indexB{};
	//LocationSymbolName m_symbolName{};
	static_assert(sizeof(LocationSymbolName) == 8);
};
class ShaderExportLocation
{
enum LOC_TYPE : uint8
{
LOC_TYPE_POSITION = 1,
LOC_TYPE_OUTPUT = 2,
};
public:
ShaderExportLocation() {}
ShaderExportLocation(LocationSymbolName loc)
{
uint64 v = (uint64)loc;
m_locType = (LOC_TYPE)(v >> 56);
m_indexA = (uint16)(v >> 0);
m_indexB = (uint16)(v >> 16);
}
ShaderExportLocation& SetPosition()
{
m_locType = LOC_TYPE_POSITION;
m_indexA = 0;
m_indexB = 0;
return *this;
}
ShaderExportLocation& SetOutputAttribute(uint16 attributeIndex) // todo - channel mask?
{
m_locType = LOC_TYPE_OUTPUT;
m_indexA = attributeIndex;
m_indexB = 0;
return *this;
}
bool IsPosition() const
{
return m_locType == LOC_TYPE_POSITION;
}
bool IsOutputAttribute() const
{
return m_locType == LOC_TYPE_OUTPUT;
}
void GetOutputAttribute(uint16& attributeIndex) const
{
attributeIndex = m_indexA;
}
operator LocationSymbolName() const
{
uint64 v = 0;
v |= ((uint64)m_locType << 56);
v |= ((uint64)m_indexA << 0);
v |= ((uint64)m_indexB << 16);
return (LocationSymbolName)v;
}
std::string GetDebugName()
{
const char elementTable[] = { 'x' , 'y', 'z', 'w' };
//if (m_locType == LOC_TYPE_UNIFORM_REGISTER)
// return fmt::format("UniformReg[{0}].{1}", m_indexA >> 2, elementTable[m_indexA & 3]);
//if (m_locType == LOC_TYPE_ATTRIBUTE)
// return fmt::format("VertexAttribute[{0}].{1}", m_indexA, elementTable[m_indexB]);
return "Unknown";
}
private:
LOC_TYPE m_locType{};
uint16 m_indexA{};
uint16 m_indexB{};
static_assert(sizeof(LocationSymbolName) == 8);
};
};
}

View file

@ -0,0 +1,76 @@
#pragma once
#include "util/Zir/Core/IR.h"
namespace ZpIR
{
struct ZpIRCmdUtil
{
template<typename TFuncRegRead, typename TFuncRegWrite>
static void forEachAccessedReg(ZpIRBasicBlock& block, IR::__InsBase* instruction, TFuncRegRead funcRegRead, TFuncRegWrite funcRegWrite)
{
if (auto ins = IR::InsRR::getIfForm(instruction))
{
switch (ins->opcode)
{
case ZpIR::IR::OpCode::MOV:
case ZpIR::IR::OpCode::BITCAST:
case ZpIR::IR::OpCode::SWAP_ENDIAN:
case ZpIR::IR::OpCode::CONVERT_FLOAT_TO_INT:
case ZpIR::IR::OpCode::CONVERT_INT_TO_FLOAT:
if (isRegVar(ins->rB))
funcRegRead(ins->rB);
cemu_assert_debug(isRegVar(ins->rA));
funcRegWrite(ins->rA);
break;
default:
cemu_assert_unimplemented();
}
}
else if (auto ins = IR::InsRRR::getIfForm(instruction))
{
switch (ins->opcode)
{
case ZpIR::IR::OpCode::ADD:
case ZpIR::IR::OpCode::SUB:
case ZpIR::IR::OpCode::MUL:
case ZpIR::IR::OpCode::DIV:
if (isRegVar(ins->rB))
funcRegRead(ins->rB);
if (isRegVar(ins->rC))
funcRegRead(ins->rC);
cemu_assert_debug(isRegVar(ins->rA));
funcRegWrite(ins->rA);
break;
default:
cemu_assert_unimplemented();
}
}
else if (auto ins = IR::InsIMPORT::getIfForm(instruction))
{
for (uint16 i = 0; i < ins->count; i++)
{
cemu_assert_debug(isRegVar(ins->regArray[i]));
funcRegWrite(ins->regArray[i]);
}
}
else if (auto ins = IR::InsEXPORT::getIfForm(instruction))
{
for (uint16 i = 0; i < ins->count; i++)
{
if (isRegVar(ins->regArray[i]))
funcRegRead(ins->regArray[i]);
}
}
else
{
cemu_assert_unimplemented();
}
}
static void replaceRegisters(IR::__InsBase& ins, std::unordered_map<IRReg, IRReg>& translationTable)
{
cemu_assert_unimplemented();
}
};
}

View file

@ -0,0 +1,250 @@
#pragma once
#include "util/Zir/Core/IR.h"
namespace ZpIR
{
// Convenience wrapper around a single ZpIRBasicBlock.
// Every call is forwarded to the wrapped block; instructions are allocated with
// `new` and handed to ZpIRBasicBlock::appendInstruction.
class BasicBlockBuilder
{
public:
	BasicBlockBuilder(ZpIRBasicBlock* basicBlock) : m_basicBlock(basicBlock) {}

	// ---- registers ----

	// create a new register in the block
	IRReg createReg(DataType type, uint8 elementCount = 1)
	{
		return m_basicBlock->createRegister(type, elementCount);
	}

	// same as above, additionally stores the new register in `r`
	IRReg createReg(IRReg& r, DataType type, uint8 elementCount = 1)
	{
		r = m_basicBlock->createRegister(type, elementCount);
		return r;
	}

	DataType getRegType(IRReg reg)
	{
		return m_basicBlock->getRegType(reg);
	}

	void addImport(IRReg reg, LocationSymbolName importSymbolName)
	{
		m_basicBlock->addImport(reg, importSymbolName);
	}

	// ---- instructions ----

	// append a single instruction at the end
	// not implemented, always triggers assert_dbg. Use emit() instead
	void append(IR::__InsBase* ins)
	{
		assert_dbg();
	}

	// add an already constructed instruction to the end of the block
	void emit(IR::__InsBase* ins)
	{
		m_basicBlock->appendInstruction(ins);
	}

	void emit_EXPORT(LocationSymbolName exportSymbolName, IRReg r0)
	{
		IR::InsEXPORT* ins = new IR::InsEXPORT(exportSymbolName, r0);
		m_basicBlock->appendInstruction(ins);
	}

	void emit_EXPORT(LocationSymbolName exportSymbolName, std::span<IRReg> regs)
	{
		IR::InsEXPORT* ins = new IR::InsEXPORT(exportSymbolName, regs);
		m_basicBlock->appendInstruction(ins);
	}

	void emit_IMPORT(LocationSymbolName importSymbolName, IRReg r0)
	{
		IR::InsIMPORT* ins = new IR::InsIMPORT(importSymbolName, r0);
		m_basicBlock->appendInstruction(ins);
	}

	// two-register form: rA receives the result, rB is the operand
	// (some opcodes may use both as operands)
	void emit_RR(IR::OpCode opcode, IRReg rA, IRReg rB)
	{
		IR::InsRR* ins = new IR::InsRR(opcode, rA, rB);
		m_basicBlock->appendInstruction(ins);
	}

	// two-register form which creates and returns a fresh result register of type resultType
	IRReg emit_RR(IR::OpCode opcode, DataType resultType, IRReg rB)
	{
		const IRReg rA = m_basicBlock->createRegister(resultType);
		m_basicBlock->appendInstruction(new IR::InsRR(opcode, rA, rB));
		return rA;
	}

	// three-register form: rA receives the result, rB and rC are the operands
	// (some opcodes may use all three as operands)
	void emit_RRR(IR::OpCode opcode, IRReg rA, IRReg rB, IRReg rC)
	{
		IR::InsRRR* ins = new IR::InsRRR(opcode, rA, rB, rC);
		m_basicBlock->appendInstruction(ins);
	}

	// three-register form which creates and returns a fresh result register of type resultType
	IRReg emit_RRR(IR::OpCode opcode, DataType resultType, IRReg rB, IRReg rC)
	{
		const IRReg rA = m_basicBlock->createRegister(resultType);
		m_basicBlock->appendInstruction(new IR::InsRRR(opcode, rA, rB, rC));
		return rA;
	}

	// ---- constants ----
	// use templates to compact other types?

	IRReg createConstU32(uint32 v)
	{
		return m_basicBlock->createConstantU32(v);
	}

	IRReg createTypedConst(uint32 v, DataType type)
	{
		return m_basicBlock->createTypedConstant(v, type);
	}

	IRReg createConstS32(uint32 v)
	{
		return m_basicBlock->createConstantS32(v);
	}

	IRReg createConstF32(f32 v)
	{
		return m_basicBlock->createConstantF32(v);
	}

	IRReg createConstPointer(void* v)
	{
		return m_basicBlock->createConstantPointer(v);
	}

private:
	ZpIRBasicBlock* m_basicBlock;
};
// Helper class for constructing multiple basic blocks with control flow.
//
// Usage: beginBlock(name), fill the block, optionally set the branch targets,
// endBlock(), repeat, and finally call finish() to link the blocks by name and
// obtain the resulting ZpIRFunction.
// Blocks and the returned function are allocated with `new`; this class does not free them.
class ZpIRBuilder
{
public:
	typedef uint64 BlockBranchTarget;
	static const inline BlockBranchTarget INVALID_BLOCK_NAME = 0xFFFFFFFFFFFFFFFFull;

	// Per-block bookkeeping attached to each block via setWorkbuffer() until finish() runs
	struct BasicBlockWorkbuffer
	{
		BlockBranchTarget name{ INVALID_BLOCK_NAME };
		BlockBranchTarget targetBranchNotTaken{ INVALID_BLOCK_NAME };
		BlockBranchTarget targetBranchTaken{ INVALID_BLOCK_NAME };
	};

	// Start a new basic block identified by `name` and make it the current block
	void beginBlock(BlockBranchTarget name)
	{
		m_currentBasicBlock = new ZpIRBasicBlock();
		BasicBlockWorkbuffer* wb = new BasicBlockWorkbuffer();
		m_currentBasicBlock->setWorkbuffer(wb);
		wb->name = name;
		m_blocks.emplace_back(m_currentBasicBlock);
		m_blocksByName.emplace(name, m_currentBasicBlock);
	}

	// Close the current block, store the pending branch targets in its workbuffer
	// and reset them for the next block.
	// Returns the closed block, or nullptr (after assert_dbg) if no block is open.
	ZpIRBasicBlock* endBlock()
	{
		ZpIRBasicBlock* block = m_currentBasicBlock;
		if (!block)
		{
			// endBlock without matching beginBlock, there is nothing to close
			assert_dbg();
			return nullptr;
		}
		m_currentBasicBlock = nullptr;
		BasicBlockWorkbuffer* wb = (BasicBlockWorkbuffer*)block->getWorkbuffer();
		wb->targetBranchNotTaken = m_targetBranchNotTaken;
		wb->targetBranchTaken = m_targetBranchTaken;
		m_targetBranchNotTaken = INVALID_BLOCK_NAME;
		m_targetBranchTaken = INVALID_BLOCK_NAME;
		return block;
	}

	// Create the function: resolve the branch target names of every block to block
	// pointers, free the workbuffers and collect all blocks in m_basicBlocks.
	// Branch targets which do not name a known block trigger assert_dbg and are left unlinked.
	ZpIRFunction* finish()
	{
		if (m_currentBasicBlock)
			assert_dbg();
		// create function
		ZpIRFunction* func = new ZpIRFunction();
		// link all blocks
		for (auto& itr : m_blocks)
		{
			BasicBlockWorkbuffer* wb = (BasicBlockWorkbuffer*)itr->getWorkbuffer();
			if (ZpIRBasicBlock* target = resolveBranchTarget(wb->targetBranchNotTaken))
				itr->m_branchNotTaken = target;
			if (ZpIRBasicBlock* target = resolveBranchTarget(wb->targetBranchTaken))
				itr->m_branchTaken = target;
			delete wb;
			itr->setWorkbuffer(nullptr);
			func->m_basicBlocks.emplace_back(itr);
			// todo - track entry and exit blocks (set block flags for entry/exit during block gen)
		}
		return func;
	}

	// The following helpers forward to the current block and require an open block

	IRReg createBlockRegister(DataType type, uint8 elementCount = 1)
	{
		return m_currentBasicBlock->createRegister(type, elementCount);
	}

	IRReg createConstU32(uint32 v)
	{
		return m_currentBasicBlock->createConstantU32(v);
	}

	IRReg createConstPointer(void* v)
	{
		return m_currentBasicBlock->createConstantPointer(v);
	}

	IRReg createConstPointerV(size_t v)
	{
		return m_currentBasicBlock->createConstantPointer((void*)v);
	}

	void addImport(IRReg reg, LocationSymbolName importName)
	{
		m_currentBasicBlock->addImport(reg, importName);
	}

	void addExport(IRReg reg, LocationSymbolName importName)
	{
		m_currentBasicBlock->addExport(reg, importName);
	}

	// Set the branch-taken target of the block being built. May be set once per block
	void setBlockTargetBranchTaken(BlockBranchTarget target)
	{
		if (m_targetBranchTaken != INVALID_BLOCK_NAME)
			assert_dbg();
		m_targetBranchTaken = target;
	}

	// Set the branch-not-taken target of the block being built. May be set once per block
	void setBlockTargetBranchNotTaken(BlockBranchTarget target)
	{
		if (m_targetBranchNotTaken != INVALID_BLOCK_NAME)
			assert_dbg();
		m_targetBranchNotTaken = target;
	}

private:
	// Look up a block by name. Returns nullptr for INVALID_BLOCK_NAME and, after
	// assert_dbg, for names without a block (the end iterator must not be dereferenced).
	ZpIRBasicBlock* resolveBranchTarget(BlockBranchTarget name) const
	{
		if (name == INVALID_BLOCK_NAME)
			return nullptr;
		auto target = m_blocksByName.find(name);
		if (target == m_blocksByName.end())
		{
			assert_dbg();
			return nullptr;
		}
		return target->second;
	}

	ZpIRBasicBlock* m_currentBasicBlock{};
	std::vector<ZpIRBasicBlock*> m_blocks;
	std::unordered_map<BlockBranchTarget, ZpIRBasicBlock*> m_blocksByName;
	BlockBranchTarget m_targetBranchNotTaken{ INVALID_BLOCK_NAME };
	BlockBranchTarget m_targetBranchTaken{ INVALID_BLOCK_NAME };
};
}

View file

@ -0,0 +1,39 @@
#pragma once
#include "util/Zir/Core/IR.h"
namespace ZpIR
{
class DebugPrinter
{
public:
void debugPrint(ZpIRFunction* irFunction);
void setShowPhysicalRegisters(bool showPhys)
{
m_showPhysicalRegisters = showPhys;
}
void setVirtualRegisterNameSource(std::string(*getRegisterNameCustom)(ZpIRBasicBlock* block, IRReg r))
{
m_getRegisterNameCustom = getRegisterNameCustom;
}
void setPhysicalRegisterNameSource(std::string(*getRegisterNameCustom)(ZpIRBasicBlock* block, ZpIRPhysicalReg r))
{
m_getPhysicalRegisterNameCustom = getRegisterNameCustom;
}
private:
std::string getRegisterName(ZpIRBasicBlock* block, IRReg r);
std::string getInstructionHRF(ZpIRBasicBlock* block, IR::__InsBase* cmd);
void debugPrintBlock(ZpIRBasicBlock* block);
std::string(*m_getRegisterNameCustom)(ZpIRBasicBlock* block, IRReg r) { nullptr };
std::string(*m_getPhysicalRegisterNameCustom)(ZpIRBasicBlock* block, ZpIRPhysicalReg r) { nullptr };
bool m_showPhysicalRegisters{}; // show global/physical register mapping instead of local IRReg indices
};
}

View file

@ -0,0 +1,284 @@
#pragma once
#include "util/Zir/Core/IR.h"
namespace ZirPass
{
// Abstract base class for passes that operate on a ZpIRFunction.
// The function pointer is stored but not owned.
class ZpIRPass
{
public:
	ZpIRPass(ZpIR::ZpIRFunction* irFunction) : m_irFunction(irFunction) { }
	// polymorphic base: deleting a pass through a ZpIRPass pointer must run the derived destructor
	virtual ~ZpIRPass() = default;
	// run the pass on the function passed to the constructor
	virtual void applyPass() = 0;
protected:
	ZpIR::ZpIRFunction* m_irFunction;
};
// Liveness range of a single IR register within a register allocator block.
// Tracks the covered instruction index range, where the value comes from, which
// ranges continue it and the location assigned by the allocator.
struct RALivenessRange_t
{
	RALivenessRange_t(struct RABlock_t* block, ZpIR::IRReg irReg, sint32 start, sint32 end, ZpIR::DataType irDataType);
	~RALivenessRange_t();

	// origin of the value held by this range
	enum class SOURCE
	{
		NONE,
		INSTRUCTION, // instruction initializes value
		PREVIOUS_BLOCK, // imported from previous block(s)
		PREVIOUS_RANGE, // from previous range within same block
	};

	// where the allocator placed the value
	enum class LOCATION
	{
		UNASSIGNED,
		PHYSICAL_REGISTER,
		SPILLED,
	};

	void setStart(sint32 startIndex);
	void setEnd(sint32 endIndex);

	// Register `source` (a range from a preceding block) as one of the sources of this range.
	// Multiple previous-block sources are allowed; mixing with other source kinds triggers assert_dbg.
	void addSourceFromPreviousBlock(RALivenessRange_t* source)
	{
		if (m_source != SOURCE::NONE && m_source != SOURCE::PREVIOUS_BLOCK)
			assert_dbg();
		m_source = SOURCE::PREVIOUS_BLOCK;
		m_sourceRanges.emplace_back(source);
		source->m_destinationRanges.emplace_back(this);
	}

	// Register `source` (an earlier range within the same block) as the source of this range.
	// Triggers assert_dbg if a source was already set.
	void addSourceFromSameBlock(RALivenessRange_t* source)
	{
		if (m_source != SOURCE::NONE)
			assert_dbg();
		m_source = SOURCE::PREVIOUS_RANGE;
		m_sourceRanges.emplace_back(source);
		source->m_destinationRanges.emplace_back(this);
	}

	// Returns true if this range overlaps the index range [start, end].
	// NOTE(review): the test is not symmetric for ranges that touch in a single
	// index. For A=[0,5] and B=[5,8], A.isOverlapping(B) is true while
	// B.isOverlapping(A) is false. Confirm which behavior is intended.
	bool isOverlapping(sint32 start, sint32 end) const
	{
		return m_startIndex < end && m_endIndex >= start;
	}

	bool isOverlapping(RALivenessRange_t* range) const
	{
		return isOverlapping(range->m_startIndex, range->m_endIndex);
	}

	void assignPhysicalRegister(ZpIR::ZpIRPhysicalReg physReg);

	RABlock_t* m_block;
	ZpIR::IRReg m_irReg;
	ZpIR::DataType m_irDataType;
	sint32 m_startIndex{ -1 };
	sint32 m_endIndex{ -1 }; // inclusive
	std::vector<RALivenessRange_t*> m_overlappingRanges;
	// state / assigned location
	LOCATION m_location{ LOCATION::UNASSIGNED };
	sint32 m_physicalRegister{ -1 }; // only meaningful while m_location is PHYSICAL_REGISTER; previously left uninitialized
	// source
	SOURCE m_source{ SOURCE::NONE };
	std::vector<RALivenessRange_t*> m_sourceRanges;
	// destination
	std::vector<RALivenessRange_t*> m_destinationRanges;
};
struct RABlock_t
{
std::unordered_map<ZpIR::IRReg, RALivenessRange_t*> livenessRanges;
struct Compare
{
bool operator()(const RALivenessRange_t* lhs, const RALivenessRange_t* rhs) const /* noexcept */
{
// order for unassignedRanges
// aka order in which ranges should be assigned physical registers
return lhs->m_startIndex < rhs->m_startIndex;
}
};
public:
std::multiset<RALivenessRange_t*, Compare> unassignedRanges;
};
class RARegular : public ZpIRPass
{
public:
RARegular(ZpIR::ZpIRFunction* irFunction) : ZpIRPass(irFunction) {};
void applyPass()
{
prepareRABlocks();
generateLivenessRanges();
assignPhysicalRegisters();
assert_dbg(); // todo -> rewrite doesnt need to be separate any longer since we store a separate physical register index now (in IRReg)
rewrite();
m_irFunction->state.registersAllocated = true;
}
private:
void prepareRABlocks();
void generateLivenessRanges();
void assignPhysicalRegisters();
void rewrite();
void assignPhysicalRegistersForBlock(RABlock_t& raBlock);
void rewriteBlock(ZpIR::ZpIRBasicBlock& basicBlock, RABlock_t& raBlock);
std::span<sint32> extGetSuitablePhysicalRegisters(ZpIR::DataType dataType)
{
const sint32 OFFSET_U64 = 0;
const sint32 OFFSET_U32 = 16;
static sint32 _regCandidatesU64[] = { OFFSET_U64 + 0, OFFSET_U64 + 1, OFFSET_U64 + 2, OFFSET_U64 + 3, OFFSET_U64 + 4, OFFSET_U64 + 5, OFFSET_U64 + 6, OFFSET_U64 + 7, OFFSET_U64 + 8, OFFSET_U64 + 9, OFFSET_U64 + 10, OFFSET_U64 + 11, OFFSET_U64 + 12, OFFSET_U64 + 13, OFFSET_U64 + 14, OFFSET_U64 + 15 };
static sint32 _regCandidatesU32[] = { OFFSET_U32 + 0, OFFSET_U32 + 1, OFFSET_U32 + 2, OFFSET_U32 + 3, OFFSET_U32 + 4, OFFSET_U32 + 5, OFFSET_U32 + 6, OFFSET_U32 + 7, OFFSET_U32 + 8, OFFSET_U32 + 9, OFFSET_U32 + 10, OFFSET_U32 + 11, OFFSET_U32 + 12, OFFSET_U32 + 13, OFFSET_U32 + 14, OFFSET_U32 + 15 };
if (dataType == ZpIR::DataType::POINTER || dataType == ZpIR::DataType::U64)
return _regCandidatesU64;
if (dataType == ZpIR::DataType::U32)
return _regCandidatesU32;
//if (dataType != ZpIRDataType::POINTER)
//{
//}
assert_dbg();
return _regCandidatesU32;
}
void extFilterPhysicalRegisters(std::vector<sint32>& physRegCandidates, ZpIR::ZpIRPhysicalReg registerToFilter)
{
// todo - this is quite complex on x86 where registers overlap (e.g. RAX and EAX/AL/AH/AX)
// so registerToFilter can translate to multiple filtered values
// but for now we use a simplified placeholder implementation
if (registerToFilter >= 0 && registerToFilter < 16)
{
physRegCandidates.erase(std::remove(physRegCandidates.begin(), physRegCandidates.end(), (sint32)registerToFilter), physRegCandidates.end());
physRegCandidates.erase(std::remove(physRegCandidates.begin(), physRegCandidates.end(), (sint32)registerToFilter + 16), physRegCandidates.end());
}
else if (registerToFilter >= 16 && registerToFilter < 32)
{
physRegCandidates.erase(std::remove(physRegCandidates.begin(), physRegCandidates.end(), (sint32)registerToFilter), physRegCandidates.end());
physRegCandidates.erase(std::remove(physRegCandidates.begin(), physRegCandidates.end(), (sint32)registerToFilter - 16), physRegCandidates.end());
}
else
assert_dbg();
}
ZpIR::ZpIRPhysicalReg extPickPreferredRegister(std::vector<sint32>& physRegCandidates)
{
if (physRegCandidates.empty())
assert_dbg();
return physRegCandidates[0];
}
void debugPrint(RABlock_t& raBlock)
{
std::multiset<RALivenessRange_t*, RABlock_t::Compare> sortedRanges;
for (auto& itr : raBlock.livenessRanges)
sortedRanges.emplace(itr.second);
for (auto& itr : sortedRanges)
{
printf("%04x - %04x reg %04d: ", (uint32)(uint16)itr->m_startIndex, (uint32)(uint16)itr->m_endIndex, (uint32)itr->m_irReg);
if (itr->m_location == RALivenessRange_t::LOCATION::PHYSICAL_REGISTER)
printf("PHYS_REG %d", (int)itr->m_physicalRegister);
else if (itr->m_location == RALivenessRange_t::LOCATION::UNASSIGNED)
printf("UNASSIGNED");
else
assert_dbg();
printf("\n");
}
}
// remove all physical registers from physRegCandidates which are already reserved by any of the overlapping ranges
void filterCandidates(std::vector<sint32>& physRegCandidates, RALivenessRange_t* range)
{
for (auto& itr : range->m_overlappingRanges)
{
if (itr->m_location != RALivenessRange_t::LOCATION::PHYSICAL_REGISTER)
continue;
extFilterPhysicalRegisters(physRegCandidates, itr->m_physicalRegister);
}
}
std::vector<RABlock_t> m_raBlockArray;
};
class RegisterAllocatorForGLSL : public ZpIRPass
{
enum class PHYS_REG_TYPE : uint8
{
U32 = 0,
S32 = 1,
F32 = 2
};
public:
RegisterAllocatorForGLSL(ZpIR::ZpIRFunction* irFunction) : ZpIRPass(irFunction) {};
void applyPass()
{
assignPhysicalRegisters();
m_irFunction->state.registersAllocated = true;
}
static bool IsPhysRegTypeU32(ZpIR::ZpIRPhysicalReg physReg)
{
return ((uint32)physReg >> 30) == (uint32)PHYS_REG_TYPE::U32;
}
static bool IsPhysRegTypeS32(ZpIR::ZpIRPhysicalReg physReg)
{
return ((uint32)physReg >> 30) == (uint32)PHYS_REG_TYPE::S32;
}
static bool IsPhysRegTypeF32(ZpIR::ZpIRPhysicalReg physReg)
{
return ((uint32)physReg >> 30) == (uint32)PHYS_REG_TYPE::F32;
}
static uint32 GetPhysRegIndex(ZpIR::ZpIRPhysicalReg physReg)
{
return (uint32)physReg & 0x3FFFFFFF;
}
static std::string DebugPrintHelper_getPhysRegisterName(ZpIR::ZpIRBasicBlock* block, ZpIR::ZpIRPhysicalReg r);
private:
void assignPhysicalRegisters();
void assignPhysicalRegistersForBlock(ZpIR::ZpIRBasicBlock* basicBlock);
uint32 m_physicalRegisterCounterU32{};
uint32 m_physicalRegisterCounterS32{};
uint32 m_physicalRegisterCounterF32{};
ZpIR::ZpIRPhysicalReg MakePhysReg(PHYS_REG_TYPE regType, uint32 index)
{
uint32 v = (uint32)regType << 30;
v |= index;
return (ZpIR::ZpIRPhysicalReg)v;
}
};
};

View file

@ -0,0 +1,4 @@
#pragma once
#include "util/Zir/Core/IR.h"
#include "util/Zir/Core/ZirUtility.h"

View file

@ -0,0 +1,494 @@
#include "util/Zir/Core/IR.h"
#include "util/Zir/Core/ZirUtility.h"
#include "util/Zir/Core/ZpIRPasses.h"
#include "util/Zir/EmitterGLSL/ZpIREmitGLSL.h"
#include "util/Zir/Core/ZpIRScheduler.h"
// String buffer helper class with a fixed capacity of N bytes.
// The content starts in the middle of the buffer so that space is available at
// both the front and the end. Only appending is implemented so far.
// Writes which do not fit are truncated (and reported via cemu_assert_debug)
// instead of writing past the end of the buffer.
class DualStringBuffer
{
	static constexpr size_t N = 1024;
public:
	DualStringBuffer() : m_offsetBegin(N / 2), m_offsetEnd(N / 2) { }

	static_assert(sizeof(char) == sizeof(uint8));

	// discard the content, the (empty) string is placed in the middle of the buffer again
	void reset()
	{
		m_offsetBegin = N / 2;
		m_offsetEnd = m_offsetBegin;
	}

	// append strView at the end
	void append(std::string_view strView)
	{
		const size_t spaceLeft = N - m_offsetEnd;
		size_t copyLength = strView.size();
		if (copyLength > spaceLeft)
		{
			cemu_assert_debug(false); // out of buffer space
			copyLength = spaceLeft;
		}
		if (copyLength == 0)
			return;
		std::memcpy(m_strBuffer + m_offsetEnd, strView.data(), copyLength);
		m_offsetEnd += (uint32)copyLength;
	}

	// append formatted text (fmt syntax) at the end
	template <typename... Args>
	void appendFmt(const char* format_str, Args... args)
	{
		char* const buf = (char*)(m_strBuffer + m_offsetEnd);
		const size_t spaceLeft = N - m_offsetEnd;
		// format_to_n never writes more than spaceLeft characters. result.size is the
		// untruncated length, result.out points behind the last character written
		const auto result = fmt::format_to_n(buf, spaceLeft, format_str, args...);
		cemu_assert_debug(result.size <= spaceLeft);
		m_offsetEnd += (uint32)(result.out - buf);
	}

	// not implemented yet, always triggers assert_dbg
	void prepend(std::string_view strView)
	{
		assert_dbg();
	}

	// length of the stored string in bytes
	size_t size() const
	{
		return m_offsetEnd - m_offsetBegin;
	}

	// view of the stored string. Only valid until the buffer is modified
	operator std::string_view() const
	{
		return std::basic_string_view<char>((const char*)(m_strBuffer + m_offsetBegin), m_offsetEnd - m_offsetBegin);
	}

private:
	uint32 m_offsetBegin; // offset of the first character
	uint32 m_offsetEnd; // offset behind the last character
	uint8 m_strBuffer[N];
};
namespace ZirEmitter
{
// maps a channel index (0-3) to the letter used for it in the emitted GLSL (x, y, z, w)
static const char g_idx_to_element[] = { 'x' , 'y', 'z', 'w'};
// Emit GLSL for irFunction and append it to output.
// The code of the single entry block is wrapped into "void main() { ... }".
// Functions with more than one basic block are not supported yet.
void GLSL::Emit(ZpIR::ZpIRFunction* irFunction, StringBuf* output)
{
	m_glslSource = output;
	m_irFunction = irFunction;
	cemu_assert_debug(irFunction->m_entryBlocks.size() == 1);
	cemu_assert_debug(irFunction->m_basicBlocks.size() == 1); // other sizes are todo
	// function header
	output->add("void main()\r\n{\r\n");
	// function body
	ZpIR::ZpIRBasicBlock* entryBlock = irFunction->m_entryBlocks[0];
	GenerateBasicBlockCode(*entryBlock);
	// function footer
	output->add("}\r\n");
}
// Emit the GLSL code for a single basic block.
// Works in two steps: first the number of reads of each register is counted,
// then the instructions are translated one by one. Results of registers which
// are read at most once may be kept as inlined expressions instead of being
// stored in a GLSL variable (see AssignResult).
void GLSL::GenerateBasicBlockCode(ZpIR::ZpIRBasicBlock& basicBlock)
{
	// init context
#ifndef PUBLIC_RELEASE
	for (auto& itr : m_blockContext.regInlinedExpression)
	{
		cemu_assert_debug(itr == nullptr); // leaked buffer
	}
#endif
	m_blockContext.regReadTracking.clear();
	m_blockContext.regReadTracking.resize(basicBlock.m_regs.size());
	m_blockContext.regInlinedExpression.resize(basicBlock.m_regs.size());
	m_blockContext.currentBasicBlock = &basicBlock;
	// we first do an analysis pass in which we determine the read count for each register
	// every register which is only consumed once can be directly inlined instead of storing and referencing it via a variable
	for (ZpIR::IR::__InsBase* instruction = basicBlock.m_instructionFirst; instruction; instruction = instruction->next)
	{
		ZpIR::ZpIRCmdUtil::forEachAccessedReg(basicBlock, instruction,
			[this](ZpIR::IRReg readReg)
			{
				if (readReg >= 0x8000)
					assert_dbg();
				// read access, the counter saturates at 255
				auto& entry = m_blockContext.regReadTracking.at(readReg);
				if (entry < 255)
					entry++;
			},
			[](ZpIR::IRReg writtenReg)
			{
				// writes are not tracked
			});
	}
	// emit GLSL for this block
	for (ZpIR::IR::__InsBase* instruction = basicBlock.m_instructionFirst; instruction; instruction = instruction->next)
	{
		if (auto ins = ZpIR::IR::InsRR::getIfForm(instruction))
			HandleInstruction(ins);
		else if (auto ins = ZpIR::IR::InsRRR::getIfForm(instruction))
			HandleInstruction(ins);
		else if (auto ins = ZpIR::IR::InsIMPORT::getIfForm(instruction))
			HandleInstruction(ins);
		else if (auto ins = ZpIR::IR::InsEXPORT::getIfForm(instruction))
			HandleInstruction(ins);
		else
		{
			assert_dbg();
		}
	}
	// the inlined expressions are only meaningful within this block
	// hand the buffers back to the pool, otherwise they stay attached to the context
	// and the leak check at the top fires on the next call
	for (auto& inlinedExpression : m_blockContext.regInlinedExpression)
	{
		if (inlinedExpression)
		{
			ReleaseStringBuffer(inlinedExpression);
			inlinedExpression = nullptr;
		}
	}
}
// Translate a two-register instruction (rA = op(rB)) into a GLSL expression
// and pass it to AssignResult for register rA.
// Unsupported opcodes or type combinations trigger assert_dbg.
void GLSL::HandleInstruction(ZpIR::IR::InsRR* ins)
{
	DualStringBuffer* expr = GetStringBuffer();
	ZpIR::ZpIRBasicBlock* const block = m_blockContext.currentBasicBlock;
	bool inliningForbidden = false;

	// writes the operand followed by a closing parenthesis
	auto appendOperandAndClose = [&]()
	{
		appendSourceString(expr, ins->rB);
		expr->append(")");
	};

	switch (ins->opcode)
	{
	case ZpIR::IR::OpCode::MOV:
		// plain copy, the expression is just the operand
		appendSourceString(expr, ins->rB);
		break;
	case ZpIR::IR::OpCode::BITCAST:
	{
		const auto srcType = block->getRegType(ins->rB);
		const auto dstType = block->getRegType(ins->rA);
		// pick the GLSL builtin which reinterprets the bits
		const char* castFunction = nullptr;
		if (srcType == ZpIR::DataType::U32 && dstType == ZpIR::DataType::F32)
			castFunction = "uintBitsToFloat(";
		else if (srcType == ZpIR::DataType::S32 && dstType == ZpIR::DataType::F32)
			castFunction = "intBitsToFloat(";
		else if (srcType == ZpIR::DataType::F32 && dstType == ZpIR::DataType::U32)
			castFunction = "floatBitsToUint(";
		else if (srcType == ZpIR::DataType::F32 && dstType == ZpIR::DataType::S32)
			castFunction = "floatBitsToInt(";
		if (castFunction)
			expr->append(castFunction);
		else
			assert_dbg();
		appendOperandAndClose();
		break;
	}
	case ZpIR::IR::OpCode::SWAP_ENDIAN:
	{
		const auto srcType = block->getRegType(ins->rB);
		const auto dstType = block->getRegType(ins->rA);
		cemu_assert_debug(srcType == dstType);
		// todo - should we store expressionBuf in a temporary variable? We reference it multiple times and reducing complexity would be good
		if (srcType == ZpIR::DataType::U32)
		{
			// builds (((v)>>24)|(((v)>>8)&0xFF00)|(((v)<<8)&0xFF0000)|((v)<<24))
			// each entry is the text before and after one occurrence of the operand
			static const char* const byteTerms[4][2] =
			{
				{ "(((", ")>>24)|" },
				{ "(((", ")>>8)&0xFF00)|" },
				{ "(((", ")<<8)&0xFF0000)|" },
				{ "((", ")<<24))" },
			};
			for (const auto& term : byteTerms)
			{
				expr->append(term[0]);
				appendSourceString(expr, ins->rB);
				expr->append(term[1]);
			}
		}
		else
			assert_dbg();
		inliningForbidden = true; // avoid inlining endian-swapping, since it would add too much complexity to expressions
		break;
	}
	case ZpIR::IR::OpCode::CONVERT_FLOAT_TO_INT:
	{
		const auto srcType = block->getRegType(ins->rB);
		const auto dstType = block->getRegType(ins->rA);
		cemu_assert_debug(srcType == ZpIR::DataType::F32);
		cemu_assert_debug(dstType == ZpIR::DataType::S32 || dstType == ZpIR::DataType::U32);
		expr->append((dstType == ZpIR::DataType::U32) ? "uint(" : "int(");
		appendOperandAndClose();
		break;
	}
	case ZpIR::IR::OpCode::CONVERT_INT_TO_FLOAT:
	{
		const auto srcType = block->getRegType(ins->rB);
		const auto dstType = block->getRegType(ins->rA);
		cemu_assert_debug(srcType == ZpIR::DataType::S32 || srcType == ZpIR::DataType::U32);
		cemu_assert_debug(dstType == ZpIR::DataType::F32);
		expr->append("float(");
		appendOperandAndClose();
		break;
	}
	default:
		assert_dbg();
	}
	AssignResult(ins->rA, expr, inliningForbidden);
}
// Translate a three-register instruction (rA = rB op rC) into a GLSL expression
// and pass it to AssignResult for register rA.
// The expression is wrapped in parentheses: it may get inlined into another
// expression, where operator precedence would otherwise change its meaning
// (e.g. "a + b" inlined as operand of a multiplication).
// Unsupported opcodes trigger assert_dbg and produce an empty expression.
void GLSL::HandleInstruction(ZpIR::IR::InsRRR* ins)
{
	const char* operatorStr = nullptr;
	switch (ins->opcode)
	{
	case ZpIR::IR::OpCode::ADD:
		operatorStr = " + ";
		break;
	case ZpIR::IR::OpCode::SUB:
		operatorStr = " - ";
		break;
	case ZpIR::IR::OpCode::MUL:
		operatorStr = " * ";
		break;
	case ZpIR::IR::OpCode::DIV:
		operatorStr = " / ";
		break;
	default:
		assert_dbg();
	}
	DualStringBuffer* expressionBuf = GetStringBuffer();
	if (operatorStr)
	{
		expressionBuf->append("(");
		appendSourceString(expressionBuf, ins->rB);
		expressionBuf->append(operatorStr);
		appendSourceString(expressionBuf, ins->rC);
		expressionBuf->append(")");
	}
	AssignResult(ins->rA, expressionBuf);
}
// Translate an import instruction. The imported value is described by the
// import symbol, the resulting expression is assigned to the first register of
// the instruction.
// Import locations other than uniform registers and vertex attributes are not supported.
void GLSL::HandleInstruction(ZpIR::IR::InsIMPORT* ins)
{
	ZpIR::ShaderSubset::ShaderImportLocation loc(ins->importSymbol);
	DualStringBuffer* buf = GetStringBuffer();
	if (loc.IsUniformRegister())
	{
		uint16 index;
		loc.GetUniformRegister(index);
		// todo - this is complex. Solve via callback
		// index / 4 selects the array element, index & 3 the channel within it
		buf->appendFmt("uf_remappedVS[{}].{}", index/4, g_idx_to_element[index&3]);
		AssignResult(ins->regArray[0], buf); // buf is handed over to AssignResult
	}
	else if (loc.IsVertexAttribute())
	{
		uint16 attributeIndex;
		uint16 channelIndex;
		loc.GetVertexAttribute(attributeIndex, channelIndex);
		cemu_assert_debug(ins->count == 1);
		cemu_assert_debug(ZpIR::isRegVar(ins->regArray[0]));
		cemu_assert_debug(channelIndex < 4);
		cemu_assert_debug(m_blockContext.currentBasicBlock->getRegType(ins->regArray[0]) == ZpIR::DataType::U32);
		buf->appendFmt("attrDataSem{}.{}", attributeIndex, g_idx_to_element[channelIndex]);
		AssignResult(ins->regArray[0], buf); // buf is handed over to AssignResult
	}
	else
	{
		cemu_assert_debug(false);
		// nobody took over the buffer, return it to the pool instead of leaking it
		ReleaseStringBuffer(buf);
	}
}
// Translate an export instruction into a GLSL statement which is written to
// the output right away. The exported registers become the components of a vec4.
// Export locations other than position and output attributes trigger assert_dbg.
void GLSL::HandleInstruction(ZpIR::IR::InsEXPORT* ins)
{
	ZpIR::ShaderSubset::ShaderExportLocation exportLoc(ins->exportSymbol);
	DualStringBuffer* statement = GetStringBuffer();

	// appends the exported registers as a comma separated list
	auto appendComponents = [&]()
	{
		cemu_assert_debug(ins->count == 4);
		for (uint32 idx = 0; idx < ins->count; idx++)
		{
			if (idx != 0)
				statement->append(", ");
			appendSourceString(statement, ins->regArray[idx]);
		}
	};

	if (exportLoc.IsPosition())
	{
		// todo - support for output mask (e.g. xyzw, x_zw) ?
		statement->append("SET_POSITION(vec4(");
		appendComponents();
		m_glslSource->add(*statement);
		m_glslSource->add("));\r\n");
	}
	else if (exportLoc.IsOutputAttribute())
	{
		uint16 attributeIndex;
		exportLoc.GetOutputAttribute(attributeIndex);
		statement->appendFmt("passParameterSem{} = vec4(", attributeIndex);
		appendComponents();
		m_glslSource->add(*statement);
		m_glslSource->add(");\r\n");
	}
	else
	{
		assert_dbg();
	}
	ReleaseStringBuffer(statement);
}
// Assign the expression in buf to register irReg. Takes over buf.
// If the register may be inlined, the expression is short enough (at most 100
// characters) and forceNoInline is not set, the expression is remembered and
// substituted wherever the register is read. Otherwise a GLSL variable
// declaration with the expression as initializer is written to the output.
void GLSL::AssignResult(ZpIR::IRReg irReg, DualStringBuffer* buf, bool forceNoInline)
{
	const bool expressionTooLong = buf->size() > 100;
	const bool inliningAllowed = !(forceNoInline || expressionTooLong);
	if (m_blockContext.CanInlineRegister(irReg) && inliningAllowed)
	{
		// keep the expression, it is referenced later via appendSourceString
		SetRegInlinedExpression(irReg, buf);
		return;
	}
	// emit "<type> <name> = <expression>;"
	const ZpIR::DataType regType = m_blockContext.currentBasicBlock->getRegType(irReg);
	if (regType == ZpIR::DataType::F32)
	{
		m_glslSource->add("float ");
	}
	else if (regType == ZpIR::DataType::S32)
	{
		m_glslSource->add("int ");
	}
	else if (regType == ZpIR::DataType::U32)
	{
		m_glslSource->add("uint ");
	}
	else
	{
		cemu_assert_debug(false);
	}
	char variableName[16];
	getRegisterName(variableName, irReg);
	m_glslSource->add(variableName);
	m_glslSource->add(" = ");
	m_glslSource->add(*buf);
	m_glslSource->add(";\r\n");
	ReleaseStringBuffer(buf);
}
// Append the GLSL text for reading irReg to buf.
// - constants are written as literals (U32, S32 and F32 are supported)
// - registers with an inlined expression are replaced by that expression
// - all other registers are referenced by their variable name
void GLSL::appendSourceString(DualStringBuffer* buf, ZpIR::IRReg irReg)
{
	if (ZpIR::isConstVar(irReg))
	{
		ZpIR::IRRegConstDef* constDef = m_blockContext.currentBasicBlock->getConstant(irReg);
		if (!constDef)
		{
			// getConstant returns nullptr for constants which are not defined in this block
			assert_dbg();
			return;
		}
		if (constDef->type == ZpIR::DataType::U32)
		{
			buf->appendFmt("{}", constDef->value_u32);
			return;
		}
		else if (constDef->type == ZpIR::DataType::S32)
		{
			buf->appendFmt("{}", constDef->value_s32);
			return;
		}
		else if (constDef->type == ZpIR::DataType::F32)
		{
			buf->appendFmt("{}", constDef->value_f32);
			return;
		}
		// unsupported constant type
		assert_dbg();
	}
	else
	{
		cemu_assert_debug(ZpIR::isRegVar(irReg));
		uint16 regIndex = ZpIR::getRegIndex(irReg);
		DualStringBuffer* expressionBuf = m_blockContext.regInlinedExpression[regIndex];
		if (expressionBuf)
		{
			// the register was never stored in a variable, substitute its expression
			buf->append(*expressionBuf);
			return;
		}
		char regName[16];
		getRegisterName(regName, irReg);
		buf->append(regName);
	}
}
void GLSL::getRegisterName(char buf[16], ZpIR::IRReg irReg)
{
auto& regData = m_blockContext.currentBasicBlock->m_regs[(uint16)irReg & 0x7FFF];
cemu_assert_debug(regData.hasAssignedPhysicalRegister());
ZpIR::ZpIRPhysicalReg physReg = regData.physicalRegister;
char typeChar;
if (ZirPass::RegisterAllocatorForGLSL::IsPhysRegTypeF32(physReg))
typeChar = 'f';
else if (ZirPass::RegisterAllocatorForGLSL::IsPhysRegTypeS32(physReg))
typeChar = 'i';
else if (ZirPass::RegisterAllocatorForGLSL::IsPhysRegTypeU32(physReg))
typeChar = 'u';
else
{
typeChar = 'x';
cemu_assert_debug(false);
}
auto r = fmt::format_to(buf, "r{}{}", ZirPass::RegisterAllocatorForGLSL::GetPhysRegIndex(physReg), typeChar);
*r = '\0';
}
// Store buf as the inlined expression of irReg.
// A buffer previously stored for the register is returned to the pool.
void GLSL::SetRegInlinedExpression(ZpIR::IRReg irReg, DualStringBuffer* buf)
{
	cemu_assert_debug(ZpIR::isRegVar(irReg));
	DualStringBuffer*& slot = m_blockContext.regInlinedExpression[(uint16)irReg];
	if (slot != nullptr)
		ReleaseStringBuffer(slot);
	slot = buf;
}
// Drop the inlined expression of irReg (if any) and return its buffer to the pool.
void GLSL::ResetRegInlinedExpression(ZpIR::IRReg irReg)
{
	cemu_assert_debug(ZpIR::isRegVar(irReg));
	DualStringBuffer*& slot = m_blockContext.regInlinedExpression[(uint16)irReg];
	if (slot == nullptr)
		return;
	ReleaseStringBuffer(slot);
	slot = nullptr;
}
// Returns the inlined expression stored for irReg, or nullptr if there is none.
DualStringBuffer* GLSL::GetRegInlinedExpression(ZpIR::IRReg irReg)
{
	cemu_assert_debug(ZpIR::isRegVar(irReg));
	const uint16 slotIndex = (uint16)irReg;
	DualStringBuffer* inlined = m_blockContext.regInlinedExpression[slotIndex];
	return inlined;
}
// Get an empty string buffer. A buffer from the pool is reused if one is
// available, otherwise a new one is allocated.
DualStringBuffer* GLSL::GetStringBuffer()
{
	if (!m_stringBufferCache.empty())
	{
		DualStringBuffer* recycled = m_stringBufferCache.back();
		m_stringBufferCache.pop_back();
		recycled->reset();
		return recycled;
	}
	return new DualStringBuffer();
}
// Return a string buffer to the pool so that GetStringBuffer can hand it out again.
void GLSL::ReleaseStringBuffer(DualStringBuffer* buf)
{
	m_stringBufferCache.push_back(buf);
}
};

View file

@ -0,0 +1,60 @@
#pragma once
#include "util/Zir/Core/IR.h"
#include "util/Zir/Core/ZpIRPasses.h"
#include "util/helpers/StringBuf.h"
class DualStringBuffer;
namespace ZirEmitter
{
// Emitter which translates a ZpIR function into GLSL source code.
class GLSL
{
public:
	GLSL() = default;

	// emit function code and append to output string buffer
	void Emit(ZpIR::ZpIRFunction* irFunction, StringBuf* output);

private:
	// code generation
	void GenerateBasicBlockCode(ZpIR::ZpIRBasicBlock& basicBlock);
	void HandleInstruction(ZpIR::IR::InsRR* ins);
	void HandleInstruction(ZpIR::IR::InsRRR* ins);
	void HandleInstruction(ZpIR::IR::InsIMPORT* ins);
	void HandleInstruction(ZpIR::IR::InsEXPORT* ins);
	void AssignResult(ZpIR::IRReg irReg, DualStringBuffer* buf, bool forceNoInline = false);
	void appendSourceString(DualStringBuffer* buf, ZpIR::IRReg irReg);
	void getRegisterName(char buf[16], ZpIR::IRReg irReg);

	// inlined expression cache
	void SetRegInlinedExpression(ZpIR::IRReg irReg, DualStringBuffer* buf);
	void ResetRegInlinedExpression(ZpIR::IRReg irReg);
	DualStringBuffer* GetRegInlinedExpression(ZpIR::IRReg irReg);

	// memory pool for StringBuffer
	DualStringBuffer* GetStringBuffer();
	void ReleaseStringBuffer(DualStringBuffer* buf);

private:
	// function being emitted and the output it is written to
	ZpIR::ZpIRFunction* m_irFunction = nullptr;
	StringBuf* m_glslSource = nullptr;

	// state of the basic block which is currently being emitted
	struct
	{
		ZpIR::ZpIRBasicBlock* currentBasicBlock{ nullptr };
		// number of reads per register, indexed by register index
		std::vector<uint8> regReadTracking;
		// inlined expression per register, nullptr if the register has none
		std::vector<DualStringBuffer*> regInlinedExpression;

		// a register can be inlined if it is read no more than once
		bool CanInlineRegister(ZpIR::IRReg reg) const
		{
			cemu_assert_debug(ZpIR::isRegVar(reg));
			const uint8 readCount = regReadTracking[ZpIR::getRegIndex(reg)];
			return readCount < 2;
		}
	} m_blockContext;

	// buffers currently not in use, see GetStringBuffer / ReleaseStringBuffer
	std::vector<DualStringBuffer*> m_stringBufferCache;
};
}

View file

@ -0,0 +1,65 @@
#include "util/Zir/Core/IR.h"
#include "util/Zir/Core/ZirUtility.h"
#include "util/Zir/Core/ZpIRPasses.h"
#include "util/Zir/Core/ZpIRDebug.h"
namespace ZirPass
{
// Assigns physical registers for every basic block of the function.
// Functions with anything other than exactly one basic block are flagged as unimplemented.
void RegisterAllocatorForGLSL::assignPhysicalRegisters()
{
	auto& basicBlocks = m_irFunction->m_basicBlocks;
	const bool hasSingleBlock = (basicBlocks.size() == 1);
	if (!hasSingleBlock)
		cemu_assert_unimplemented();
	for (auto& block : basicBlocks)
	{
		assignPhysicalRegistersForBlock(block);
	}
}
// Gives every typed virtual register of `basicBlock` that has no physical register yet
// a new physical register, using one running counter per data type (F32 / S32 / U32).
void RegisterAllocatorForGLSL::assignPhysicalRegistersForBlock(ZpIR::ZpIRBasicBlock* basicBlock)
{
	// resolve imports
	for (auto& importEntry : basicBlock->m_imports)
	{
		assert_dbg(); // todo - If imported reg not assigned physical register yet -> create a shared physical register (MSB set in reg index?) And assign it to this basic block but also all the shared IRRegs in the other linked basic blocks
		// how to handle import:
		// - match physical register of every input/output
		// - every import must have a matching export in all the previous basic blocks. If not all match this is an error.
		//   In our shader emitter this could happen if the original R600 code references an uninitialized register
		// note - we also have to make sure the register type matches. If a linked block has a shared register with a different type then we need to create a new register and insert a bitcast instruction in that block
	}
	// assign a register index to every virtual register
	for (auto& reg : basicBlock->m_regs)
	{
		// skip untyped registers and registers that already have a physical register
		if (reg.type == ZpIR::DataType::NONE || reg.hasAssignedPhysicalRegister())
			continue;
		if (reg.type == ZpIR::DataType::F32)
		{
			reg.assignPhysicalRegister(MakePhysReg(PHYS_REG_TYPE::F32, m_physicalRegisterCounterF32++));
			continue;
		}
		if (reg.type == ZpIR::DataType::S32)
		{
			reg.assignPhysicalRegister(MakePhysReg(PHYS_REG_TYPE::S32, m_physicalRegisterCounterS32++));
			continue;
		}
		if (reg.type == ZpIR::DataType::U32)
		{
			reg.assignPhysicalRegister(MakePhysReg(PHYS_REG_TYPE::U32, m_physicalRegisterCounterU32++));
			continue;
		}
		// any other data type is not handled by this allocator
		cemu_assert_debug(false);
	}
}
// Builds the debug name of physical register `r`: "r<index>" followed by a type suffix
// ('f' for F32, 'u' for U32, 'i' for S32). Returns an empty string for any other register type.
std::string RegisterAllocatorForGLSL::DebugPrintHelper_getPhysRegisterName(ZpIR::ZpIRBasicBlock* block, ZpIR::ZpIRPhysicalReg r)
{
	const uint32 regIndex = GetPhysRegIndex(r);
	if (IsPhysRegTypeF32(r))
		return fmt::format("r{}f", regIndex);
	if (IsPhysRegTypeU32(r))
		return fmt::format("r{}u", regIndex);
	if (IsPhysRegTypeS32(r))
		return fmt::format("r{}i", regIndex);
	return std::string();
}
}

View file

@ -0,0 +1,313 @@
#include "util/Zir/Core/IR.h"
#include "util/Zir/Core/ZirUtility.h"
#include "util/Zir/Core/ZpIRPasses.h"
#include "util/Zir/Core/ZpIRDebug.h"
namespace ZirPass
{
using namespace ZpIR;
/*
Algorithm description:
Prepare phase:
Assign every basic block an index
Create internal arrays to match index count
First phase:
Create liveness ranges for each basic block
Link liveness ranges by import/export
Constrained instructions split affected ranges into their own single instruction liveness range
Second phase:
Assign registers. Start with constrained ranges first, then process from beginning to end
Whenever we assign a register to a range, we also try to propagate it to all the connected/coalesced ranges
A liveness range is described by:
- Source (Can be any of: List of previous basic blocks, liveness range in same basic block)
- Destination (list of liveness ranges)
- Index of basic block
- First instruction (where register is assigned, -1 if passed from previous block)
- Last instruction (where register is last accessed)
- IR-Register (within the same basic block)
During algorithm:
- Spillcost (probably can calculate this dynamically)
- Physical location (-1 if not assigned. Otherwise register index or spill memory offset)
*/
// Creates a liveness range [start, end] for `irReg` inside `block` and registers it:
// - in block->livenessRanges (keyed by the IR register)
// - in the overlap lists of every range of the block it overlaps with (both directions)
// - in block->unassignedRanges, since no physical location has been assigned yet
RALivenessRange_t::RALivenessRange_t(RABlock_t* block, IRReg irReg, sint32 start, sint32 end, DataType irDataType) : m_block(block), m_irReg(irReg), m_irDataType(irDataType)
{
	block->livenessRanges.emplace(irReg, this);
	m_startIndex = start;
	m_endIndex = end;
	// register overlaps
	for (auto& itr : block->livenessRanges)
	{
		RALivenessRange_t* itrRange = itr.second;
		// this range was just inserted into the map; never record it as overlapping itself
		// (setEnd() skips itself too, and the destructor iterates m_overlappingRanges)
		if (itrRange == this)
			continue;
		// the comparison is intentionally asymmetric: a range that ends at `start` is not treated as overlapping
		if (start < itrRange->m_endIndex && end >= itrRange->m_startIndex)
		{
			m_overlappingRanges.emplace_back(itrRange);
			itrRange->m_overlappingRanges.emplace_back(this);
			// todo - also immediately flag physical registers as unavailable
		}
	}
	block->unassignedRanges.emplace(this);
}
// Unlinks this range from the overlap lists of all ranges it overlaps with.
// NOTE(review): the range is not removed from m_block->livenessRanges / unassignedRanges here,
// and the trailing assert_dbg() suggests destruction is not fully supported yet - confirm before relying on it.
RALivenessRange_t::~RALivenessRange_t()
{
	for (RALivenessRange_t* overlappedRange : m_overlappingRanges)
	{
		// never modify our own list while iterating over it
		if (overlappedRange == this)
			continue;
		// todo - unflag physical register (if this has one set)
		// remove the back-reference to *this* (the original code searched for the peer itself, which never matched)
		auto& peerList = overlappedRange->m_overlappingRanges;
		peerList.erase(std::remove(peerList.begin(), peerList.end(), this), peerList.end());
	}
	m_overlappingRanges.clear();
	assert_dbg();
}
// Sets the index of the first instruction covered by this range.
// Only the stored index is updated; overlap lists are not recomputed here.
void RALivenessRange_t::setStart(sint32 startIndex)
{
m_startIndex = startIndex;
// unfinished: the follow-up work described below is not implemented, so every call hits this debug assert
assert_dbg(); // re-register in sorted range list (if no reg assigned)
}
// Moves the end of this range to `endIndex`.
// When the range grows, every other range of the block that overlaps the new extent
// (and was not overlapping before) is linked in both directions.
// Shrinking is not supported and is only flagged as suspicious.
void RALivenessRange_t::setEnd(sint32 endIndex)
{
	const bool isGrowing = endIndex > m_endIndex;
	const bool isShrinking = endIndex < m_endIndex;
	if (isGrowing)
	{
		// add ranges that are now overlapping
		for (auto& entry : m_block->livenessRanges)
		{
			RALivenessRange_t* other = entry.second;
			// skip ranges that were overlapping before, and ourselves
			if (other->isOverlapping(this) || other == this)
				continue;
			if (!other->isOverlapping(m_startIndex, endIndex))
				continue;
			m_overlappingRanges.emplace_back(other);
			other->m_overlappingRanges.emplace_back(this);
			// todo - also immediately flag physical registers as unavailable
		}
	}
	if (isShrinking)
	{
		// remove ranges that are no longer overlapping
		cemu_assert_suspicious();
	}
	m_endIndex = endIndex;
}
// Marks this range as located in physical register `physReg` and removes it from
// the block's set of unassigned ranges.
// NOTE(review): when the range is not found in unassignedRanges the code only asserts and then
// still dereferences/erases the iterator - confirm that cemu_assert_suspicious() never returns in that case.
void RALivenessRange_t::assignPhysicalRegister(ZpIRPhysicalReg physReg)
{
	const bool alreadyAssigned = (m_location != LOCATION::UNASSIGNED);
	if (alreadyAssigned)
		cemu_assert_suspicious();
	m_physicalRegister = physReg;
	m_location = LOCATION::PHYSICAL_REGISTER;
	// remove this from unassignedRanges
	auto& pendingRanges = m_block->unassignedRanges;
	auto selfItr = pendingRanges.find(this);
	if (selfItr == pendingRanges.end())
		cemu_assert_suspicious();
	if (*selfItr != this)
		cemu_assert_suspicious();
	pendingRanges.erase(selfItr);
}
// Sizes the register allocator block array so that it has one entry per IR basic block.
void RARegular::prepareRABlocks()
{
	auto& irBasicBlocks = m_irFunction->m_basicBlocks;
	m_raBlockArray.resize(irBasicBlocks.size());
}
// Builds the liveness ranges for every basic block and then tries to connect them across blocks.
// Phase 1 (per block): create a range for each import (start/end = -1), then walk the instruction list;
//   every register read extends the end of its range to the current instruction index,
//   every register write creates a new single-instruction range.
// Phase 2 (per block): collect the ranges of all exports by name and link them to the ranges of the taken-branch successor.
// Ranges are allocated with `new`; they register themselves in the RABlock_t from their constructor.
void RARegular::generateLivenessRanges()
{
auto& irBasicBlocks = m_irFunction->m_basicBlocks;
//for (auto& itr : irBasicBlocks)
for (uint32 basicBlockIndex = 0; basicBlockIndex < (uint32)irBasicBlocks.size(); basicBlockIndex++)
{
auto& blockItr = irBasicBlocks[basicBlockIndex];
RABlock_t* raBlock = m_raBlockArray.data() + basicBlockIndex;
std::unordered_map<IRReg, RALivenessRange_t*>& blockRanges = raBlock->livenessRanges;
// init ranges for imports first
for (auto& regImport : blockItr->m_imports)
{
new RALivenessRange_t(raBlock, regImport.reg, -1, -1, blockItr->m_regs[(uint16)regImport.reg].type);
// imports start before the current basic block
}
// parse instructions and create/update ranges
IR::__InsBase* ins = blockItr->m_instructionFirst;
// i is the index of the current instruction within the block
size_t i = 0;
while(ins)
{
// first callback handles registers read by the instruction, second callback handles registers written
ZpIRCmdUtil::forEachAccessedReg(*blockItr, ins,
[&blockRanges, i, raBlock](IRReg readReg)
{
if (readReg >= 0x8000)
cemu_assert_suspicious();
// read access
auto livenessRange = blockRanges.find(readReg);
// NOTE(review): a read of a register without a range is only asserted; the iterator is dereferenced below regardless
if (livenessRange == blockRanges.end())
cemu_assert_suspicious();
livenessRange->second->setEnd((sint32)i);
},
[&blockRanges, i, raBlock, blockItr](IRReg writtenReg)
{
if (writtenReg >= 0x8000)
cemu_assert_suspicious();
// write access
auto livenessRange = blockRanges.find(writtenReg);
// a register is expected to be written only once per block (a range must not exist yet)
if (livenessRange != blockRanges.end())
cemu_assert_suspicious();
new RALivenessRange_t(raBlock, writtenReg, (sint32)i, (sint32)i, blockItr->m_regs[(uint16)writtenReg].type);
});
i++;
ins = ins->next;
}
// exports extend ranges to one instruction past the end of the block
for (auto& regExport : blockItr->m_exports)
{
auto livenessRange = blockRanges.find(regExport.reg);
if (livenessRange == blockRanges.end())
cemu_assert_suspicious();
// extending the range for exports is not implemented yet (see the commented-out line below)
cemu_assert_unimplemented();
//livenessRange->second->setEnd((sint32)blockItr->m_cmdsDepr.size());
}
}
// connect liveness ranges across basic blocks based on their import/export names
std::unordered_map<LocationSymbolName, RALivenessRange_t*> listExportedRanges;
for (uint32 basicBlockIndex = 0; basicBlockIndex < (uint32)irBasicBlocks.size(); basicBlockIndex++)
{
// for each block take all exported ranges and connect them to the imports of the successor blocks
auto& blockItr = irBasicBlocks[basicBlockIndex];
// collect all exported liveness ranges
std::unordered_map<IRReg, RALivenessRange_t*>& localRanges = m_raBlockArray[basicBlockIndex].livenessRanges;
listExportedRanges.clear();
for (auto& regExport : blockItr->m_exports)
{
auto livenessRange = localRanges.find(regExport.reg);
if (livenessRange == localRanges.end())
assert_dbg();
listExportedRanges.emplace(regExport.name, livenessRange->second);
}
// handle imports in the connected blocks
if (blockItr->m_branchTaken)
{
ZpIRBasicBlock* successorBlock = blockItr->m_branchTaken;
// NOTE(review): successorRanges is bound to the ranges of the *current* block (index basicBlockIndex), not of successorBlock,
// and the chained assignment copy-assigns localRanges onto itself. This looks unintended - the successor's RABlock_t is probably meant here.
std::unordered_map<IRReg, RALivenessRange_t*>& successorRanges = localRanges = m_raBlockArray[basicBlockIndex].livenessRanges;
// NOTE(review): iterates the successor's m_exports although the variable is named regImport and the comment above talks about imports - verify
for (auto& regImport : successorBlock->m_exports)
{
auto livenessRange = successorRanges.find(regImport.reg);
if (livenessRange == successorRanges.end())
assert_dbg();
auto connectedSourceRange = listExportedRanges.find(regImport.name);
if (connectedSourceRange == listExportedRanges.end())
assert_dbg();
livenessRange->second->addSourceFromPreviousBlock(connectedSourceRange->second);
}
}
// handle imports for entry blocks
// todo
// handle export for exit blocks
// todo
}
}
// Assigns a physical register to every unassigned liveness range of `raBlock`.
// Ranges are taken from the front of raBlock.unassignedRanges until the set is empty;
// a successful assignment removes the range from that set.
// Spilling is not implemented: if no candidate register is left, only debug asserts are raised.
void RARegular::assignPhysicalRegistersForBlock(RABlock_t& raBlock)
{
	debugPrint(raBlock);
	std::vector<sint32> candidateRegs;
	candidateRegs.reserve(32);
	// process livenessRanges ascending by start address
	while (!raBlock.unassignedRanges.empty())
	{
		RALivenessRange_t* currentRange = *raBlock.unassignedRanges.begin();
		// get a list of potential physical registers
		std::span<sint32> suitableRegs = extGetSuitablePhysicalRegisters(currentRange->m_irDataType);
		candidateRegs.assign(suitableRegs.begin(), suitableRegs.end());
		// try to find a physical register that we can assign to the entire liveness span (current range and all connected ranges)
		// todo
		// handle special cases like copy coalescing
		// todo
		// try to find a register for only the current range
		filterCandidates(candidateRegs, currentRange);
		if (candidateRegs.empty())
		{
			// spill is necessary
			assert_dbg();
			assert_dbg();
			continue;
		}
		// pick preferred register
		ZpIRPhysicalReg chosenReg = extPickPreferredRegister(candidateRegs);
		currentRange->assignPhysicalRegister(chosenReg);
	}
	printf("Assigned:\n");
	debugPrint(raBlock);
}
// Runs the per-block register assignment for every block of the function, in array order.
void RARegular::assignPhysicalRegisters()
{
	// todo - first we should assign all the fixed registers. E.g. imports/exports, constrained instructions
	for (auto blockItr = m_raBlockArray.begin(); blockItr != m_raBlockArray.end(); ++blockItr)
	{
		assignPhysicalRegistersForBlock(*blockItr);
	}
}
void RARegular::rewrite()
{
for (size_t i = 0; i < m_raBlockArray.size(); i++)
rewriteBlock(*m_irFunction->m_basicBlocks[i], m_raBlockArray[i]);
}
// Intended to replace IR registers with the assigned physical registers in `basicBlock`.
// Currently only the import/export/register tables of the block are emptied and their storage released;
// the instruction rewrite itself exists only as the commented-out sketch below.
void RARegular::rewriteBlock(ZpIRBasicBlock& basicBlock, RABlock_t& raBlock)
{
	// presumably marks this path as unfinished in debug builds - TODO confirm assert_dbg semantics
	assert_dbg();
	//std::vector<ZpIRCmd> cmdOut;
	//std::unordered_map<ZpIRReg, ZpIRReg> translationTable;
	//for (auto& itr : raBlock.livenessRanges)
	//	translationTable.emplace(itr.second->m_irReg, itr.second->m_physicalRegister);
	//// todo - since ir var registers are created in incremental order we could instead use a std::vector for fast look-up instead of a map?
	//for (uint32 i = 0; i < (uint32)basicBlock.m_cmdsDepr.size(); i++)
	//{
	//	// todo - insert spill and load instructions
	//	// todo - insert register moves for range-to-range copies
	//
	//	ZpIRCmd* currentCmd = basicBlock.m_cmdsDepr.data() + i;
	//	// replace registers and then insert into output command list
	//	ZpIRCmdUtil::replaceRegisters(*currentCmd, translationTable);
	//	cmdOut.emplace_back(*currentCmd);
	//}
	//basicBlock.m_cmdsDepr = std::move(cmdOut);
	// todo - should we keep imports/exports but update them to use physical register indices?
	//        the code emitter needs to know which physical registers are exported in order to determine which optimizations are allowed
	// empty a container and give its memory back
	auto releaseStorage = [](auto& container)
	{
		container.clear();
		container.shrink_to_fit();
	};
	releaseStorage(basicBlock.m_imports);
	releaseStorage(basicBlock.m_exports);
	releaseStorage(basicBlock.m_regs);
}
}

109
src/util/boost/bluetooth.h Normal file
View file

@ -0,0 +1,109 @@
#pragma once
#include "platform/platform.h"
#include <boost/asio.hpp>
namespace boost
{
namespace asio
{
// Endpoint type for Bluetooth sockets, wrapping a SOCKADDR_BTH so it can be used
// as the endpoint of a boost::asio protocol class.
template <typename Protocol>
class device_endpoint
{
public:
	typedef Protocol protocol_type;
	typedef detail::socket_addr_type data_type;

	// Thin wrapper around a raw Bluetooth device address.
	struct device_t
	{
		device_t(long long device_addr)
			: addr(device_addr)
		{
		}

		long long addr;
	};

	// Creates an endpoint with a zero-filled address.
	device_endpoint()
	{
		memset(&addr, 0x00, sizeof(addr));
	}

	// Creates an RFCOMM endpoint for the given device address, with no fixed port (BT_PORT_ANY).
	device_endpoint(device_t device_address)
	{
		memset(&addr, 0x00, sizeof(addr));
		addr.addressFamily = AF_BLUETOOTH;
		// use the constructor parameter; the previous code referred to an undeclared name `id`
		addr.btAddr = device_address.addr;
		addr.serviceClassId = RFCOMM_PROTOCOL_UUID;
		addr.port = BT_PORT_ANY;
	}

	device_endpoint(const device_endpoint& other)
		: addr(other.addr)
	{
	}

	device_endpoint& operator=(const device_endpoint& other)
	{
		addr = other.addr;
		return *this;
	}

	// Returns a default-constructed protocol object.
	protocol_type protocol() const
	{
		return protocol_type();
	}

	// Raw access to the underlying socket address (mutable and const).
	data_type* data()
	{
		return reinterpret_cast<data_type*>(&addr);
	}

	const data_type* data() const
	{
		return reinterpret_cast<const data_type*>(&addr);
	}

	// Size of the underlying socket address in bytes.
	size_t size() const
	{
		return sizeof(SOCKADDR_BTH);
	}

	// The address has a fixed size, so capacity equals size.
	size_t capacity() const
	{
		return size();
	}

private:
	SOCKADDR_BTH addr;
};
// Protocol class describing Bluetooth RFCOMM stream sockets for boost::asio.
// Provides the endpoint/socket/acceptor/iostream typedefs and the
// family/type/protocol triple used to open a socket.
class bluetooth
{
public:
	using endpoint = device_endpoint<bluetooth>;
	using socket = basic_stream_socket<bluetooth>;
	using acceptor = basic_socket_acceptor<bluetooth>;
	using iostream = basic_socket_iostream<bluetooth>;

	bluetooth() = default;

	// address family
	int family() const
	{
		return AF_BLUETOOTH;
	}

	// socket type
	int type() const
	{
		return SOCK_STREAM;
	}

	// protocol identifier
	int protocol() const
	{
		return BTPROTO_RFCOMM;
	}
};
}
}

View file

@ -0,0 +1,100 @@
#pragma once
// Container for address ranges, organized in TBucketCount buckets of TAddressGranularity address units each.
// A range is registered in every bucket it touches (bucket indices wrap around modulo TBucketCount),
// which lets lookups inspect only the buckets covered by the queried range.
// The container stores TData pointers only; it never owns or deletes the data.
template<typename TData, typename TAddr, TAddr TAddressGranularity, int TBucketCount>
class IntervalBucketContainer
{
	struct bucketEntry_t
	{
		TAddr rangeStart;
		TAddr rangeEnd;
		TData* data;
		int bucketStartIndex; // (wrapped) index of the first bucket the range was registered in
		bucketEntry_t(TAddr rangeStart, TAddr rangeEnd, TData* data, int bucketStartIndex) : rangeStart(rangeStart), rangeEnd(rangeEnd), data(data), bucketStartIndex(bucketStartIndex) {};
	};
	std::vector<bucketEntry_t> list_bucket[TBucketCount];

	// first (wrapped) bucket index and number of buckets covered by a range
	struct bucketSpan_t
	{
		int first;
		int count;
	};

	static bucketSpan_t getBucketSpan(TAddr rangeStart, TAddr rangeEnd)
	{
		const int absoluteFirst = (rangeStart / TAddressGranularity);
		const int absoluteEnd = ((rangeEnd + TAddressGranularity - 1) / TAddressGranularity);
		bucketSpan_t span;
		span.first = absoluteFirst % TBucketCount;
		// a range never needs to visit more buckets than exist
		span.count = std::min(absoluteEnd - absoluteFirst, TBucketCount);
		return span;
	}

	static int nextBucket(int bucketIndex)
	{
		return (bucketIndex + 1) % TBucketCount;
	}

public:
	IntervalBucketContainer() {};

	// range is defined as inclusive rangeStart and exclusive rangeEnd
	void addRange(TAddr rangeStart, TAddr rangeEnd, TData* data)
	{
		assert(rangeStart < rangeEnd);
		const bucketSpan_t span = getBucketSpan(rangeStart, rangeEnd);
		assert(span.count != 0);
		int bucketIndex = span.first;
		int remaining = span.count;
		while (remaining--)
		{
			list_bucket[bucketIndex].emplace_back(rangeStart, rangeEnd, data, span.first);
			bucketIndex = nextBucket(bucketIndex);
		}
	}

	// removes a range previously registered with addRange using the same start, end and data pointer
	void removeRange(TAddr rangeStart, TAddr rangeEnd, TData* data)
	{
		assert(rangeStart < rangeEnd);
		const bucketSpan_t span = getBucketSpan(rangeStart, rangeEnd);
		assert(span.count != 0);
		int eraseCountVerifier = span.count;
		int bucketIndex = span.first;
		int remaining = span.count;
		while (remaining--)
		{
			auto& bucket = list_bucket[bucketIndex];
			// only the first entry referring to `data` is removed from each bucket
			auto match = std::find_if(bucket.begin(), bucket.end(), [data](const bucketEntry_t& entry) { return entry.data == data; });
			if (match != bucket.end())
			{
				assert(match->rangeStart == rangeStart && match->rangeEnd == rangeEnd);
				bucket.erase(match);
				eraseCountVerifier--;
			}
			bucketIndex = nextBucket(bucketIndex);
		}
		assert(eraseCountVerifier == 0); // triggers if rangeStart/End doesn't match up with any registered range
	}

	// invokes cb(TData*) for the registered ranges that overlap [rangeStart, rangeEnd)
	template<typename TRangeCallback>
	void lookupRanges(TAddr rangeStart, TAddr rangeEnd, TRangeCallback cb)
	{
		assert(rangeStart < rangeEnd);
		const bucketSpan_t span = getBucketSpan(rangeStart, rangeEnd);
		assert(span.count != 0);
		int bucketIndex = span.first;
		int remaining = span.count;
		// in the first bucket every overlapping entry is reported
		// in the remaining buckets an entry is only reported if its range starts in that bucket,
		// so ranges spanning several visited buckets are not reported once per bucket
		bool isFirstBucket = true;
		do
		{
			for (auto& entry : list_bucket[bucketIndex])
			{
				const bool overlaps = entry.rangeStart < rangeEnd && entry.rangeEnd > rangeStart;
				if (overlaps && (isFirstBucket || entry.bucketStartIndex == bucketIndex))
				{
					cb(entry.data);
				}
			}
			isFirstBucket = false;
			bucketIndex = nextBucket(bucketIndex);
		} while (--remaining);
	}
};

View file

@ -0,0 +1,91 @@
#pragma once
// staged lookup table suited for cases where the lookup index range can be very large (e.g. memory addresses)
// performs 3 consecutive table lookups, where each table's width is defined by TBitsN
// empty subtables consume no memory beyond the initial two default tables for TBitsY and TBitsZ
// Three-level lookup table (X -> Y -> Z) indexed by the low TBitsX + TBitsY + TBitsZ bits of an offset.
// Sub-tables are allocated lazily by store(); until then entries point at shared placeholder tables,
// so lookup() never needs a null check.
// NOTE: the table owns raw pointers and must not be copied (the implicit copy operations would lead to double deletes).
template<int TBitsX, int TBitsY, int TBitsZ, typename T>
class LookupTableL3
{
	struct TableZ // z lookup
	{
		T arr[1 << TBitsZ]{};
	};

	struct TableY // y lookup
	{
		TableZ* arr[1 << TBitsY];
	};

	// by generating placeholder tables we can avoid conditionals in the lookup code since no null-pointer checking is necessary
	TableY* m_placeholderTableY{};
	TableZ* m_placeholderTableZ{};

public:
	LookupTableL3()
	{
		// init placeholder table Z
		m_placeholderTableZ = GenerateNewTableZ();
		// init placeholder table Y (all entries point to placeholder table Z)
		m_placeholderTableY = GenerateNewTableY();
		// init x table
		for (auto& itr : m_tableXArr)
			itr = m_placeholderTableY;
	}

	~LookupTableL3()
	{
		// release every table that store() allocated; placeholders are shared and freed once at the end
		for (TableY* tableY : m_tableXArr)
		{
			if (tableY == m_placeholderTableY)
				continue;
			for (TableZ* tableZ : tableY->arr)
			{
				if (tableZ != m_placeholderTableZ)
					delete tableZ;
			}
			delete tableY;
		}
		delete m_placeholderTableY;
		delete m_placeholderTableZ;
	}

	// lookup
	// only the bottom-most N bits of the offset are used
	// N = TBitsX + TBitsY + TBitsZ
	// if no match is found a default-constructed object is returned
	T lookup(uint32 offset)
	{
		uint32 indexZ = offset & ((1u << TBitsZ) - 1);
		offset >>= TBitsZ;
		uint32 indexY = offset & ((1u << TBitsY) - 1);
		offset >>= TBitsY;
		uint32 indexX = offset & ((1u << TBitsX) - 1);
		//offset >>= TBitsX;
		return m_tableXArr[indexX]->arr[indexY]->arr[indexZ];
	}

	// stores a copy of t at the given offset, allocating the Y and Z tables on the path if they are still placeholders
	void store(uint32 offset, T& t)
	{
		uint32 indexZ = offset & ((1u << TBitsZ) - 1);
		offset >>= TBitsZ;
		uint32 indexY = offset & ((1u << TBitsY) - 1);
		offset >>= TBitsY;
		uint32 indexX = offset & ((1u << TBitsX) - 1);
		if (m_tableXArr[indexX] == m_placeholderTableY)
			m_tableXArr[indexX] = GenerateNewTableY();
		TableY* lookupY = m_tableXArr[indexX];
		if (lookupY->arr[indexY] == m_placeholderTableZ)
			lookupY->arr[indexY] = GenerateNewTableZ();
		TableZ* lookupZ = lookupY->arr[indexY];
		lookupZ->arr[indexZ] = t;
	}

private:
	// generate a new Y lookup table which will initially contain only pointers to m_placeholderTableZ
	TableY* GenerateNewTableY()
	{
		TableY* tableY = new TableY();
		for (auto& itr : tableY->arr)
			itr = m_placeholderTableZ;
		return tableY;
	}

	// generate a new Z lookup table which will initially contain only default constructed T
	TableZ* GenerateNewTableZ()
	{
		TableZ* tableZ = new TableZ();
		return tableZ;
	}

	TableY* m_tableXArr[1 << TBitsX]; // x lookup
};

View file

@ -0,0 +1,137 @@
#pragma once
// Stores address ranges [start, end) with an attached object and finds the ranges overlapping a query range.
// Ranges are registered in `count` buckets of `granularity` address units (bucket indices wrap around modulo count).
// A range that spans several buckets is stored once and referenced from each bucket; queries use a
// per-query iteration index to report such a range only once.
// NOTE: ranges still stored when the RangeStore is destroyed are not freed; callers release them via deleteRange().
template<typename _OBJ, typename _ADDR, size_t count, size_t granularity>
class RangeStore
{
public:
	typedef struct
	{
		_ADDR start;
		_ADDR end;
		_OBJ data;
		size_t lastIterationIndex; // index of the last query that already reported this range
	}rangeEntry_t;

	RangeStore()
	{
	}

	// returns the (wrapped) bucket index an address falls into
	size_t getBucket(_ADDR addr)
	{
		size_t index = addr / granularity;
		index %= count;
		return index;
	}

	// computes the first (wrapped) bucket and the number of buckets touched by [addrStart, addrEnd)
	// bucketCount is not limited to `count`; callers clamp it
	void getBucketRange(_ADDR addrStart, _ADDR addrEnd, size_t& bucketFirst, size_t& bucketCount)
	{
		bucketFirst = getBucket(addrStart);
		size_t indexStart = addrStart / granularity;
		// the end is exclusive; max() keeps empty ranges (addrEnd <= addrStart) at a single bucket
		size_t indexEnd = std::max(addrStart, addrEnd - 1) / granularity;
		bucketCount = indexEnd - indexStart + 1;
	}

	// end address should be supplied as start+size
	// returns an opaque handle which must be passed to deleteRange() to remove and free the range
	void* storeRange(_OBJ data, _ADDR start, _ADDR end)
	{
		size_t bucketFirst;
		size_t bucketCount;
		getBucketRange(start, end, bucketFirst, bucketCount);
		bucketCount = std::min(bucketCount, count);
		// create range
		rangeEntry_t* rangeEntry = new rangeEntry_t();
		rangeEntry->data = data;
		rangeEntry->start = start;
		rangeEntry->end = end;
		rangeEntry->lastIterationIndex = currentIterationIndex;
		// register range in every bucket it touches
		size_t idx = bucketFirst;
		for (size_t i = 0; i < bucketCount; i++)
		{
			rangeBuckets[idx].list_ranges.push_back(rangeEntry);
			idx = (idx + 1) % count;
		}
		return rangeEntry;
	}

	// removes the range identified by a handle from storeRange() from all buckets and frees it
	void deleteRange(void* rangePtr)
	{
		rangeEntry_t* rangeEntry = (rangeEntry_t*)rangePtr;
		// get bucket range
		size_t bucketFirst;
		size_t bucketCount;
		getBucketRange(rangeEntry->start, rangeEntry->end, bucketFirst, bucketCount);
		bucketCount = std::min(bucketCount, count);
		// remove from buckets
		size_t idx = bucketFirst;
		for (size_t i = 0; i < bucketCount; i++)
		{
			rangeBuckets[idx].list_ranges.erase(std::remove(rangeBuckets[idx].list_ranges.begin(), rangeBuckets[idx].list_ranges.end(), rangeEntry), rangeBuckets[idx].list_ranges.end());
			idx = (idx + 1) % count;
		}
		delete rangeEntry;
	}

	// calls f(start, end, data) once for every stored range overlapping [start, end)
	// f must not add or remove ranges, since the bucket lists are being iterated
	void findRanges(_ADDR start, _ADDR end, std::function <void(_ADDR start, _ADDR end, _OBJ data)> f)
	{
		currentIterationIndex++;
		size_t bucketFirst;
		size_t bucketCount;
		getBucketRange(start, end, bucketFirst, bucketCount);
		bucketCount = std::min(bucketCount, count);
		size_t idx = bucketFirst;
		for (size_t i = 0; i < bucketCount; i++)
		{
			for (auto r : rangeBuckets[idx].list_ranges)
			{
				// lastIterationIndex filters out ranges already reported from a previous bucket of this query
				if (start < r->end && end > r->start && r->lastIterationIndex != currentIterationIndex)
				{
					r->lastIterationIndex = currentIterationIndex;
					f(r->start, r->end, r->data);
				}
			}
			idx = (idx + 1) % count;
		}
	}

	// finds the first stored range (in bucket order) overlapping [start, end)
	// returns true and fills rStart/rEnd/rData if one exists, otherwise returns false and leaves the outputs untouched
	bool findFirstRange(_ADDR start, _ADDR end, _ADDR& rStart, _ADDR& rEnd, _OBJ& rData)
	{
		currentIterationIndex++;
		size_t bucketFirst;
		size_t bucketCount;
		getBucketRange(start, end, bucketFirst, bucketCount);
		bucketCount = std::min(bucketCount, count);
		size_t idx = bucketFirst;
		for (size_t i = 0; i < bucketCount; i++)
		{
			for (auto r : rangeBuckets[idx].list_ranges)
			{
				if (start < r->end && end > r->start && r->lastIterationIndex != currentIterationIndex)
				{
					r->lastIterationIndex = currentIterationIndex;
					rStart = r->start;
					rEnd = r->end;
					rData = r->data;
					return true;
				}
			}
			idx = (idx + 1) % count;
		}
		return false;
	}

private:
	typedef struct
	{
		std::vector<rangeEntry_t*> list_ranges;
	}rangeBucket_t;

	std::array<rangeBucket_t, count> rangeBuckets;
	// incremented once per query; explicitly zero-initialized (it was previously read while uninitialized)
	size_t currentIterationIndex{ 0 };
};

View file

@ -0,0 +1,34 @@
// optimized and compact version of std::bitset with no error checking in release mode
// uses a single uint32 to store the bitmask, thus allowing up to 32 bool values
template<size_t N>
class SmallBitset
{
public:
SmallBitset() {};
static_assert(N <= 32);
bool test(size_t index) const
{
cemu_assert_debug(index < N);
return ((m_bits >> index) & 1) != 0;
}
void set(size_t index, bool val)
{
cemu_assert_debug(index < N);
m_bits &= ~(1u << index);
if (val)
m_bits |= (1u << index);
}
void set(size_t index)
{
cemu_assert_debug(index < N);
m_bits |= (1u << index);
}
private:
uint32 m_bits{};
};

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

859
src/util/crypto/aes128.cpp Normal file
View file

@ -0,0 +1,859 @@
/*
Original implementation based on Tiny-AES-c (2015)
https://github.com/kokke/tiny-AES-c
Modified by Exzap
*/
/*****************************************************************************/
/* Includes: */
/*****************************************************************************/
#include "aes128.h"
/*****************************************************************************/
/* Defines: */
/*****************************************************************************/
// The number of columns comprising a state in AES. This is a constant in AES. Value=4
#define Nb 4
// The number of 32 bit words in a key.
#define Nk 4
// Key length in bytes [128 bit]
#define KEYLEN 16
// The number of rounds in AES Cipher.
#define Nr 10
// NOTE(review): presumably selects a hardware AES-NI code path elsewhere in this file; it is only initialized here - confirm
bool useAESNI = false;
// AES state: 4x4 byte matrix (16 bytes)
typedef uint8 state_t[4][4];
// Context used by the software AES routines below
typedef struct
{
// points to the 16 byte state that is currently being processed (the state itself is not stored in the context)
state_t* state;
// expanded key: Nb * (Nr + 1) words of 4 bytes = 176 bytes, filled by KeyExpansion()
uint8 RoundKey[176];
}aes128Ctx_t;
// Accessors for the current state. Both macros expect a variable named aesCtx (aes128Ctx_t*) in scope.
// stateVal yields a single state byte, stateValU32 reinterprets row __x of the state as one 32bit value.
#define stateVal(__x, __y) ((*aesCtx->state)[__x][__y])
#define stateValU32(__x) (*(uint32*)((*aesCtx->state)[__x]))
// The lookup-tables are marked const so they can be placed in read-only storage instead of RAM
// The numbers below can be computed dynamically trading ROM for RAM -
// This can be useful in (embedded) bootloader applications, where ROM is often limited.
static const uint8 sbox[256] = {
//0 1 2 3 4 5 6 7 8 9 A B C D E F
0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 };
static const uint8 rsbox[256] =
{ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d };
// The round constant word array, Rcon[i], contains the values given by
// x to the power (i-1), being powers of x (x is denoted as {02}) in the field GF(2^8).
// Note that i starts at 1, not 0.
static const uint8 Rcon[255] = {
0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a,
0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39,
0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a,
0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8,
0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef,
0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc,
0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b,
0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3,
0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94,
0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20,
0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35,
0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f,
0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04,
0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63,
0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd,
0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb };
/*****************************************************************************/
/* Private functions: */
/*****************************************************************************/
// Returns the forward S-box substitution for a single byte.
uint8 getSBoxValue(uint8 num)
{
	const uint8 substituted = sbox[num];
	return substituted;
}
// Returns the inverse S-box substitution for a single byte.
uint8 getSBoxInvert(uint8 num)
{
	const uint8 substituted = rsbox[num];
	return substituted;
}
// Expands the cipher key into the round key schedule stored in aesCtx->RoundKey.
// Produces Nb * (Nr + 1) words of 4 bytes each; the first Nk words are the key itself and
// every following word is derived from the previous word and the word Nk positions earlier.
void KeyExpansion(aes128Ctx_t* aesCtx, const uint8* key)
{
	uint8* const roundKey = aesCtx->RoundKey;
	// The first round key is the key itself.
	for (uint32 byteIdx = 0; byteIdx < Nk * 4; ++byteIdx)
		roundKey[byteIdx] = key[byteIdx];
	// All other round keys are found from the previous round keys.
	for (uint32 word = Nk; word < (Nb * (Nr + 1)); ++word)
	{
		// start from a copy of the previous word
		uint8 temp[4];
		for (uint32 b = 0; b < 4; ++b)
			temp[b] = roundKey[(word - 1) * 4 + b];
		const uint32 posInKey = word % Nk;
		if (posInKey == 0)
		{
			// RotWord: [a0,a1,a2,a3] becomes [a1,a2,a3,a0]
			// SubWord: apply the S-box to each of the four bytes
			// both steps are fused here, followed by the XOR with the round constant
			const uint8 rotatedOut = temp[0];
			temp[0] = getSBoxValue(temp[1]);
			temp[1] = getSBoxValue(temp[2]);
			temp[2] = getSBoxValue(temp[3]);
			temp[3] = getSBoxValue(rotatedOut);
			temp[0] = temp[0] ^ Rcon[word / Nk];
		}
		else if (Nk > 6 && posInKey == 4)
		{
			// SubWord only (applies to key sizes with Nk > 6)
			for (uint32 b = 0; b < 4; ++b)
				temp[b] = getSBoxValue(temp[b]);
		}
		// new word = word Nk positions earlier XOR temp
		for (uint32 b = 0; b < 4; ++b)
			roundKey[word * 4 + b] = roundKey[(word - Nk) * 4 + b] ^ temp[b];
	}
}
// XORs round key number `round` from aesCtx->RoundKey into the state.
// note: replacing this with two 64bit xor operations decreased performance in benchmarks, probably because the state bytes need to be stored back in memory
void AddRoundKey(aes128Ctx_t* aesCtx, uint8 round)
{
	// index of the first byte of the selected round key (one round key spans Nb * 4 bytes)
	const auto keyBase = round * Nb * 4;
	for (int col = 0; col < 4; ++col)
	{
		for (int row = 0; row < 4; ++row)
		{
			stateVal(col, row) ^= aesCtx->RoundKey[keyBase + col * Nb + row];
		}
	}
}
// SubBytes step: replaces each of the 16 state bytes with its S-box entry.
void SubBytes(aes128Ctx_t* aesCtx)
{
	for (uint8 col = 0; col < 4; ++col)
	{
		for (uint8 row = 0; row < 4; ++row)
		{
			// each byte is substituted independently of the others
			stateVal(col, row) = sbox[stateVal(col, row)];
		}
	}
}
// ShiftRows step: rotates row r of the state to the left by r positions.
// Row 0 is left untouched; rows 1, 2 and 3 are rotated by 1, 2 and 3 columns.
void ShiftRows(aes128Ctx_t* aesCtx)
{
	for (uint8 row = 1; row < 4; ++row)
	{
		// snapshot the row so the rotated values can be written back without clobbering
		uint8 saved[4];
		for (uint8 col = 0; col < 4; ++col)
		{
			saved[col] = stateVal(col, row);
		}
		// column c receives the byte that used to sit `row` columns to its right
		for (uint8 col = 0; col < 4; ++col)
		{
			stateVal(col, row) = saved[(col + row) & 3];
		}
	}
}
// Multiplies x by 2 in GF(2^8): shift left by one bit and, if the top bit of x
// was set, XOR with 0x1b. The result is truncated to 8 bits by the return type.
uint8 aes_xtime(uint8 x)
{
	const int doubled = x << 1;
	const int reduction = ((x >> 7) & 1) ? 0x1b : 0;
	return doubled ^ reduction;
}
// MixColumns step: mixes the four bytes of every state column.
// Each output byte is the input byte XOR the XOR of the whole column XOR
// aes_xtime(input byte XOR next byte in the column, wrapping around).
void MixColumns(aes128Ctx_t* aesCtx)
{
	for (uint8 col = 0; col < 4; ++col)
	{
		// read the whole column first so all results are based on the old values
		const uint8 s0 = stateVal(col, 0);
		const uint8 s1 = stateVal(col, 1);
		const uint8 s2 = stateVal(col, 2);
		const uint8 s3 = stateVal(col, 3);
		const uint8 columnXor = s0 ^ s1 ^ s2 ^ s3;

		stateVal(col, 0) = s0 ^ columnXor ^ aes_xtime(s0 ^ s1);
		stateVal(col, 1) = s1 ^ columnXor ^ aes_xtime(s1 ^ s2);
		stateVal(col, 2) = s2 ^ columnXor ^ aes_xtime(s2 ^ s3);
		stateVal(col, 3) = s3 ^ columnXor ^ aes_xtime(s3 ^ s0);
	}
}
// Multiply(x, y) multiplies x by y in the field GF(2^8).
// Only the five lowest bits of y are evaluated (y < 0x20); each set bit k adds
// x doubled k times via aes_xtime. The result has type int, callers mask or
// truncate it as needed.
// Both arguments are fully parenthesised so that expressions such as `a | b`
// can be passed safely. Note that x and y are still evaluated multiple times.
#define Multiply(x, y)                                           \
	(  (((y) & 1) * (x)) ^                                       \
	((((y) >> 1) & 1) * aes_xtime(x)) ^                          \
	((((y) >> 2) & 1) * aes_xtime(aes_xtime(x))) ^               \
	((((y) >> 3) & 1) * aes_xtime(aes_xtime(aes_xtime(x)))) ^    \
	((((y) >> 4) & 1) * aes_xtime(aes_xtime(aes_xtime(aes_xtime(x))))))
// Lookup table read by InvMixColumns and filled by AES128_init().
// Entry i packs the GF(2^8) products of byte i with 0x0E (bits 0-7), 0x09 (bits 8-15),
// 0x0D (bits 16-23) and 0x0B (bits 24-31).
uint32 lookupTable_multiply[256];
//// Disabled loop-based variant of InvMixColumns, kept for reference (the active version follows this commented-out block).
//// The method used to multiply may be difficult to understand for the inexperienced.
//// Please use the references to gain more information.
//void InvMixColumns(aes128Ctx_t* aesCtx)
//{
// int i;
// uint8 a, b, c, d;
// for (i = 0; i < 4; ++i)
// {
// a = stateVal(i, 0);
// b = stateVal(i, 1);
// c = stateVal(i, 2);
// d = stateVal(i, 3);
//
// uint32 _a = lookupTable_multiply[a];
// uint32 _b = lookupTable_multiply[b];
// uint32 _c = lookupTable_multiply[c];
// uint32 _d = lookupTable_multiply[d];
//
//
// //stateVal(i, 0) = entryA->vE ^ entryB->vB ^ entryC->vD ^ entryD->v9;
// //stateVal(i, 1) = entryA->v9 ^ entryB->vE ^ entryC->vB ^ entryD->vD;
// //stateVal(i, 2) = entryA->vD ^ entryB->v9 ^ entryC->vE ^ entryD->vB;
// //stateVal(i, 3) = entryA->vB ^ entryB->vD ^ entryC->v9 ^ entryD->vE;
//
// //stateVal(i, 0) = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09);
// //stateVal(i, 1) = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d);
// //stateVal(i, 2) = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b);
// //stateVal(i, 3) = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e);
//
// //stateVal(i, 0) = Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09);
// //stateVal(i, 1) = Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d);
// //stateVal(i, 2) = Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b);
// //stateVal(i, 3) = Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e);
//
// stateValU32(i) = _a ^ _rotl(_b, 8) ^ _rotl(_c, 16) ^ _rotl(_d, 24);
//
// }
//}
// InvMixColumns step, implemented with the lookupTable_multiply table built in AES128_init().
// For each column, the table entries of its four bytes are rotated left by
// 0, 8, 16 and 24 bits respectively and XORed together; the 32-bit result
// replaces the column through stateValU32.
void InvMixColumns(aes128Ctx_t* aesCtx)
{
	for (int col = 0; col < 4; ++col)
	{
		// fetch all four bytes before the column is overwritten
		const uint8 s0 = stateVal(col, 0);
		const uint8 s1 = stateVal(col, 1);
		const uint8 s2 = stateVal(col, 2);
		const uint8 s3 = stateVal(col, 3);

		stateValU32(col) = lookupTable_multiply[s0]
			^ _rotl(lookupTable_multiply[s1], 8)
			^ _rotl(lookupTable_multiply[s2], 16)
			^ _rotl(lookupTable_multiply[s3], 24);
	}
}
// InvSubBytes step: replaces each of the 16 state bytes with its
// inverse S-box (rsbox) entry.
void InvSubBytes(aes128Ctx_t* aesCtx)
{
	for (uint8 row = 0; row < 4; ++row)
	{
		for (uint8 col = 0; col < 4; ++col)
		{
			// each byte is substituted independently of the others
			stateVal(col, row) = rsbox[stateVal(col, row)];
		}
	}
}
// InvShiftRows step: rotates row r of the state to the right by r positions,
// undoing ShiftRows. Row 0 is left untouched.
void InvShiftRows(aes128Ctx_t* aesCtx)
{
	for (uint8 row = 1; row < 4; ++row)
	{
		// snapshot the row so the rotated values can be written back without clobbering
		uint8 saved[4];
		for (uint8 col = 0; col < 4; ++col)
		{
			saved[col] = stateVal(col, row);
		}
		// column c receives the byte that used to sit `row` columns to its left
		for (uint8 col = 0; col < 4; ++col)
		{
			stateVal(col, row) = saved[(col + 4 - row) & 3];
		}
	}
}
// Encrypts the block referenced by aesCtx->state in place, using the round
// keys previously generated by KeyExpansion.
void Cipher(aes128Ctx_t* aesCtx)
{
	// initial whitening with round key 0
	AddRoundKey(aesCtx, 0);

	// rounds 1 .. Nr-1 all apply the same four steps
	uint8 round = 1;
	while (round < Nr)
	{
		SubBytes(aesCtx);
		ShiftRows(aesCtx);
		MixColumns(aesCtx);
		AddRoundKey(aesCtx, round);
		++round;
	}

	// final round: identical except that MixColumns is left out
	SubBytes(aesCtx);
	ShiftRows(aesCtx);
	AddRoundKey(aesCtx, Nr);
}
// Decrypts the block referenced by aesCtx->state in place, using the round
// keys previously generated by KeyExpansion. The round keys are applied in
// reverse order compared to Cipher.
void InvCipher(aes128Ctx_t* aesCtx)
{
	// start with the last round key
	AddRoundKey(aesCtx, Nr);

	// rounds Nr-1 .. 1 all apply the same four steps
	uint8 round = Nr - 1;
	while (round > 0)
	{
		InvShiftRows(aesCtx);
		InvSubBytes(aesCtx);
		AddRoundKey(aesCtx, round);
		InvMixColumns(aesCtx);
		--round;
	}

	// final round: identical except that InvMixColumns is left out
	InvShiftRows(aesCtx);
	InvSubBytes(aesCtx);
	AddRoundKey(aesCtx, 0);
}
// Copies KEYLEN bytes from input to output, one byte at a time in ascending order.
static void BlockCopy(uint8* output, uint8* input)
{
	uint8 idx = 0;
	while (idx < KEYLEN)
	{
		*(output + idx) = *(input + idx);
		++idx;
	}
}
/*****************************************************************************/
/* Public functions: */
/*****************************************************************************/
// Software AES-128 encryption of a single block (ECB).
// input:  block to encrypt (KEYLEN bytes are copied from it)
// key:    cipher key handed to KeyExpansion
// output: receives the encrypted block
void __soft__AES128_ECB_encrypt(uint8* input, const uint8* key, uint8* output)
{
	// the cipher works in place, so the block is staged in the output buffer first
	BlockCopy(output, input);

	aes128Ctx_t ctx;
	ctx.state = reinterpret_cast<state_t*>(output);
	KeyExpansion(&ctx, key);
	Cipher(&ctx);
}
// Software AES-128 decryption of a single block (ECB).
// input:  block to decrypt (KEYLEN bytes are copied from it)
// key:    cipher key handed to KeyExpansion
// output: receives the decrypted block
void AES128_ECB_decrypt(uint8* input, const uint8* key, uint8 *output)
{
	// the cipher works in place, so the block is staged in the output buffer first
	BlockCopy(output, input);

	aes128Ctx_t ctx;
	ctx.state = reinterpret_cast<state_t*>(output);
	// the round keys have to exist before the inverse cipher runs
	KeyExpansion(&ctx, key);
	InvCipher(&ctx);
}
// XORs the KEYLEN bytes at iv into the KEYLEN bytes at buf (buf is modified in place).
void XorWithIv(uint8* buf, const uint8* iv)
{
	uint8 idx = 0;
	while (idx < KEYLEN)
	{
		*(buf + idx) ^= *(iv + idx);
		++idx;
	}
}
// AES-128 CBC encryption (software implementation).
// output: receives the ciphertext, may be the same buffer as input
// input:  plaintext; it is only read, never modified
// length: number of bytes to encrypt, expected to be a multiple of KEYLEN
//         (checked by a debug assert; a trailing partial block is processed as
//         a full block, like before)
// key:    cipher key handed to KeyExpansion
// iv:     initialization vector of KEYLEN bytes, must not be null
void AES128_CBC_encrypt(uint8* output, uint8* input, uint32 length, const uint8* key, const uint8* iv)
{
	cemu_assert_debug((length % KEYLEN) == 0);

	aes128Ctx_t aesCtx;
	KeyExpansion(&aesCtx, key);

	// counting blocks instead of bytes keeps the loop counter from overflowing
	const uint32 blockCount = length / KEYLEN + ((length % KEYLEN) != 0 ? 1 : 0);
	const uint8* chainBlock = iv; // block XORed into the next plaintext block
	for (uint32 block = 0; block < blockCount; ++block)
	{
		// Copy first and apply the chaining XOR on the copy, so the caller's
		// plaintext buffer stays untouched. Nothing is copied when length is 0.
		BlockCopy(output, input);
		XorWithIv(output, chainBlock);
		aesCtx.state = (state_t*)output;
		Cipher(&aesCtx);
		// the ciphertext just produced chains into the next block
		chainBlock = output;
		input += KEYLEN;
		output += KEYLEN;
	}
}
// AES-128 CBC decryption (software implementation).
// output: receives the plaintext
// input:  ciphertext
// length: number of bytes to decrypt, expected to be a multiple of KEYLEN (debug assert)
// key:    cipher key handed to KeyExpansion
// iv:     initialization vector of KEYLEN bytes; a null pointer selects an all-zero IV
void __soft__AES128_CBC_decrypt(uint8* output, uint8* input, uint32 length, const uint8* key, const uint8* iv)
{
	aes128Ctx_t aesCtx;
	KeyExpansion(&aesCtx, key);

	uint8 chainBlock[KEYLEN];  // block XORed into the current decrypted block
	uint8 savedCipher[KEYLEN]; // copy of the current ciphertext block
	if (iv == nullptr)
		memset(chainBlock, 0, sizeof(chainBlock));
	else
		BlockCopy(chainBlock, (uint8*)iv);

	for (intptr_t pos = 0; pos < length; pos += KEYLEN)
	{
		aesCtx.state = (state_t*)output;
		BlockCopy(output, input);
		// keep the ciphertext block in a local buffer, it becomes the next chain block
		BlockCopy(savedCipher, input);
		InvCipher(&aesCtx);
		XorWithIv(output, chainBlock);
		BlockCopy(chainBlock, savedCipher);
		input += KEYLEN;
		output += KEYLEN;
	}
	cemu_assert_debug((length % KEYLEN) == 0);
}
// Deprecated AES-128 CBC decryption entry point, kept for existing callers.
// Parameters are the same as for __soft__AES128_CBC_decrypt, to which this
// function now forwards.
// The previous implementation used the input buffer itself as the next chaining
// block. When output and input were the same buffer, that block had already been
// overwritten with plaintext, so every block after the first decrypted wrongly.
// It also copied KEYLEN bytes before the loop even for length 0 and did not
// accept a null iv. The shared implementation keeps a private copy of each
// ciphertext block and treats a null iv as all zeroes.
void AES128_CBC_decrypt_buffer_depr(uint8* output, uint8* input, uint32 length, const uint8* key, const uint8* iv)
{
	__soft__AES128_CBC_decrypt(output, input, length, key, iv);
}
// CBC-decrypts `length` bytes (rounded down to a multiple of 16) through the
// AES128_CBC_decrypt pointer and then stores the last ciphertext block in iv,
// so a following call can continue the chain.
// Does nothing if fewer than 16 bytes are given.
void AES128_CBC_decrypt_updateIV(uint8* output, uint8* input, uint32 length, const uint8* key, uint8* iv)
{
	// drop any trailing partial block
	const uint32 alignedLength = length & ~0xF;
	if (alignedLength == 0)
		return;
	cemu_assert_debug((alignedLength&0xF) == 0); // always true after the masking above

	// grab the last ciphertext block before decrypting, since output may overwrite input
	uint8 lastCipherBlock[16];
	memcpy(lastCipherBlock, input + (alignedLength - 16), KEYLEN);
	AES128_CBC_decrypt(output, input, alignedLength, key, iv);
	memcpy(iv, lastCipherBlock, KEYLEN);
}
// Helper for the AES-NI key expansion.
// temp1: previous round key
// temp2: result of _mm_aeskeygenassist_si128 for that round key
// Returns the next round key: 32-bit word n of the result is the XOR of words
// 0..n of temp1, XORed with the highest 32-bit word of temp2.
inline __m128i AESNI128_ASSIST(
	__m128i temp1,
	__m128i temp2)
{
	// broadcast the highest 32-bit word of temp2 to all four lanes
	const __m128i assistWord = _mm_shuffle_epi32(temp2, 0xff);

	// accumulate temp1 shifted up by 4, 8 and 12 bytes
	__m128i accumulated = temp1;
	__m128i shifted = temp1;
	for (int step = 0; step < 3; ++step)
	{
		shifted = _mm_slli_si128(shifted, 0x4);
		accumulated = _mm_xor_si128(accumulated, shifted);
	}
	return _mm_xor_si128(accumulated, assistWord);
}
// Expands a 16-byte AES-128 key into 11 round keys using AES-NI.
// userkey: the 16-byte key (loaded unaligned)
// key:     receives 11 * 16 bytes of round keys; written through __m128i
//          assignments, so the buffer is expected to be 16-byte aligned
void AESNI128_KeyExpansionEncrypt(const unsigned char *userkey, unsigned char *key)
{
	__m128i* const schedule = (__m128i*)key;

	// round key 0 is the user key itself
	__m128i roundKey = _mm_loadu_si128((__m128i*)userkey);
	schedule[0] = roundKey;

	// Every further round key is derived from the previous one. The round
	// constant has to be an immediate, hence the explicit sequence.
	roundKey = AESNI128_ASSIST(roundKey, _mm_aeskeygenassist_si128(roundKey, 0x1));
	schedule[1] = roundKey;
	roundKey = AESNI128_ASSIST(roundKey, _mm_aeskeygenassist_si128(roundKey, 0x2));
	schedule[2] = roundKey;
	roundKey = AESNI128_ASSIST(roundKey, _mm_aeskeygenassist_si128(roundKey, 0x4));
	schedule[3] = roundKey;
	roundKey = AESNI128_ASSIST(roundKey, _mm_aeskeygenassist_si128(roundKey, 0x8));
	schedule[4] = roundKey;
	roundKey = AESNI128_ASSIST(roundKey, _mm_aeskeygenassist_si128(roundKey, 0x10));
	schedule[5] = roundKey;
	roundKey = AESNI128_ASSIST(roundKey, _mm_aeskeygenassist_si128(roundKey, 0x20));
	schedule[6] = roundKey;
	roundKey = AESNI128_ASSIST(roundKey, _mm_aeskeygenassist_si128(roundKey, 0x40));
	schedule[7] = roundKey;
	roundKey = AESNI128_ASSIST(roundKey, _mm_aeskeygenassist_si128(roundKey, 0x80));
	schedule[8] = roundKey;
	roundKey = AESNI128_ASSIST(roundKey, _mm_aeskeygenassist_si128(roundKey, 0x1b));
	schedule[9] = roundKey;
	roundKey = AESNI128_ASSIST(roundKey, _mm_aeskeygenassist_si128(roundKey, 0x36));
	schedule[10] = roundKey;
}
void AESNI128_KeyExpansionDecrypt(const unsigned char *userkey, unsigned char *key)
{
__m128i temp1, temp2;
__m128i *Key_Schedule = (__m128i*)key;
temp1 = _mm_loadu_si128((__m128i*)userkey);
Key_Schedule[0] = temp1;
temp2 = _mm_aeskeygenassist_si128(temp1, 0x1);
temp1 = AESNI128_ASSIST(temp1, temp2);
Key_Schedule[1] = temp1;
temp2 = _mm_aeskeygenassist_si128(temp1, 0x2);
temp1 = AESNI128_ASSIST(temp1, temp2);
Key_Schedule[2] = temp1;
temp2 = _mm_aeskeygenassist_si128(temp1, 0x4);
temp1 = AESNI128_ASSIST(temp1, temp2);
Key_Schedule[3] = temp1;
temp2 = _mm_aeskeygenassist_si128(temp1, 0x8);
temp1 = AESNI128_ASSIST(temp1, temp2);
Key_Schedule[4] = temp1;
temp2 = _mm_aeskeygenassist_si128(temp1, 0x10);
temp1 = AESNI128_ASSIST(temp1, temp2);
Key_Schedule[5] = temp1;
temp2 = _mm_aeskeygenassist_si128(temp1, 0x20);
temp1 = AESNI128_ASSIST(temp1, temp2);
Key_Schedule[6] = temp1;
temp2 = _mm_aeskeygenassist_si128(temp1, 0x40);
temp1 = AESNI128_ASSIST(temp1, temp2);
Key_Schedule[7] = temp1;
temp2 = _mm_aeskeygenassist_si128(temp1, 0x80);
temp1 = AESNI128_ASSIST(temp1, temp2);
Key_Schedule[8] = temp1;
temp2 = _mm_aeskeygenassist_si128(temp1, 0x1b);
temp1 = AESNI128_ASSIST(temp1, temp2);
Key_Schedule[9] = temp1;
temp2 = _mm_aeskeygenassist_si128(temp1, 0x36);
temp1 = AESNI128_ASSIST(temp1, temp2);
Key_Schedule[10] = temp1;
// inverse
for (sint32 i = 1; i < 10; i++)
{
Key_Schedule[i] = _mm_aesimc_si128(Key_Schedule[i]);
}
}
// AES CBC encryption using AES-NI and an already expanded key.
// in:               plaintext (loaded unaligned)
// out:              receives the ciphertext (stored unaligned)
// ivec:             16-byte initialization vector
// length:           number of bytes; a trailing partial block is rounded up to a full block
// key:              expanded round keys, read as an array of __m128i
// number_of_rounds: index of the last round key (used with aesenclast)
void AESNI128_CBC_encrypt(const unsigned char *in,
	unsigned char *out,
	unsigned char ivec[16],
	unsigned long length,
	unsigned char *key,
	int number_of_rounds)
{
	const __m128i* const roundKeys = (const __m128i*)key;
	const unsigned long blockCount = length / 16 + ((length % 16) ? 1 : 0);

	// chain holds the previous ciphertext block (the IV for the first block)
	__m128i chain = _mm_loadu_si128((__m128i*)ivec);
	for (unsigned long blockIdx = 0; blockIdx < blockCount; blockIdx++)
	{
		const __m128i plain = _mm_loadu_si128(&((__m128i*)in)[blockIdx]);
		chain = _mm_xor_si128(plain, chain);
		chain = _mm_xor_si128(chain, roundKeys[0]);

		int round = 1;
		while (round < number_of_rounds)
		{
			chain = _mm_aesenc_si128(chain, roundKeys[round]);
			round++;
		}
		// `round` now indexes the last round key
		chain = _mm_aesenclast_si128(chain, roundKeys[round]);
		_mm_storeu_si128(&((__m128i*)out)[blockIdx], chain);
	}
}
// AES-128 CBC decryption using AES-NI and an already expanded decryption key.
// in:     ciphertext (loaded unaligned)
// out:    receives the plaintext (stored unaligned)
// ivec:   16-byte initialization vector
// length: number of bytes; a trailing partial block is rounded up to a full block
// key:    11 round keys as produced by AESNI128_KeyExpansionDecrypt, read as __m128i
void AESNI128_CBC_decryptWithExpandedKey(const unsigned char *in,
	unsigned char *out,
	const unsigned char ivec[16],
	unsigned long length,
	unsigned char *key)
{
	const __m128i* const roundKeys = (const __m128i*)key;
	const unsigned long blockCount = length / 16 + ((length % 16) ? 1 : 0);

	// previous ciphertext block (the IV for the first block)
	__m128i chain = _mm_loadu_si128((__m128i*)ivec);
	for (unsigned long blockIdx = 0; blockIdx < blockCount; blockIdx++)
	{
		// the ciphertext is loaded before the store below, so in and out may be the same buffer
		const __m128i cipherBlock = _mm_loadu_si128(&((__m128i*)in)[blockIdx]);

		__m128i block = _mm_xor_si128(cipherBlock, roundKeys[10]);
		int round = 9;
		while (round > 0)
		{
			block = _mm_aesdec_si128(block, roundKeys[round]);
			round--;
		}
		block = _mm_aesdeclast_si128(block, roundKeys[0]);
		block = _mm_xor_si128(block, chain);
		_mm_storeu_si128(&((__m128i*)out)[blockIdx], block);

		chain = cipherBlock;
	}
}
// AES-128 CBC decryption (AES-NI implementation).
// output: receives the plaintext
// input:  ciphertext
// length: number of bytes to decrypt
// key:    16-byte cipher key
// iv:     16-byte initialization vector; a null pointer selects an all-zero IV
void __aesni__AES128_CBC_decrypt(uint8* output, uint8* input, uint32 length, const uint8* key, const uint8* iv)
{
	// standard alignas instead of the compiler-specific __declspec(align(16));
	// the key schedule is accessed through aligned __m128i loads and stores
	alignas(16) uint8 expandedKey[11 * 16];
	AESNI128_KeyExpansionDecrypt(key, expandedKey);

	const uint8 zeroIv[16] = { 0 };
	AESNI128_CBC_decryptWithExpandedKey(input, output, iv ? iv : zeroIv, length, expandedKey);
}
// AES-128 encryption of a single 16-byte block (ECB) using AES-NI.
// input:  16-byte block to encrypt (loaded unaligned)
// key:    16-byte cipher key
// output: receives the 16-byte encrypted block (stored unaligned)
void __aesni__AES128_ECB_encrypt(uint8* input, const uint8* key, uint8* output)
{
	// standard alignas instead of the compiler-specific __declspec(align(16));
	// the key schedule is accessed through aligned __m128i loads and stores
	alignas(16) uint8 expandedKey[11 * 16];
	AESNI128_KeyExpansionEncrypt(key, expandedKey);
	const __m128i* const roundKeys = (const __m128i*)expandedKey;

	// encrypt single ECB block
	__m128i block = _mm_loadu_si128((const __m128i*)input);
	block = _mm_xor_si128(block, roundKeys[0]);
	for (int round = 1; round < 10; ++round)
	{
		block = _mm_aesenc_si128(block, roundKeys[round]);
	}
	block = _mm_aesenclast_si128(block, roundKeys[10]);
	_mm_storeu_si128((__m128i*)output, block);
}
// Runtime-selected implementations. AES128_init() points both at either the
// AES-NI or the software variant, so it has to run before they are called.
// AES128_ECB_encrypt has no explicit initializer, AES128_CBC_decrypt starts out as nullptr.
void(*AES128_ECB_encrypt)(uint8* input, const uint8* key, uint8* output);
void (*AES128_CBC_decrypt)(uint8* output, uint8* input, uint32 length, const uint8* key, const uint8* iv) = nullptr;
// AES128-CTR encrypt/decrypt (the same operation does both).
// data:    buffer transformed in place
// length:  number of bytes in data; does not have to be a multiple of 16
// key:     16-byte cipher key
// nonceIv: 16-byte counter block; it is advanced by one for every 16 bytes
//          (or part thereof) processed, so the caller's buffer is modified
// Uses the AES128_ECB_encrypt pointer, so AES128_init() has to be called first.
void AES128CTR_transform(uint8* data, sint32 length, uint8* key, uint8* nonceIv)
{
	for (sint32 i = 0; i < length; i += 16)
	{
		// keystream block = encrypted counter
		uint8 keystream[16];
		AES128_ECB_encrypt(nonceIv, key, keystream);

		// Only touch bytes that belong to the caller's buffer. The last block
		// may be shorter than 16 bytes.
		const sint32 remaining = length - i;
		const sint32 chunkSize = (remaining < 16) ? remaining : 16;
		uint8* d = data + i;
		for (sint32 f = 0; f < chunkSize; f++)
		{
			d[f] ^= keystream[f];
		}

		// Increase nonce: the 16 bytes form one big-endian counter. Incrementing
		// byte by byte from the end gives the same result as the former 32-bit
		// word-wise increment, without unaligned or type-punned accesses.
		for (sint32 b = 15; b >= 0; b--)
		{
			if (++nonceIv[b] != 0)
				break; // no carry into the next byte
		}
	}
}
void AES128_init()
{
for (uint32 i = 0; i <= 0xFF; i++)
{
uint32 vE = Multiply((uint8)(i & 0xFF), 0x0E) & 0xFF;
uint32 v9 = Multiply((uint8)(i & 0xFF), 0x09) & 0xFF;
uint32 vD = Multiply((uint8)(i & 0xFF), 0x0D) & 0xFF;
uint32 vB = Multiply((uint8)(i & 0xFF), 0x0B) & 0xFF;
lookupTable_multiply[i] = (vE << 0) | (v9 << 8) | (vD << 16) | (vB << 24);
}
// check if AES-NI is available
int v[4];
__cpuid(v, 1);
useAESNI = (v[2] & 0x2000000) != 0;
if (useAESNI)
{
// AES-NI implementation
AES128_CBC_decrypt = __aesni__AES128_CBC_decrypt;
AES128_ECB_encrypt = __aesni__AES128_ECB_encrypt;
}
else
{
// basic software implementation
AES128_CBC_decrypt = __soft__AES128_CBC_decrypt;
AES128_ECB_encrypt = __soft__AES128_ECB_encrypt;
}
}
// Returns the useAESNI flag, which AES128_init() sets when the CPU reports AES-NI support.
bool AES128_useAESNI()
{
	const bool aesniSelected = useAESNI;
	return aesniSelected;
}

19
src/util/crypto/aes128.h Normal file
View file

@ -0,0 +1,19 @@
// #pragma once replaces the former _AES_H_ include guard, whose name
// (underscore followed by an uppercase letter) is reserved for the implementation.
#pragma once

// Builds the internal lookup table and selects the AES-NI or software
// implementation. Must be called before AES128_ECB_encrypt, AES128_CBC_decrypt
// or anything that uses them (AES128_CBC_decrypt_updateIV, AES128CTR_transform).
void AES128_init();

// Returns whether AES128_init() detected AES-NI support.
bool AES128_useAESNI();

// Encrypts a single 16-byte block. Assigned by AES128_init().
extern void(*AES128_ECB_encrypt)(uint8* input, const uint8* key, uint8* output);

// Decrypts a single 16-byte block (software implementation).
void AES128_ECB_decrypt(uint8* input, const uint8* key, uint8 *output);

// CBC-encrypts `length` bytes (expected to be a multiple of 16) from input to output.
void AES128_CBC_encrypt(uint8* output, uint8* input, uint32 length, const uint8* key, const uint8* iv);

// CBC-decrypts `length` bytes from input to output. A null iv is treated as an
// all-zero IV. Assigned by AES128_init().
extern void(*AES128_CBC_decrypt)(uint8* output, uint8* input, uint32 length, const uint8* key, const uint8* iv);

// Like AES128_CBC_decrypt, but rounds length down to a multiple of 16 and
// afterwards stores the last ciphertext block in iv.
void AES128_CBC_decrypt_updateIV(uint8* output, uint8* input, uint32 length, const uint8* key, uint8* iv);

// AES-128 CTR encryption/decryption of data in place. nonceIv is the 16-byte
// counter block and is advanced while processing.
void AES128CTR_transform(uint8* data, sint32 length, uint8* key, uint8* nonceIv);

380
src/util/crypto/crc32.cpp Normal file
View file

@ -0,0 +1,380 @@
#include "crc32.h"
// Platform setup: provides the __BYTE_ORDER macros and a PREFETCH(location)
// macro for the compiler in use.
#if defined(_MSC_VER) || defined(__MINGW32__)
// MSVC / MinGW: the byte order macros are defined here, fixed to little endian
#define __LITTLE_ENDIAN 1234
#define __BIG_ENDIAN 4321
#define __BYTE_ORDER __LITTLE_ENDIAN
#include <xmmintrin.h>
#ifdef __MINGW32__
// MinGW: use the GCC-style prefetch builtin
#define PREFETCH(location) __builtin_prefetch(location)
#else
// MSVC: use the SSE prefetch intrinsic from <xmmintrin.h>
#define PREFETCH(location) _mm_prefetch(location, _MM_HINT_T0)
#endif
#else
// defines __BYTE_ORDER as __LITTLE_ENDIAN or __BIG_ENDIAN
#include <sys/param.h>
#ifdef __GNUC__
#define PREFETCH(location) __builtin_prefetch(location)
#else
// no prefetching: PREFETCH expands to an empty statement
#define PREFETCH(location) ;
#endif
#endif
// Precomputed lookup tables for the table-driven CRC32 routines in this file.
// Crc32Lookup[0] is the table the byte-at-a-time loops index with (crc ^ byte) & 0xFF;
// Crc32Lookup[1]..[7] are read only by the eight-bytes-per-iteration loop in
// crc32_calc_slice_by_8 (one table per byte position within the 8-byte group).
// NOTE(review): nothing visible in this file writes to the table, so it could
// probably be declared const - confirm no other translation unit modifies it.
unsigned int Crc32Lookup[8][256] =
{
{
0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535,0x9E6495A3,
0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD,0xE7B82D07,0x90BF1D91,
0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D,0x6DDDE4EB,0xF4D4B551,0x83D385C7,
0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5,
0x3B6E20C8,0x4C69105E,0xD56041E4,0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,
0x35B5A8FA,0x42B2986C,0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59,
0x26D930AC,0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F,
0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB,0xB6662D3D,
0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F,0x9FBFE4A5,0xE8B8D433,
0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB,0x086D3D2D,0x91646C97,0xE6635C01,
0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,
0x65B0D9C6,0x12B7E950,0x8BBEB8EA,0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65,
0x4DB26158,0x3AB551CE,0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,
0x4369E96A,0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9,
0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409,0xCE61E49F,
0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81,0xB7BD5C3B,0xC0BA6CAD,
0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739,0x9DD277AF,0x04DB2615,0x73DC1683,
0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1,
0xF00F9344,0x8708A3D2,0x1E01F268,0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,
0xFED41B76,0x89D32BE0,0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5,
0xD6D6A3E8,0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B,
0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF,0x4669BE79,
0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703,0x220216B9,0x5505262F,
0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7,0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,
0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,
0x95BF4A82,0xE2B87A14,0x7BB12BAE,0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,
0x86D3D2D4,0xF1D4E242,0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,
0x88085AE6,0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45,
0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D,0x3E6E77DB,
0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5,0x47B2CF7F,0x30B5FFE9,
0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605,0xCDD70693,0x54DE5729,0x23D967BF,
0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D,
}
,
{
0x00000000,0x191B3141,0x32366282,0x2B2D53C3,0x646CC504,0x7D77F445,0x565AA786,0x4F4196C7,
0xC8D98A08,0xD1C2BB49,0xFAEFE88A,0xE3F4D9CB,0xACB54F0C,0xB5AE7E4D,0x9E832D8E,0x87981CCF,
0x4AC21251,0x53D92310,0x78F470D3,0x61EF4192,0x2EAED755,0x37B5E614,0x1C98B5D7,0x05838496,
0x821B9859,0x9B00A918,0xB02DFADB,0xA936CB9A,0xE6775D5D,0xFF6C6C1C,0xD4413FDF,0xCD5A0E9E,
0x958424A2,0x8C9F15E3,0xA7B24620,0xBEA97761,0xF1E8E1A6,0xE8F3D0E7,0xC3DE8324,0xDAC5B265,
0x5D5DAEAA,0x44469FEB,0x6F6BCC28,0x7670FD69,0x39316BAE,0x202A5AEF,0x0B07092C,0x121C386D,
0xDF4636F3,0xC65D07B2,0xED705471,0xF46B6530,0xBB2AF3F7,0xA231C2B6,0x891C9175,0x9007A034,
0x179FBCFB,0x0E848DBA,0x25A9DE79,0x3CB2EF38,0x73F379FF,0x6AE848BE,0x41C51B7D,0x58DE2A3C,
0xF0794F05,0xE9627E44,0xC24F2D87,0xDB541CC6,0x94158A01,0x8D0EBB40,0xA623E883,0xBF38D9C2,
0x38A0C50D,0x21BBF44C,0x0A96A78F,0x138D96CE,0x5CCC0009,0x45D73148,0x6EFA628B,0x77E153CA,
0xBABB5D54,0xA3A06C15,0x888D3FD6,0x91960E97,0xDED79850,0xC7CCA911,0xECE1FAD2,0xF5FACB93,
0x7262D75C,0x6B79E61D,0x4054B5DE,0x594F849F,0x160E1258,0x0F152319,0x243870DA,0x3D23419B,
0x65FD6BA7,0x7CE65AE6,0x57CB0925,0x4ED03864,0x0191AEA3,0x188A9FE2,0x33A7CC21,0x2ABCFD60,
0xAD24E1AF,0xB43FD0EE,0x9F12832D,0x8609B26C,0xC94824AB,0xD05315EA,0xFB7E4629,0xE2657768,
0x2F3F79F6,0x362448B7,0x1D091B74,0x04122A35,0x4B53BCF2,0x52488DB3,0x7965DE70,0x607EEF31,
0xE7E6F3FE,0xFEFDC2BF,0xD5D0917C,0xCCCBA03D,0x838A36FA,0x9A9107BB,0xB1BC5478,0xA8A76539,
0x3B83984B,0x2298A90A,0x09B5FAC9,0x10AECB88,0x5FEF5D4F,0x46F46C0E,0x6DD93FCD,0x74C20E8C,
0xF35A1243,0xEA412302,0xC16C70C1,0xD8774180,0x9736D747,0x8E2DE606,0xA500B5C5,0xBC1B8484,
0x71418A1A,0x685ABB5B,0x4377E898,0x5A6CD9D9,0x152D4F1E,0x0C367E5F,0x271B2D9C,0x3E001CDD,
0xB9980012,0xA0833153,0x8BAE6290,0x92B553D1,0xDDF4C516,0xC4EFF457,0xEFC2A794,0xF6D996D5,
0xAE07BCE9,0xB71C8DA8,0x9C31DE6B,0x852AEF2A,0xCA6B79ED,0xD37048AC,0xF85D1B6F,0xE1462A2E,
0x66DE36E1,0x7FC507A0,0x54E85463,0x4DF36522,0x02B2F3E5,0x1BA9C2A4,0x30849167,0x299FA026,
0xE4C5AEB8,0xFDDE9FF9,0xD6F3CC3A,0xCFE8FD7B,0x80A96BBC,0x99B25AFD,0xB29F093E,0xAB84387F,
0x2C1C24B0,0x350715F1,0x1E2A4632,0x07317773,0x4870E1B4,0x516BD0F5,0x7A468336,0x635DB277,
0xCBFAD74E,0xD2E1E60F,0xF9CCB5CC,0xE0D7848D,0xAF96124A,0xB68D230B,0x9DA070C8,0x84BB4189,
0x03235D46,0x1A386C07,0x31153FC4,0x280E0E85,0x674F9842,0x7E54A903,0x5579FAC0,0x4C62CB81,
0x8138C51F,0x9823F45E,0xB30EA79D,0xAA1596DC,0xE554001B,0xFC4F315A,0xD7626299,0xCE7953D8,
0x49E14F17,0x50FA7E56,0x7BD72D95,0x62CC1CD4,0x2D8D8A13,0x3496BB52,0x1FBBE891,0x06A0D9D0,
0x5E7EF3EC,0x4765C2AD,0x6C48916E,0x7553A02F,0x3A1236E8,0x230907A9,0x0824546A,0x113F652B,
0x96A779E4,0x8FBC48A5,0xA4911B66,0xBD8A2A27,0xF2CBBCE0,0xEBD08DA1,0xC0FDDE62,0xD9E6EF23,
0x14BCE1BD,0x0DA7D0FC,0x268A833F,0x3F91B27E,0x70D024B9,0x69CB15F8,0x42E6463B,0x5BFD777A,
0xDC656BB5,0xC57E5AF4,0xEE530937,0xF7483876,0xB809AEB1,0xA1129FF0,0x8A3FCC33,0x9324FD72,
},
{
0x00000000,0x01C26A37,0x0384D46E,0x0246BE59,0x0709A8DC,0x06CBC2EB,0x048D7CB2,0x054F1685,
0x0E1351B8,0x0FD13B8F,0x0D9785D6,0x0C55EFE1,0x091AF964,0x08D89353,0x0A9E2D0A,0x0B5C473D,
0x1C26A370,0x1DE4C947,0x1FA2771E,0x1E601D29,0x1B2F0BAC,0x1AED619B,0x18ABDFC2,0x1969B5F5,
0x1235F2C8,0x13F798FF,0x11B126A6,0x10734C91,0x153C5A14,0x14FE3023,0x16B88E7A,0x177AE44D,
0x384D46E0,0x398F2CD7,0x3BC9928E,0x3A0BF8B9,0x3F44EE3C,0x3E86840B,0x3CC03A52,0x3D025065,
0x365E1758,0x379C7D6F,0x35DAC336,0x3418A901,0x3157BF84,0x3095D5B3,0x32D36BEA,0x331101DD,
0x246BE590,0x25A98FA7,0x27EF31FE,0x262D5BC9,0x23624D4C,0x22A0277B,0x20E69922,0x2124F315,
0x2A78B428,0x2BBADE1F,0x29FC6046,0x283E0A71,0x2D711CF4,0x2CB376C3,0x2EF5C89A,0x2F37A2AD,
0x709A8DC0,0x7158E7F7,0x731E59AE,0x72DC3399,0x7793251C,0x76514F2B,0x7417F172,0x75D59B45,
0x7E89DC78,0x7F4BB64F,0x7D0D0816,0x7CCF6221,0x798074A4,0x78421E93,0x7A04A0CA,0x7BC6CAFD,
0x6CBC2EB0,0x6D7E4487,0x6F38FADE,0x6EFA90E9,0x6BB5866C,0x6A77EC5B,0x68315202,0x69F33835,
0x62AF7F08,0x636D153F,0x612BAB66,0x60E9C151,0x65A6D7D4,0x6464BDE3,0x662203BA,0x67E0698D,
0x48D7CB20,0x4915A117,0x4B531F4E,0x4A917579,0x4FDE63FC,0x4E1C09CB,0x4C5AB792,0x4D98DDA5,
0x46C49A98,0x4706F0AF,0x45404EF6,0x448224C1,0x41CD3244,0x400F5873,0x4249E62A,0x438B8C1D,
0x54F16850,0x55330267,0x5775BC3E,0x56B7D609,0x53F8C08C,0x523AAABB,0x507C14E2,0x51BE7ED5,
0x5AE239E8,0x5B2053DF,0x5966ED86,0x58A487B1,0x5DEB9134,0x5C29FB03,0x5E6F455A,0x5FAD2F6D,
0xE1351B80,0xE0F771B7,0xE2B1CFEE,0xE373A5D9,0xE63CB35C,0xE7FED96B,0xE5B86732,0xE47A0D05,
0xEF264A38,0xEEE4200F,0xECA29E56,0xED60F461,0xE82FE2E4,0xE9ED88D3,0xEBAB368A,0xEA695CBD,
0xFD13B8F0,0xFCD1D2C7,0xFE976C9E,0xFF5506A9,0xFA1A102C,0xFBD87A1B,0xF99EC442,0xF85CAE75,
0xF300E948,0xF2C2837F,0xF0843D26,0xF1465711,0xF4094194,0xF5CB2BA3,0xF78D95FA,0xF64FFFCD,
0xD9785D60,0xD8BA3757,0xDAFC890E,0xDB3EE339,0xDE71F5BC,0xDFB39F8B,0xDDF521D2,0xDC374BE5,
0xD76B0CD8,0xD6A966EF,0xD4EFD8B6,0xD52DB281,0xD062A404,0xD1A0CE33,0xD3E6706A,0xD2241A5D,
0xC55EFE10,0xC49C9427,0xC6DA2A7E,0xC7184049,0xC25756CC,0xC3953CFB,0xC1D382A2,0xC011E895,
0xCB4DAFA8,0xCA8FC59F,0xC8C97BC6,0xC90B11F1,0xCC440774,0xCD866D43,0xCFC0D31A,0xCE02B92D,
0x91AF9640,0x906DFC77,0x922B422E,0x93E92819,0x96A63E9C,0x976454AB,0x9522EAF2,0x94E080C5,
0x9FBCC7F8,0x9E7EADCF,0x9C381396,0x9DFA79A1,0x98B56F24,0x99770513,0x9B31BB4A,0x9AF3D17D,
0x8D893530,0x8C4B5F07,0x8E0DE15E,0x8FCF8B69,0x8A809DEC,0x8B42F7DB,0x89044982,0x88C623B5,
0x839A6488,0x82580EBF,0x801EB0E6,0x81DCDAD1,0x8493CC54,0x8551A663,0x8717183A,0x86D5720D,
0xA9E2D0A0,0xA820BA97,0xAA6604CE,0xABA46EF9,0xAEEB787C,0xAF29124B,0xAD6FAC12,0xACADC625,
0xA7F18118,0xA633EB2F,0xA4755576,0xA5B73F41,0xA0F829C4,0xA13A43F3,0xA37CFDAA,0xA2BE979D,
0xB5C473D0,0xB40619E7,0xB640A7BE,0xB782CD89,0xB2CDDB0C,0xB30FB13B,0xB1490F62,0xB08B6555,
0xBBD72268,0xBA15485F,0xB853F606,0xB9919C31,0xBCDE8AB4,0xBD1CE083,0xBF5A5EDA,0xBE9834ED,
},
{
0x00000000,0xB8BC6765,0xAA09C88B,0x12B5AFEE,0x8F629757,0x37DEF032,0x256B5FDC,0x9DD738B9,
0xC5B428EF,0x7D084F8A,0x6FBDE064,0xD7018701,0x4AD6BFB8,0xF26AD8DD,0xE0DF7733,0x58631056,
0x5019579F,0xE8A530FA,0xFA109F14,0x42ACF871,0xDF7BC0C8,0x67C7A7AD,0x75720843,0xCDCE6F26,
0x95AD7F70,0x2D111815,0x3FA4B7FB,0x8718D09E,0x1ACFE827,0xA2738F42,0xB0C620AC,0x087A47C9,
0xA032AF3E,0x188EC85B,0x0A3B67B5,0xB28700D0,0x2F503869,0x97EC5F0C,0x8559F0E2,0x3DE59787,
0x658687D1,0xDD3AE0B4,0xCF8F4F5A,0x7733283F,0xEAE41086,0x525877E3,0x40EDD80D,0xF851BF68,
0xF02BF8A1,0x48979FC4,0x5A22302A,0xE29E574F,0x7F496FF6,0xC7F50893,0xD540A77D,0x6DFCC018,
0x359FD04E,0x8D23B72B,0x9F9618C5,0x272A7FA0,0xBAFD4719,0x0241207C,0x10F48F92,0xA848E8F7,
0x9B14583D,0x23A83F58,0x311D90B6,0x89A1F7D3,0x1476CF6A,0xACCAA80F,0xBE7F07E1,0x06C36084,
0x5EA070D2,0xE61C17B7,0xF4A9B859,0x4C15DF3C,0xD1C2E785,0x697E80E0,0x7BCB2F0E,0xC377486B,
0xCB0D0FA2,0x73B168C7,0x6104C729,0xD9B8A04C,0x446F98F5,0xFCD3FF90,0xEE66507E,0x56DA371B,
0x0EB9274D,0xB6054028,0xA4B0EFC6,0x1C0C88A3,0x81DBB01A,0x3967D77F,0x2BD27891,0x936E1FF4,
0x3B26F703,0x839A9066,0x912F3F88,0x299358ED,0xB4446054,0x0CF80731,0x1E4DA8DF,0xA6F1CFBA,
0xFE92DFEC,0x462EB889,0x549B1767,0xEC277002,0x71F048BB,0xC94C2FDE,0xDBF98030,0x6345E755,
0x6B3FA09C,0xD383C7F9,0xC1366817,0x798A0F72,0xE45D37CB,0x5CE150AE,0x4E54FF40,0xF6E89825,
0xAE8B8873,0x1637EF16,0x048240F8,0xBC3E279D,0x21E91F24,0x99557841,0x8BE0D7AF,0x335CB0CA,
0xED59B63B,0x55E5D15E,0x47507EB0,0xFFEC19D5,0x623B216C,0xDA874609,0xC832E9E7,0x708E8E82,
0x28ED9ED4,0x9051F9B1,0x82E4565F,0x3A58313A,0xA78F0983,0x1F336EE6,0x0D86C108,0xB53AA66D,
0xBD40E1A4,0x05FC86C1,0x1749292F,0xAFF54E4A,0x322276F3,0x8A9E1196,0x982BBE78,0x2097D91D,
0x78F4C94B,0xC048AE2E,0xD2FD01C0,0x6A4166A5,0xF7965E1C,0x4F2A3979,0x5D9F9697,0xE523F1F2,
0x4D6B1905,0xF5D77E60,0xE762D18E,0x5FDEB6EB,0xC2098E52,0x7AB5E937,0x680046D9,0xD0BC21BC,
0x88DF31EA,0x3063568F,0x22D6F961,0x9A6A9E04,0x07BDA6BD,0xBF01C1D8,0xADB46E36,0x15080953,
0x1D724E9A,0xA5CE29FF,0xB77B8611,0x0FC7E174,0x9210D9CD,0x2AACBEA8,0x38191146,0x80A57623,
0xD8C66675,0x607A0110,0x72CFAEFE,0xCA73C99B,0x57A4F122,0xEF189647,0xFDAD39A9,0x45115ECC,
0x764DEE06,0xCEF18963,0xDC44268D,0x64F841E8,0xF92F7951,0x41931E34,0x5326B1DA,0xEB9AD6BF,
0xB3F9C6E9,0x0B45A18C,0x19F00E62,0xA14C6907,0x3C9B51BE,0x842736DB,0x96929935,0x2E2EFE50,
0x2654B999,0x9EE8DEFC,0x8C5D7112,0x34E11677,0xA9362ECE,0x118A49AB,0x033FE645,0xBB838120,
0xE3E09176,0x5B5CF613,0x49E959FD,0xF1553E98,0x6C820621,0xD43E6144,0xC68BCEAA,0x7E37A9CF,
0xD67F4138,0x6EC3265D,0x7C7689B3,0xC4CAEED6,0x591DD66F,0xE1A1B10A,0xF3141EE4,0x4BA87981,
0x13CB69D7,0xAB770EB2,0xB9C2A15C,0x017EC639,0x9CA9FE80,0x241599E5,0x36A0360B,0x8E1C516E,
0x866616A7,0x3EDA71C2,0x2C6FDE2C,0x94D3B949,0x090481F0,0xB1B8E695,0xA30D497B,0x1BB12E1E,
0x43D23E48,0xFB6E592D,0xE9DBF6C3,0x516791A6,0xCCB0A91F,0x740CCE7A,0x66B96194,0xDE0506F1,
}
,
{
0x00000000,0x3D6029B0,0x7AC05360,0x47A07AD0,0xF580A6C0,0xC8E08F70,0x8F40F5A0,0xB220DC10,
0x30704BC1,0x0D106271,0x4AB018A1,0x77D03111,0xC5F0ED01,0xF890C4B1,0xBF30BE61,0x825097D1,
0x60E09782,0x5D80BE32,0x1A20C4E2,0x2740ED52,0x95603142,0xA80018F2,0xEFA06222,0xD2C04B92,
0x5090DC43,0x6DF0F5F3,0x2A508F23,0x1730A693,0xA5107A83,0x98705333,0xDFD029E3,0xE2B00053,
0xC1C12F04,0xFCA106B4,0xBB017C64,0x866155D4,0x344189C4,0x0921A074,0x4E81DAA4,0x73E1F314,
0xF1B164C5,0xCCD14D75,0x8B7137A5,0xB6111E15,0x0431C205,0x3951EBB5,0x7EF19165,0x4391B8D5,
0xA121B886,0x9C419136,0xDBE1EBE6,0xE681C256,0x54A11E46,0x69C137F6,0x2E614D26,0x13016496,
0x9151F347,0xAC31DAF7,0xEB91A027,0xD6F18997,0x64D15587,0x59B17C37,0x1E1106E7,0x23712F57,
0x58F35849,0x659371F9,0x22330B29,0x1F532299,0xAD73FE89,0x9013D739,0xD7B3ADE9,0xEAD38459,
0x68831388,0x55E33A38,0x124340E8,0x2F236958,0x9D03B548,0xA0639CF8,0xE7C3E628,0xDAA3CF98,
0x3813CFCB,0x0573E67B,0x42D39CAB,0x7FB3B51B,0xCD93690B,0xF0F340BB,0xB7533A6B,0x8A3313DB,
0x0863840A,0x3503ADBA,0x72A3D76A,0x4FC3FEDA,0xFDE322CA,0xC0830B7A,0x872371AA,0xBA43581A,
0x9932774D,0xA4525EFD,0xE3F2242D,0xDE920D9D,0x6CB2D18D,0x51D2F83D,0x167282ED,0x2B12AB5D,
0xA9423C8C,0x9422153C,0xD3826FEC,0xEEE2465C,0x5CC29A4C,0x61A2B3FC,0x2602C92C,0x1B62E09C,
0xF9D2E0CF,0xC4B2C97F,0x8312B3AF,0xBE729A1F,0x0C52460F,0x31326FBF,0x7692156F,0x4BF23CDF,
0xC9A2AB0E,0xF4C282BE,0xB362F86E,0x8E02D1DE,0x3C220DCE,0x0142247E,0x46E25EAE,0x7B82771E,
0xB1E6B092,0x8C869922,0xCB26E3F2,0xF646CA42,0x44661652,0x79063FE2,0x3EA64532,0x03C66C82,
0x8196FB53,0xBCF6D2E3,0xFB56A833,0xC6368183,0x74165D93,0x49767423,0x0ED60EF3,0x33B62743,
0xD1062710,0xEC660EA0,0xABC67470,0x96A65DC0,0x248681D0,0x19E6A860,0x5E46D2B0,0x6326FB00,
0xE1766CD1,0xDC164561,0x9BB63FB1,0xA6D61601,0x14F6CA11,0x2996E3A1,0x6E369971,0x5356B0C1,
0x70279F96,0x4D47B626,0x0AE7CCF6,0x3787E546,0x85A73956,0xB8C710E6,0xFF676A36,0xC2074386,
0x4057D457,0x7D37FDE7,0x3A978737,0x07F7AE87,0xB5D77297,0x88B75B27,0xCF1721F7,0xF2770847,
0x10C70814,0x2DA721A4,0x6A075B74,0x576772C4,0xE547AED4,0xD8278764,0x9F87FDB4,0xA2E7D404,
0x20B743D5,0x1DD76A65,0x5A7710B5,0x67173905,0xD537E515,0xE857CCA5,0xAFF7B675,0x92979FC5,
0xE915E8DB,0xD475C16B,0x93D5BBBB,0xAEB5920B,0x1C954E1B,0x21F567AB,0x66551D7B,0x5B3534CB,
0xD965A31A,0xE4058AAA,0xA3A5F07A,0x9EC5D9CA,0x2CE505DA,0x11852C6A,0x562556BA,0x6B457F0A,
0x89F57F59,0xB49556E9,0xF3352C39,0xCE550589,0x7C75D999,0x4115F029,0x06B58AF9,0x3BD5A349,
0xB9853498,0x84E51D28,0xC34567F8,0xFE254E48,0x4C059258,0x7165BBE8,0x36C5C138,0x0BA5E888,
0x28D4C7DF,0x15B4EE6F,0x521494BF,0x6F74BD0F,0xDD54611F,0xE03448AF,0xA794327F,0x9AF41BCF,
0x18A48C1E,0x25C4A5AE,0x6264DF7E,0x5F04F6CE,0xED242ADE,0xD044036E,0x97E479BE,0xAA84500E,
0x4834505D,0x755479ED,0x32F4033D,0x0F942A8D,0xBDB4F69D,0x80D4DF2D,0xC774A5FD,0xFA148C4D,
0x78441B9C,0x4524322C,0x028448FC,0x3FE4614C,0x8DC4BD5C,0xB0A494EC,0xF704EE3C,0xCA64C78C,
},
{
0x00000000,0xCB5CD3A5,0x4DC8A10B,0x869472AE,0x9B914216,0x50CD91B3,0xD659E31D,0x1D0530B8,
0xEC53826D,0x270F51C8,0xA19B2366,0x6AC7F0C3,0x77C2C07B,0xBC9E13DE,0x3A0A6170,0xF156B2D5,
0x03D6029B,0xC88AD13E,0x4E1EA390,0x85427035,0x9847408D,0x531B9328,0xD58FE186,0x1ED33223,
0xEF8580F6,0x24D95353,0xA24D21FD,0x6911F258,0x7414C2E0,0xBF481145,0x39DC63EB,0xF280B04E,
0x07AC0536,0xCCF0D693,0x4A64A43D,0x81387798,0x9C3D4720,0x57619485,0xD1F5E62B,0x1AA9358E,
0xEBFF875B,0x20A354FE,0xA6372650,0x6D6BF5F5,0x706EC54D,0xBB3216E8,0x3DA66446,0xF6FAB7E3,
0x047A07AD,0xCF26D408,0x49B2A6A6,0x82EE7503,0x9FEB45BB,0x54B7961E,0xD223E4B0,0x197F3715,
0xE82985C0,0x23755665,0xA5E124CB,0x6EBDF76E,0x73B8C7D6,0xB8E41473,0x3E7066DD,0xF52CB578,
0x0F580A6C,0xC404D9C9,0x4290AB67,0x89CC78C2,0x94C9487A,0x5F959BDF,0xD901E971,0x125D3AD4,
0xE30B8801,0x28575BA4,0xAEC3290A,0x659FFAAF,0x789ACA17,0xB3C619B2,0x35526B1C,0xFE0EB8B9,
0x0C8E08F7,0xC7D2DB52,0x4146A9FC,0x8A1A7A59,0x971F4AE1,0x5C439944,0xDAD7EBEA,0x118B384F,
0xE0DD8A9A,0x2B81593F,0xAD152B91,0x6649F834,0x7B4CC88C,0xB0101B29,0x36846987,0xFDD8BA22,
0x08F40F5A,0xC3A8DCFF,0x453CAE51,0x8E607DF4,0x93654D4C,0x58399EE9,0xDEADEC47,0x15F13FE2,
0xE4A78D37,0x2FFB5E92,0xA96F2C3C,0x6233FF99,0x7F36CF21,0xB46A1C84,0x32FE6E2A,0xF9A2BD8F,
0x0B220DC1,0xC07EDE64,0x46EAACCA,0x8DB67F6F,0x90B34FD7,0x5BEF9C72,0xDD7BEEDC,0x16273D79,
0xE7718FAC,0x2C2D5C09,0xAAB92EA7,0x61E5FD02,0x7CE0CDBA,0xB7BC1E1F,0x31286CB1,0xFA74BF14,
0x1EB014D8,0xD5ECC77D,0x5378B5D3,0x98246676,0x852156CE,0x4E7D856B,0xC8E9F7C5,0x03B52460,
0xF2E396B5,0x39BF4510,0xBF2B37BE,0x7477E41B,0x6972D4A3,0xA22E0706,0x24BA75A8,0xEFE6A60D,
0x1D661643,0xD63AC5E6,0x50AEB748,0x9BF264ED,0x86F75455,0x4DAB87F0,0xCB3FF55E,0x006326FB,
0xF135942E,0x3A69478B,0xBCFD3525,0x77A1E680,0x6AA4D638,0xA1F8059D,0x276C7733,0xEC30A496,
0x191C11EE,0xD240C24B,0x54D4B0E5,0x9F886340,0x828D53F8,0x49D1805D,0xCF45F2F3,0x04192156,
0xF54F9383,0x3E134026,0xB8873288,0x73DBE12D,0x6EDED195,0xA5820230,0x2316709E,0xE84AA33B,
0x1ACA1375,0xD196C0D0,0x5702B27E,0x9C5E61DB,0x815B5163,0x4A0782C6,0xCC93F068,0x07CF23CD,
0xF6999118,0x3DC542BD,0xBB513013,0x700DE3B6,0x6D08D30E,0xA65400AB,0x20C07205,0xEB9CA1A0,
0x11E81EB4,0xDAB4CD11,0x5C20BFBF,0x977C6C1A,0x8A795CA2,0x41258F07,0xC7B1FDA9,0x0CED2E0C,
0xFDBB9CD9,0x36E74F7C,0xB0733DD2,0x7B2FEE77,0x662ADECF,0xAD760D6A,0x2BE27FC4,0xE0BEAC61,
0x123E1C2F,0xD962CF8A,0x5FF6BD24,0x94AA6E81,0x89AF5E39,0x42F38D9C,0xC467FF32,0x0F3B2C97,
0xFE6D9E42,0x35314DE7,0xB3A53F49,0x78F9ECEC,0x65FCDC54,0xAEA00FF1,0x28347D5F,0xE368AEFA,
0x16441B82,0xDD18C827,0x5B8CBA89,0x90D0692C,0x8DD55994,0x46898A31,0xC01DF89F,0x0B412B3A,
0xFA1799EF,0x314B4A4A,0xB7DF38E4,0x7C83EB41,0x6186DBF9,0xAADA085C,0x2C4E7AF2,0xE712A957,
0x15921919,0xDECECABC,0x585AB812,0x93066BB7,0x8E035B0F,0x455F88AA,0xC3CBFA04,0x089729A1,
0xF9C19B74,0x329D48D1,0xB4093A7F,0x7F55E9DA,0x6250D962,0xA90C0AC7,0x2F987869,0xE4C4ABCC,
},
{
0x00000000,0xA6770BB4,0x979F1129,0x31E81A9D,0xF44F2413,0x52382FA7,0x63D0353A,0xC5A73E8E,
0x33EF4E67,0x959845D3,0xA4705F4E,0x020754FA,0xC7A06A74,0x61D761C0,0x503F7B5D,0xF64870E9,
0x67DE9CCE,0xC1A9977A,0xF0418DE7,0x56368653,0x9391B8DD,0x35E6B369,0x040EA9F4,0xA279A240,
0x5431D2A9,0xF246D91D,0xC3AEC380,0x65D9C834,0xA07EF6BA,0x0609FD0E,0x37E1E793,0x9196EC27,
0xCFBD399C,0x69CA3228,0x582228B5,0xFE552301,0x3BF21D8F,0x9D85163B,0xAC6D0CA6,0x0A1A0712,
0xFC5277FB,0x5A257C4F,0x6BCD66D2,0xCDBA6D66,0x081D53E8,0xAE6A585C,0x9F8242C1,0x39F54975,
0xA863A552,0x0E14AEE6,0x3FFCB47B,0x998BBFCF,0x5C2C8141,0xFA5B8AF5,0xCBB39068,0x6DC49BDC,
0x9B8CEB35,0x3DFBE081,0x0C13FA1C,0xAA64F1A8,0x6FC3CF26,0xC9B4C492,0xF85CDE0F,0x5E2BD5BB,
0x440B7579,0xE27C7ECD,0xD3946450,0x75E36FE4,0xB044516A,0x16335ADE,0x27DB4043,0x81AC4BF7,
0x77E43B1E,0xD19330AA,0xE07B2A37,0x460C2183,0x83AB1F0D,0x25DC14B9,0x14340E24,0xB2430590,
0x23D5E9B7,0x85A2E203,0xB44AF89E,0x123DF32A,0xD79ACDA4,0x71EDC610,0x4005DC8D,0xE672D739,
0x103AA7D0,0xB64DAC64,0x87A5B6F9,0x21D2BD4D,0xE47583C3,0x42028877,0x73EA92EA,0xD59D995E,
0x8BB64CE5,0x2DC14751,0x1C295DCC,0xBA5E5678,0x7FF968F6,0xD98E6342,0xE86679DF,0x4E11726B,
0xB8590282,0x1E2E0936,0x2FC613AB,0x89B1181F,0x4C162691,0xEA612D25,0xDB8937B8,0x7DFE3C0C,
0xEC68D02B,0x4A1FDB9F,0x7BF7C102,0xDD80CAB6,0x1827F438,0xBE50FF8C,0x8FB8E511,0x29CFEEA5,
0xDF879E4C,0x79F095F8,0x48188F65,0xEE6F84D1,0x2BC8BA5F,0x8DBFB1EB,0xBC57AB76,0x1A20A0C2,
0x8816EAF2,0x2E61E146,0x1F89FBDB,0xB9FEF06F,0x7C59CEE1,0xDA2EC555,0xEBC6DFC8,0x4DB1D47C,
0xBBF9A495,0x1D8EAF21,0x2C66B5BC,0x8A11BE08,0x4FB68086,0xE9C18B32,0xD82991AF,0x7E5E9A1B,
0xEFC8763C,0x49BF7D88,0x78576715,0xDE206CA1,0x1B87522F,0xBDF0599B,0x8C184306,0x2A6F48B2,
0xDC27385B,0x7A5033EF,0x4BB82972,0xEDCF22C6,0x28681C48,0x8E1F17FC,0xBFF70D61,0x198006D5,
0x47ABD36E,0xE1DCD8DA,0xD034C247,0x7643C9F3,0xB3E4F77D,0x1593FCC9,0x247BE654,0x820CEDE0,
0x74449D09,0xD23396BD,0xE3DB8C20,0x45AC8794,0x800BB91A,0x267CB2AE,0x1794A833,0xB1E3A387,
0x20754FA0,0x86024414,0xB7EA5E89,0x119D553D,0xD43A6BB3,0x724D6007,0x43A57A9A,0xE5D2712E,
0x139A01C7,0xB5ED0A73,0x840510EE,0x22721B5A,0xE7D525D4,0x41A22E60,0x704A34FD,0xD63D3F49,
0xCC1D9F8B,0x6A6A943F,0x5B828EA2,0xFDF58516,0x3852BB98,0x9E25B02C,0xAFCDAAB1,0x09BAA105,
0xFFF2D1EC,0x5985DA58,0x686DC0C5,0xCE1ACB71,0x0BBDF5FF,0xADCAFE4B,0x9C22E4D6,0x3A55EF62,
0xABC30345,0x0DB408F1,0x3C5C126C,0x9A2B19D8,0x5F8C2756,0xF9FB2CE2,0xC813367F,0x6E643DCB,
0x982C4D22,0x3E5B4696,0x0FB35C0B,0xA9C457BF,0x6C636931,0xCA146285,0xFBFC7818,0x5D8B73AC,
0x03A0A617,0xA5D7ADA3,0x943FB73E,0x3248BC8A,0xF7EF8204,0x519889B0,0x6070932D,0xC6079899,
0x304FE870,0x9638E3C4,0xA7D0F959,0x01A7F2ED,0xC400CC63,0x6277C7D7,0x539FDD4A,0xF5E8D6FE,
0x647E3AD9,0xC209316D,0xF3E12BF0,0x55962044,0x90311ECA,0x3646157E,0x07AE0FE3,0xA1D90457,
0x579174BE,0xF1E67F0A,0xC00E6597,0x66796E23,0xA3DE50AD,0x05A95B19,0x34414184,0x92364A30,
},
{
0x00000000,0xCCAA009E,0x4225077D,0x8E8F07E3,0x844A0EFA,0x48E00E64,0xC66F0987,0x0AC50919,
0xD3E51BB5,0x1F4F1B2B,0x91C01CC8,0x5D6A1C56,0x57AF154F,0x9B0515D1,0x158A1232,0xD92012AC,
0x7CBB312B,0xB01131B5,0x3E9E3656,0xF23436C8,0xF8F13FD1,0x345B3F4F,0xBAD438AC,0x767E3832,
0xAF5E2A9E,0x63F42A00,0xED7B2DE3,0x21D12D7D,0x2B142464,0xE7BE24FA,0x69312319,0xA59B2387,
0xF9766256,0x35DC62C8,0xBB53652B,0x77F965B5,0x7D3C6CAC,0xB1966C32,0x3F196BD1,0xF3B36B4F,
0x2A9379E3,0xE639797D,0x68B67E9E,0xA41C7E00,0xAED97719,0x62737787,0xECFC7064,0x205670FA,
0x85CD537D,0x496753E3,0xC7E85400,0x0B42549E,0x01875D87,0xCD2D5D19,0x43A25AFA,0x8F085A64,
0x562848C8,0x9A824856,0x140D4FB5,0xD8A74F2B,0xD2624632,0x1EC846AC,0x9047414F,0x5CED41D1,
0x299DC2ED,0xE537C273,0x6BB8C590,0xA712C50E,0xADD7CC17,0x617DCC89,0xEFF2CB6A,0x2358CBF4,
0xFA78D958,0x36D2D9C6,0xB85DDE25,0x74F7DEBB,0x7E32D7A2,0xB298D73C,0x3C17D0DF,0xF0BDD041,
0x5526F3C6,0x998CF358,0x1703F4BB,0xDBA9F425,0xD16CFD3C,0x1DC6FDA2,0x9349FA41,0x5FE3FADF,
0x86C3E873,0x4A69E8ED,0xC4E6EF0E,0x084CEF90,0x0289E689,0xCE23E617,0x40ACE1F4,0x8C06E16A,
0xD0EBA0BB,0x1C41A025,0x92CEA7C6,0x5E64A758,0x54A1AE41,0x980BAEDF,0x1684A93C,0xDA2EA9A2,
0x030EBB0E,0xCFA4BB90,0x412BBC73,0x8D81BCED,0x8744B5F4,0x4BEEB56A,0xC561B289,0x09CBB217,
0xAC509190,0x60FA910E,0xEE7596ED,0x22DF9673,0x281A9F6A,0xE4B09FF4,0x6A3F9817,0xA6959889,
0x7FB58A25,0xB31F8ABB,0x3D908D58,0xF13A8DC6,0xFBFF84DF,0x37558441,0xB9DA83A2,0x7570833C,
0x533B85DA,0x9F918544,0x111E82A7,0xDDB48239,0xD7718B20,0x1BDB8BBE,0x95548C5D,0x59FE8CC3,
0x80DE9E6F,0x4C749EF1,0xC2FB9912,0x0E51998C,0x04949095,0xC83E900B,0x46B197E8,0x8A1B9776,
0x2F80B4F1,0xE32AB46F,0x6DA5B38C,0xA10FB312,0xABCABA0B,0x6760BA95,0xE9EFBD76,0x2545BDE8,
0xFC65AF44,0x30CFAFDA,0xBE40A839,0x72EAA8A7,0x782FA1BE,0xB485A120,0x3A0AA6C3,0xF6A0A65D,
0xAA4DE78C,0x66E7E712,0xE868E0F1,0x24C2E06F,0x2E07E976,0xE2ADE9E8,0x6C22EE0B,0xA088EE95,
0x79A8FC39,0xB502FCA7,0x3B8DFB44,0xF727FBDA,0xFDE2F2C3,0x3148F25D,0xBFC7F5BE,0x736DF520,
0xD6F6D6A7,0x1A5CD639,0x94D3D1DA,0x5879D144,0x52BCD85D,0x9E16D8C3,0x1099DF20,0xDC33DFBE,
0x0513CD12,0xC9B9CD8C,0x4736CA6F,0x8B9CCAF1,0x8159C3E8,0x4DF3C376,0xC37CC495,0x0FD6C40B,
0x7AA64737,0xB60C47A9,0x3883404A,0xF42940D4,0xFEEC49CD,0x32464953,0xBCC94EB0,0x70634E2E,
0xA9435C82,0x65E95C1C,0xEB665BFF,0x27CC5B61,0x2D095278,0xE1A352E6,0x6F2C5505,0xA386559B,
0x061D761C,0xCAB77682,0x44387161,0x889271FF,0x825778E6,0x4EFD7878,0xC0727F9B,0x0CD87F05,
0xD5F86DA9,0x19526D37,0x97DD6AD4,0x5B776A4A,0x51B26353,0x9D1863CD,0x1397642E,0xDF3D64B0,
0x83D02561,0x4F7A25FF,0xC1F5221C,0x0D5F2282,0x079A2B9B,0xCB302B05,0x45BF2CE6,0x89152C78,
0x50353ED4,0x9C9F3E4A,0x121039A9,0xDEBA3937,0xD47F302E,0x18D530B0,0x965A3753,0x5AF037CD,
0xFF6B144A,0x33C114D4,0xBD4E1337,0x71E413A9,0x7B211AB0,0xB78B1A2E,0x39041DCD,0xF5AE1D53,
0x2C8E0FFF,0xE0240F61,0x6EAB0882,0xA201081C,0xA8C40105,0x646E019B,0xEAE10678,0x264B06E6,
}
};
/// Reverses the byte order of a 32-bit value (byte 0 <-> byte 3, byte 1 <-> byte 2).
static inline uint32_t swap(uint32_t x)
{
#if defined(__GNUC__) || defined(__clang__)
	// GCC and Clang expose a byte-swap builtin
	const uint32_t reversed = __builtin_bswap32(x);
#else
	// portable fallback: move every byte to its mirrored position
	const uint32_t highToLow = x >> 24;
	const uint32_t upperMid = (x >> 8) & 0x0000FF00;
	const uint32_t lowerMid = (x << 8) & 0x00FF0000;
	const uint32_t lowToHigh = x << 24;
	const uint32_t reversed = highToLow | upperMid | lowerMid | lowToHigh;
#endif
	return reversed;
}
/// Computes a CRC32 over `length` bytes at `data` using the Slicing-by-8 scheme.
///
/// @param previousCrc32  CRC of any data hashed before this buffer; it is inverted on
///                       entry, so 0 starts the internal register at 0xFFFFFFFF.
/// @param data           input bytes; no alignment is required.
/// @param length         number of bytes to process; values <= 0 process nothing.
/// @return the updated CRC (internal register inverted again on exit).
unsigned int crc32_calc_slice_by_8(unsigned int previousCrc32, const void* data, int length)
{
	uint32_t crc = ~previousCrc32; // same as previousCrc32 ^ 0xFFFFFFFF
	const uint8_t* bytes = (const uint8_t*)data;
	// process eight bytes at once (Slicing-by-8)
	while (length >= 8)
	{
		// Build both 32-bit words byte by byte in little-endian order instead of
		// dereferencing a casted uint32_t pointer: this is valid for unaligned
		// buffers and gives the same word values on every host, so no byte-order
		// macros or byte swapping are needed.
		const uint32_t one = ((uint32_t)bytes[0] |
			((uint32_t)bytes[1] << 8) |
			((uint32_t)bytes[2] << 16) |
			((uint32_t)bytes[3] << 24)) ^ crc;
		const uint32_t two = (uint32_t)bytes[4] |
			((uint32_t)bytes[5] << 8) |
			((uint32_t)bytes[6] << 16) |
			((uint32_t)bytes[7] << 24);
		// the earliest input byte (low byte of `one`) is furthest from the end of
		// the 8-byte group and therefore uses the highest-numbered table
		crc = Crc32Lookup[0][(two >> 24) & 0xFF] ^
			Crc32Lookup[1][(two >> 16) & 0xFF] ^
			Crc32Lookup[2][(two >> 8) & 0xFF] ^
			Crc32Lookup[3][two & 0xFF] ^
			Crc32Lookup[4][(one >> 24) & 0xFF] ^
			Crc32Lookup[5][(one >> 16) & 0xFF] ^
			Crc32Lookup[6][(one >> 8) & 0xFF] ^
			Crc32Lookup[7][one & 0xFF];
		bytes += 8;
		length -= 8;
	}
	// remaining 0 to 7 bytes (standard algorithm); "> 0" keeps a negative length
	// from turning into a runaway read
	while (length-- > 0)
		crc = (crc >> 8) ^ Crc32Lookup[0][(crc & 0xFF) ^ *bytes++];
	return ~crc; // same as crc ^ 0xFFFFFFFF
}
/// Computes a CRC32 over `length` bytes at `data`, continuing from `c`.
///
/// @param c       CRC of any data hashed before this buffer (0 starts the internal
///                register at 0xFFFFFFFF).
/// @param data    input bytes; only read, never modified.
/// @param length  number of bytes; for length <= 0 the function returns `c` unchanged.
/// @return the updated CRC.
unsigned int crc32_calc(unsigned int c, const void* data, int length)
{
	// Nothing to hash. Negative lengths are treated like 0; the byte loop below
	// would otherwise never reach its end condition and read out of bounds.
	if (length <= 0)
		return c;
	// larger inputs go through the eight-bytes-per-iteration routine
	if (length >= 16)
		return crc32_calc_slice_by_8(c, data, length);
	// short inputs: plain one-table, byte-at-a-time update
	const unsigned char* p = (const unsigned char*)data;
	c ^= 0xFFFFFFFF;
	for (; length > 0; --length, ++p)
	{
		const unsigned char index = (unsigned char)(*p ^ (unsigned char)c);
		c = (c >> 8) ^ Crc32Lookup[0][index];
	}
	return ~c;
}

3
src/util/crypto/crc32.h Normal file
View file

@ -0,0 +1,3 @@
#pragma once
// Computes a CRC32 over `length` bytes starting at `data`.
// `c` is the CRC of the data hashed so far: the definition inverts it on entry and
// inverts the result on exit, so passing 0 starts the internal register at 0xFFFFFFFF.
// When `length` is 0 the definition returns `c` unchanged.
unsigned int crc32_calc(unsigned int c, const void* data, int length);

360
src/util/crypto/md5.cpp Normal file
View file

@ -0,0 +1,360 @@
/*
* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
* MD5 Message-Digest Algorithm (RFC 1321).
*
* Homepage:
* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
*
* Author:
* Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
*
* This software was written by Alexander Peslyak in 2001. No copyright is
* claimed, and the software is hereby placed in the public domain.
* In case this attempt to disclaim copyright and place the software in the
* public domain is deemed null and void, then the software is
* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
* general public under the following terms:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* There's ABSOLUTELY NO WARRANTY, express or implied.
*
* (This is a heavily cut-down "BSD license".)
*
* This differs from Colin Plumb's older public domain implementation in that
* no exactly 32-bit integer data type is required (any 32-bit or wider
* unsigned integer data type will do), there's no compile-time endianness
* configuration, and the function prototypes match OpenSSL's. No code from
* Colin Plumb's implementation has been reused; this comment merely compares
* the properties of the two independent implementations.
*
* The primary goals of this implementation are portability and ease of use.
* It is meant to be fast, but not as fast as possible. Some known
* optimizations are not included to reduce source code size and avoid
* compile-time configuration.
*/
#ifndef HAVE_OPENSSL
#include <string.h>
#include "md5.h"
/*
* The basic MD5 functions.
*
* F and G are optimized compared to their RFC 1321 definitions for
* architectures that lack an AND-NOT instruction, just like in Colin Plumb's
* implementation.
*/
#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
#define H(x, y, z) (((x) ^ (y)) ^ (z))
#define H2(x, y, z) ((x) ^ ((y) ^ (z)))
#define I(x, y, z) ((y) ^ ((x) | ~(z)))
/*
* The MD5 transformation for all four rounds.
*
* Expands to three statements: add f(b, c, d) + x + t into a, rotate a left by
* s bits, then add b into a. The "& 0xffffffff" before the right shift drops
* any bits above bit 31 first, so the rotate behaves as a 32-bit rotate even
* when MD5_u32plus is wider than 32 bits (which the file header allows).
*
* NOTE(review): the expansion is a bare statement sequence with no enclosing
* block and evaluates a and b more than once, so STEP is only safe where
* several statements are valid and the arguments have no side effects.
*/
#define STEP(f, a, b, c, d, x, t, s) \
(a) += f((b), (c), (d)) + (x) + (t); \
(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
(a) += (b);
/*
* SET reads 4 input bytes in little-endian byte order and stores them in a
* properly aligned word in host byte order.
*
* The check for little-endian architectures that tolerate unaligned memory
* accesses is just an optimization. Nothing will break if it fails to detect
* a suitable architecture.
*
* Unfortunately, this optimization may be a C strict aliasing rules violation
* if the caller's data buffer has effective type that cannot be aliased by
* MD5_u32plus. In practice, this problem may occur if these MD5 routines are
* inlined into a calling function, or with future and dangerously advanced
* link-time optimizations. For the time being, keeping these MD5 routines in
* their own translation unit avoids the problem.
*/
#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
#define SET(n) \
(*(MD5_u32plus *)&ptr[(n) * 4])
#define GET(n) \
SET(n)
#else
#define SET(n) \
(ctx->block[(n)] = \
(MD5_u32plus)ptr[(n) * 4] | \
((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \
((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \
((MD5_u32plus)ptr[(n) * 4 + 3] << 24))
#define GET(n) \
(ctx->block[(n)])
#endif
/*
* This processes one or more 64-byte data blocks, but does NOT update the bit
* counters. There are no alignment requirements.
*
* ctx  - supplies the chaining words a, b, c, d on entry and receives the
*        updated values on exit.
* data - start of the input blocks.
* size - number of bytes to process. The loop below always runs once and
*        stops when "size -= 64" reaches zero, so size must be a non-zero
*        multiple of 64; any other value wraps the unsigned counter and the
*        loop keeps reading past the input.
*
* Returns a pointer to the first byte after the last block processed.
*/
static const void *body(MD5_CTX *ctx, const void *data, unsigned long size)
{
const unsigned char *ptr;
MD5_u32plus a, b, c, d;
MD5_u32plus saved_a, saved_b, saved_c, saved_d;
ptr = (const unsigned char *)data;
/* work on local copies of the chaining words */
a = ctx->a;
b = ctx->b;
c = ctx->c;
d = ctx->d;
do {
/* remember the state this block starts from; it is added back in below */
saved_a = a;
saved_b = b;
saved_c = c;
saved_d = d;
/* Round 1 */
/* SET(n) fetches input word n of the current block; the later rounds re-read the same words through GET(n) */
STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
/* Round 2 */
STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
/* Round 3 */
/* alternates H and H2, which compute the same three-way XOR with different grouping */
STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
/* Round 4 */
STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
/* fold this block's result into the state it started from */
a += saved_a;
b += saved_b;
c += saved_c;
d += saved_d;
/* advance to the next 64-byte block */
ptr += 64;
} while (size -= 64);
/* publish the updated chaining words */
ctx->a = a;
ctx->b = b;
ctx->c = c;
ctx->d = d;
return ptr;
}
/*
* Prepares ctx for a new digest: clears both length counters and loads the
* four chaining words a..d with their starting constants.
*/
void MD5_Init(MD5_CTX *ctx)
{
	/* no input has been consumed yet */
	ctx->hi = 0;
	ctx->lo = 0;

	/* starting values of the chaining words */
	ctx->d = 0x10325476;
	ctx->c = 0x98badcfe;
	ctx->b = 0xefcdab89;
	ctx->a = 0x67452301;
}
// Feed `size` bytes starting at `data` into the running hash held in `ctx`.
// Complete 64-byte blocks are processed via body(); any remainder is kept in
// ctx->buffer until more data arrives or MD5_Final is called.
void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size)
{
MD5_u32plus saved_lo;
unsigned long used, available;
saved_lo = ctx->lo;
// lo holds the byte count masked to 29 bits; a wrap-around of lo carries into hi
if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
ctx->hi++;
ctx->hi += size >> 29;
// number of bytes still pending in ctx->buffer from earlier calls
used = saved_lo & 0x3f;
if (used) {
available = 64 - used;
// not enough new data to complete the pending block: just append and leave
if (size < available) {
memcpy(&ctx->buffer[used], data, size);
return;
}
// complete the pending block and process it
memcpy(&ctx->buffer[used], data, available);
data = (const unsigned char *)data + available;
size -= available;
body(ctx, ctx->buffer, 64);
}
// process all remaining whole blocks directly from the caller's memory
if (size >= 64) {
data = body(ctx, data, size & ~(unsigned long)0x3f);
size &= 0x3f;
}
// stash the trailing partial block (fewer than 64 bytes) for later
memcpy(ctx->buffer, data, size);
}
// Store the low 32 bits of `src` into dst[0..3], least significant byte first.
// The expansion consists of four complete statements (each ends in ';'), so
// call sites omit the trailing semicolon and the macro must not be used as the
// unbraced body of an if/for/while.
#define OUT_MD5(dst, src) \
(dst)[0] = (unsigned char)(src); \
(dst)[1] = (unsigned char)((src) >> 8); \
(dst)[2] = (unsigned char)((src) >> 16); \
(dst)[3] = (unsigned char)((src) >> 24);
// Finish the hash: pad the buffered data, append the message length, process
// the last block(s) and write the 16-byte digest to `result`.
// The context is zeroed afterwards and must be re-initialised before reuse.
void MD5_Final(unsigned char *result, MD5_CTX *ctx)
{
unsigned long used, available;
// bytes currently pending in the buffer
used = ctx->lo & 0x3f;
// padding starts with a single 0x80 byte
ctx->buffer[used++] = 0x80;
available = 64 - used;
// the 8-byte length field does not fit: zero-fill, flush this block and start a fresh one
if (available < 8)
{
memset(&ctx->buffer[used], 0, available);
body(ctx, ctx->buffer, 64);
used = 0;
available = 64;
}
// zero-fill up to the length field at offset 56
memset(&ctx->buffer[used], 0, available - 8);
// convert the byte counter to a bit counter before storing it
ctx->lo <<= 3;
OUT_MD5(&ctx->buffer[56], ctx->lo)
OUT_MD5(&ctx->buffer[60], ctx->hi)
body(ctx, ctx->buffer, 64);
// emit the chaining variables a..d as the digest, least significant byte first
OUT_MD5(&result[0], ctx->a)
OUT_MD5(&result[4], ctx->b)
OUT_MD5(&result[8], ctx->c)
OUT_MD5(&result[12], ctx->d)
// wipe the whole context, including buffered message bytes
memset(ctx, 0, sizeof(*ctx));
}
#endif
// HMAC-MD5
// Initialise an HMAC-MD5 context with the given key.
// Keys longer than 64 bytes are first replaced by their MD5 digest; shorter
// keys are zero-padded to 64 bytes. After this call the inner hash has already
// absorbed the ipad block and is ready for hmacMD5_update().
//   key     - key bytes (may be null only when key_len <= 0)
//   key_len - key length in bytes; negative values are treated as 0
//   ctx     - context to initialise (fully overwritten)
void hmacMD5_init_rfc2104(const unsigned char* key, int key_len, HMACMD5Ctx *ctx)
{
	// Must live at function scope: `key` may be redirected to this buffer and
	// is still read by the memcpy calls below, after the if-block has ended.
	unsigned char tk[16];

	memset(ctx, 0, sizeof(HMACMD5Ctx));
	if (key_len < 0)
		key_len = 0;
	/* if key is longer than 64 bytes reset it to key=MD5(key) */
	if (key_len > 64)
	{
		MD5_CTX tctx;
		MD5_Init(&tctx);
		MD5_Update(&tctx, key, key_len);
		MD5_Final(tk, &tctx);
		key = tk;
		key_len = 16;
	}
	/* start out by storing key in pads (the remainder stays zero from the memset above) */
	if (key_len > 0)
	{
		memcpy(ctx->k_ipad, key, key_len);
		memcpy(ctx->k_opad, key, key_len);
	}
	/* XOR key with ipad and opad values */
	for (int i = 0; i < 64; i++)
	{
		ctx->k_ipad[i] ^= 0x36;
		ctx->k_opad[i] ^= 0x5c;
	}
	// begin the inner hash with the ipad block
	MD5_Init(&ctx->ctx);
	MD5_Update(&ctx->ctx, ctx->k_ipad, 64);
}
// Variant of the HMAC-MD5 initialisation that never hashes the key: only the
// first 64 key bytes are used, anything beyond that is ignored.
void hmacMD5_init_limK_to_64(const unsigned char* key, int key_len, HMACMD5Ctx *ctx)
{
	// clamp the key length so the callee's "key longer than 64 bytes" path is never taken
	const int usedKeyLen = (key_len > 64) ? 64 : key_len;
	hmacMD5_init_rfc2104(key, usedKeyLen, ctx);
}
// Append `text_len` bytes of message data to the inner MD5 hash of `ctx`.
// May be called repeatedly between init and final.
void hmacMD5_update(const unsigned char* text, int text_len, HMACMD5Ctx *ctx)
{
MD5_Update(&ctx->ctx, text, text_len);
}
void hmacMD5_final(unsigned char* digest, HMACMD5Ctx *ctx)
{
MD5_CTX ctx_o;
MD5_Final(digest, &ctx->ctx);
MD5_Init(&ctx_o);
MD5_Update(&ctx_o, ctx->k_opad, 64);
MD5_Update(&ctx_o, digest, 16);
MD5_Final(digest, &ctx_o);
}
// One-shot HMAC-MD5: computes the MAC of `text` under `key` and writes 16 bytes to `digest`.
// Uses the init variant that truncates keys to 64 bytes instead of hashing them.
void hmacMD5(const unsigned char* key, int keyLen, const unsigned char* text, int textLen, unsigned char* digest)
{
HMACMD5Ctx ctx;
hmacMD5_init_limK_to_64(key, keyLen, &ctx);
hmacMD5_update(text, textLen, &ctx);
hmacMD5_final(digest, &ctx);
}

60
src/util/crypto/md5.h Normal file
View file

@ -0,0 +1,60 @@
/*
* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
* MD5 Message-Digest Algorithm (RFC 1321).
*
* Homepage:
* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
*
* Author:
* Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
*
* This software was written by Alexander Peslyak in 2001. No copyright is
* claimed, and the software is hereby placed in the public domain.
* In case this attempt to disclaim copyright and place the software in the
* public domain is deemed null and void, then the software is
* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
* general public under the following terms:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* There's ABSOLUTELY NO WARRANTY, express or implied.
*
* See md5.c for more information.
*/
#ifdef HAVE_OPENSSL
#include <openssl/md5.h>
#elif !defined(_MD5_H)
#define _MD5_H
/* Any 32-bit or wider unsigned integer data type will do */
typedef unsigned int MD5_u32plus;
// State of one in-progress MD5 computation.
typedef struct
{
// running message length counters, updated by MD5_Update
MD5_u32plus lo, hi;
// the four chaining variables; they form the digest after MD5_Final
MD5_u32plus a, b, c, d;
// holds a partial 64-byte block until enough data has arrived
unsigned char buffer[64];
// NOTE(review): presumably scratch space for the block transform - not referenced by the visible code, confirm in md5 source
MD5_u32plus block[16];
}MD5_CTX;
// Reset `ctx` so a new digest can be computed.
extern void MD5_Init(MD5_CTX *ctx);
// Hash `size` bytes from `data`; may be called any number of times.
extern void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size);
// Write the 16-byte digest to `result`; `ctx` must be re-initialised before reuse.
extern void MD5_Final(unsigned char *result, MD5_CTX *ctx);
#endif
// HMAC-MD5
// State of one in-progress HMAC-MD5 computation.
typedef struct
{
// MD5 state used for the inner hash
MD5_CTX ctx;
// key block combined with the inner pad
unsigned char k_ipad[64];
// key block combined with the outer pad
unsigned char k_opad[64];
}HMACMD5Ctx;
// Initialise `ctx` with `key`; keys longer than 64 bytes are truncated to 64 bytes.
void hmacMD5_init_limK_to_64(const unsigned char* key, int key_len, HMACMD5Ctx *ctx);
// Append message data to the MAC computation.
void hmacMD5_update(const unsigned char* text, int text_len, HMACMD5Ctx *ctx);
// Finish the computation and write the 16-byte MAC to `digest`.
void hmacMD5_final(unsigned char* digest, HMACMD5Ctx *ctx);
// One-shot convenience wrapper: init (key truncated to 64 bytes), update, final.
void hmacMD5(const unsigned char* key, int keyLen, const unsigned char* text, int textLen, unsigned char* digest);

View file

@ -0,0 +1,52 @@
#pragma once
#include <mutex>
#include <memory>
// Base for types that expose exactly one lazily created instance.
// The instance is a function-local static, created on the first call to
// getInstance() and handed out by pointer.
template<typename T>
class SingletonClass
{
protected:
	SingletonClass() = default;

public:
	// Returns the address of the single T object (never null).
	static T* getInstance()
	{
		static T s_theInstance;
		return &s_theInstance;
	}
};
// Reference-counted singleton: the instance exists only while at least one
// shared_ptr returned by getInstance() is alive. Once all owners are gone the
// next call creates a fresh instance.
template<typename T>
class SingletonRef
{
public:
	/*static std::shared_ptr<T> getInstance() C++20 only
	{
		static std::atomic<std::weak_ptr<T>> s_instance;
		std::shared_ptr<T> result;
		s_instance.compare_exchange_weak(result, std::make_shared<T>());
		return result;
	}*/

	// Returns the live instance, creating it if none currently exists.
	// Lookup and creation happen under s_mutex.
	static std::shared_ptr<T> getInstance()
	{
		std::scoped_lock guard(s_mutex);
		if (auto alive = s_instance.lock())
			return alive;
		// nobody holds the previous instance anymore (or none was created yet)
		auto created = std::make_shared<T>();
		s_instance = created;
		return created;
	}

protected:
	SingletonRef() = default;

private:
	static inline std::weak_ptr<T> s_instance; // non-owning handle to the current instance
	static inline std::mutex s_mutex;
};

View file

@ -0,0 +1,105 @@
#pragma once
#include <mutex>
#include <condition_variable>
#include <queue>
// Mutex-protected FIFO queue with blocking and non-blocking consumers.
// Every operation takes the internal mutex; blocking operations wait on a
// condition variable that is signalled once per pushed element.
template <typename T>
class ConcurrentQueue
{
public:
	ConcurrentQueue() = default;
	ConcurrentQueue(const ConcurrentQueue&) = delete;
	ConcurrentQueue& operator=(const ConcurrentQueue&) = delete;

	// Appends a copy of `item`. Returns the queue size right after insertion.
	size_t push(const T& item)
	{
		std::unique_lock<std::mutex> mlock(m_mutex);
		m_queue.push(item);
		const size_t result = m_queue.size();
		mlock.unlock(); // notify without holding the lock
		m_condVar.notify_one();
		return result;
	}

	// Constructs a new element in place from `args`. Returns the queue size right after insertion.
	template<typename ... Args>
	size_t push(Args&& ... args)
	{
		std::unique_lock<std::mutex> mlock(m_mutex);
		m_queue.emplace(std::forward<Args>(args)...);
		const size_t result = m_queue.size();
		mlock.unlock(); // notify without holding the lock
		m_condVar.notify_one();
		return result;
	}

	// Blocks until an element is available and returns a copy of the front element without removing it.
	T peek()
	{
		std::unique_lock<std::mutex> mlock(m_mutex);
		m_condVar.wait(mlock, [this] { return !m_queue.empty(); });
		return m_queue.front();
	}

	// Non-blocking pop: if the queue is non-empty, moves the front element into
	// `item`, removes it and returns true. Returns false on an empty queue.
	// (Despite the name this DOES remove the element.)
	bool peek2(T& item)
	{
		std::unique_lock<std::mutex> mlock(m_mutex);
		if (m_queue.empty())
			return false;
		item = std::move(m_queue.front());
		m_queue.pop();
		return true;
	}

	// Blocks until an element is available, then removes and returns it.
	T pop()
	{
		std::unique_lock<std::mutex> mlock(m_mutex);
		m_condVar.wait(mlock, [this] { return !m_queue.empty(); });
		// move instead of copy: cheaper and makes move-only element types usable
		T val = std::move(m_queue.front());
		m_queue.pop();
		return val;
	}

	// Blocks until an element is available, then moves it into `item` and removes it.
	void pop(T& item)
	{
		std::unique_lock<std::mutex> mlock(m_mutex);
		m_condVar.wait(mlock, [this] { return !m_queue.empty(); });
		item = std::move(m_queue.front());
		m_queue.pop();
	}

	// Removes all elements.
	void clear()
	{
		std::unique_lock<std::mutex> mlock(m_mutex);
		while (!m_queue.empty())
			m_queue.pop();
	}

	// Snapshot of the element count; may be outdated as soon as the call returns.
	size_t size() const
	{
		std::unique_lock<std::mutex> mlock(m_mutex);
		return m_queue.size();
	}

	// Snapshot of the empty state; may be outdated as soon as the call returns.
	bool empty() const
	{
		std::unique_lock<std::mutex> mlock(m_mutex);
		return m_queue.empty();
	}

private:
	mutable std::mutex m_mutex; // mutable so the const observers can lock
	std::condition_variable m_condVar;
	std::queue<T> m_queue;
};

View file

@ -0,0 +1,35 @@
#pragma once
// https://ideone.com/k0H8Ei
#include <iostream>
#include <map>
#include <boost/iterator/transform_iterator.hpp>
// Lightweight range wrapper that exposes a container through a transforming
// function: iterating the adaptor yields _f(element) for every element of _t.
// The container is held BY REFERENCE, so it must outlive the adaptor.
template <typename T, typename F>
struct map_adaptor
{
// stores a reference to `t` and a copy of `f`
map_adaptor(const T& t, const F& f) : _t(t), _f(f) {}
// copying is disabled, moving is allowed
map_adaptor(map_adaptor& a) = delete;
map_adaptor(map_adaptor&& a) = default;
// iterators over the transformed elements
[[nodiscard]] auto begin() { return boost::make_transform_iterator(_t.begin(), _f); }
[[nodiscard]] auto end() { return boost::make_transform_iterator(_t.end(), _f); }
[[nodiscard]] auto cbegin() const { return boost::make_transform_iterator(_t.cbegin(), _f); }
[[nodiscard]] auto cend() const { return boost::make_transform_iterator(_t.cend(), _f); }
protected:
const T& _t; // wrapped container (not owned)
F _f; // transformation applied to each element
};
// Builds a map_adaptor over `t` using transformation `f`, deducing both template arguments.
// `t` is referenced, not copied - do not pass a temporary container.
template <typename T, typename F>
auto get_map_adaptor(const T& t, const F& f) { return map_adaptor<T, F>(t, f); }
// Range over the `.first` member of every element of `t` (each key is returned by value).
template <typename T>
auto get_keys(const T& t) { return get_map_adaptor(t, [](const auto& p) { return p.first; }); }
// Range over the `.second` member of every element of `t` (each value is returned by value).
template <typename T>
auto get_values(const T& t) { return get_map_adaptor(t, [](const auto& p) { return p.second; }); }

View file

@ -0,0 +1,128 @@
#pragma once
// Fixed-type object pool.
// Memory is requested from the global allocator in chunks of
// `allocationGranularity` objects and handed out one slot at a time. Free slots
// form an intrusive singly linked stack: the first pointer-sized bytes of a
// free slot store the address of the next free slot.
// Chunks are never returned to the system by this class.
template<typename T>
class MemoryPool
{
	static_assert(sizeof(T) >= sizeof(void*)); // object must be large enough to store a single pointer
public:
	// allocationGranularity: number of objects added per pool growth; values below 1 are treated as 1
	MemoryPool(int allocationGranularity)
		: m_nextFreeObject(nullptr),
		  m_allocationGranularity(allocationGranularity > 0 ? allocationGranularity : 1),
		  m_numObjectsAllocated(0)
	{
	}

	// Takes a slot from the pool (growing it when empty) and constructs a T in it from `args`.
	// Throws std::bad_alloc if the pool cannot grow; exceptions from T's constructor
	// propagate after the slot has been returned to the pool.
	template <typename... Ts>
	T* allocObj(Ts&&... args)
	{
		if (!m_nextFreeObject)
			increasePoolSize();
		// pop the top slot from the free stack
		T* allocatedObj = m_nextFreeObject;
		m_nextFreeObject = *(T**)allocatedObj;
		try
		{
			new (allocatedObj) T(std::forward<Ts>(args)...);
		}
		catch (...)
		{
			// construction failed: do not lose the slot
			pushElementOnFreeStack(allocatedObj);
			throw;
		}
		return allocatedObj;
	}

	// Destroys `obj` and returns its slot to the pool. `obj` must come from allocObj() of this pool.
	void freeObj(T* obj)
	{
		obj->~T();
		pushElementOnFreeStack(obj);
	}

private:
	// Links `obj` in as the new top of the free stack (overwrites the start of the slot).
	void pushElementOnFreeStack(T* obj)
	{
		*(T**)obj = m_nextFreeObject;
		m_nextFreeObject = obj;
	}

	// Allocates one more chunk and pushes all of its slots onto the free stack.
	void increasePoolSize()
	{
		// throwing operator new: an allocation failure surfaces as std::bad_alloc
		// instead of a null pointer that would be dereferenced below
		T* newElements = static_cast<T*>(::operator new(static_cast<size_t>(m_allocationGranularity) * sizeof(T)));
		m_numObjectsAllocated += m_allocationGranularity;
		for (int i = 0; i < m_allocationGranularity; i++)
		{
			pushElementOnFreeStack(newElements);
			newElements++;
		}
	}

private:
	T* m_nextFreeObject;         // top of the free-slot stack, nullptr when exhausted
	int m_allocationGranularity; // slots added per growth step (>= 1)
	int m_numObjectsAllocated;   // total slots ever allocated
};
// this memory pool calls the default constructor when the internal pool is allocated
// no constructor/destructor will be called on acquire/release
// Objects therefore keep whatever state they had when they were released.
// Chunks are never returned to the system by this class.
template<typename T>
class MemoryPoolPermanentObjects
{
	// pool slot: the user-visible object followed by the free-list link
	struct internalObject_t
	{
		T v;
		internalObject_t* next;
	};

public:
	// allocationGranularity: number of objects added per pool growth; values below 1 are treated as 1
	MemoryPoolPermanentObjects(int allocationGranularity)
		: m_nextFreeObject(nullptr),
		  m_allocationGranularity(allocationGranularity > 0 ? allocationGranularity : 1),
		  m_numObjectsAllocated(0)
	{
	}

	// Hands out a pooled object, growing the pool when it is empty.
	// `args` are accepted for interface compatibility but are NOT used: no constructor runs here.
	// Throws std::bad_alloc if the pool cannot grow.
	template <typename... Ts>
	T* acquireObj(Ts&&... args)
	{
		if (!m_nextFreeObject)
			increasePoolSize();
		internalObject_t* allocatedObject = m_nextFreeObject;
		m_nextFreeObject = allocatedObject->next;
		return &allocatedObject->v;
	}

	// Returns `obj` to the pool without destroying it. `obj` must come from acquireObj() of this pool.
	void releaseObj(T* obj)
	{
		// recover the enclosing slot from the address of its `v` member
		internalObject_t* internalObj = (internalObject_t*)((unsigned char*)obj - offsetof(internalObject_t, v));
		pushElementOnFreeStack(internalObj);
	}

private:
	// Links `obj` in as the new top of the free stack.
	void pushElementOnFreeStack(internalObject_t* obj)
	{
		obj->next = m_nextFreeObject;
		m_nextFreeObject = obj;
	}

	// Allocates one more chunk, default-constructs every object in it and adds the slots to the free stack.
	void increasePoolSize()
	{
		// throwing operator new: an allocation failure surfaces as std::bad_alloc
		// instead of a null pointer that would be dereferenced below
		internalObject_t* newElements = static_cast<internalObject_t*>(::operator new(static_cast<size_t>(m_allocationGranularity) * sizeof(internalObject_t)));
		m_numObjectsAllocated += m_allocationGranularity;
		for (int i = 0; i < m_allocationGranularity; i++)
		{
			new (&newElements->v) T();
			pushElementOnFreeStack(newElements);
			newElements++;
		}
	}

private:
	internalObject_t* m_nextFreeObject; // top of the free-slot stack, nullptr when exhausted
	int m_allocationGranularity;        // slots added per growth step (>= 1)
	int m_numObjectsAllocated;          // total slots ever allocated
};

View file

@ -0,0 +1,171 @@
#pragma once
#include <mutex>
#include <condition_variable>
// Counting semaphore built from a mutex and a condition variable.
class Semaphore
{
public:
	// Adds one permit and wakes a single waiter.
	void notify()
	{
		std::lock_guard guard(m_mutex);
		++m_count;
		m_condition.notify_one();
	}

	// Blocks until a permit is available, then consumes it.
	void wait()
	{
		std::unique_lock guard(m_mutex);
		m_condition.wait(guard, [this] { return !(m_count == 0); });
		--m_count;
	}

	// Consumes a permit if one is available right now; never blocks.
	// Returns true when a permit was taken.
	bool try_wait()
	{
		std::lock_guard guard(m_mutex);
		const bool permitAvailable = !(m_count == 0);
		if (permitAvailable)
			--m_count;
		return permitAvailable;
	}

	// Discards all pending permits.
	void reset()
	{
		std::lock_guard guard(m_mutex);
		m_count = 0;
	}

private:
	std::mutex m_mutex;
	std::condition_variable m_condition;
	uint64 m_count = 0; // number of available permits
};
// Counter with wait primitives for the transitions "became zero" and "became non-zero".
// All accesses to the counter, including isZero(), are serialised by m_mutex.
class CounterSemaphore
{
public:
	// Sets the counter back to 0. Does not notify waiters.
	void reset()
	{
		std::lock_guard lock(m_mutex);
		m_count = 0;
	}

	// Adds one; waiters are woken only on the 0 -> 1 transition.
	void increment()
	{
		std::lock_guard lock(m_mutex);
		++m_count;
		if (m_count == 1)
			m_condition.notify_all();
	}

	// Subtracts one (the counter must not go negative); waiters are woken when it reaches 0.
	void decrement()
	{
		std::lock_guard lock(m_mutex);
		--m_count;
		cemu_assert_debug(m_count >= 0);
		if (m_count == 0)
			m_condition.notify_all();
	}

	// decrement only if non-zero
	// otherwise wait
	void decrementWithWait()
	{
		std::unique_lock lock(m_mutex);
		while (m_count == 0)
			m_condition.wait(lock);
		m_count--;
	}

	// decrement only if non-zero
	// otherwise wait
	// may wake up spuriously
	// Returns true when the counter was decremented, false when it was still
	// zero after the (single) wait of at most `ms` milliseconds.
	bool decrementWithWaitAndTimeout(uint32 ms)
	{
		std::unique_lock lock(m_mutex);
		if (m_count == 0)
		{
			m_condition.wait_for(lock, std::chrono::milliseconds(ms));
		}
		if (m_count == 0)
			return false;
		m_count--;
		return true;
	}

	// Blocks until the counter is 0.
	void waitUntilZero()
	{
		std::unique_lock lock(m_mutex);
		while (m_count != 0)
			m_condition.wait(lock);
	}

	// Blocks until the counter is different from 0.
	void waitUntilNonZero()
	{
		std::unique_lock lock(m_mutex);
		while (m_count == 0)
			m_condition.wait(lock);
	}

	// Snapshot of "counter == 0". Taken under the mutex so it does not race
	// with concurrent writers; the value may be outdated once the call returns.
	bool isZero() const
	{
		std::lock_guard lock(m_mutex);
		return m_count == 0;
	}

private:
	mutable std::mutex m_mutex; // mutable so the const observer can lock
	std::condition_variable m_condition;
	sint64 m_count = 0;
};
// Holds a single state value of type T and lets threads wait until it takes a
// particular value. Every access happens under the internal mutex.
template<typename T>
class StateSemaphore
{
public:
	StateSemaphore(T initialState) : m_state(initialState) {};

	// Returns a copy of the current state.
	T getValue()
	{
		std::unique_lock guard(m_mutex);
		return m_state;
	}

	// True when the current state compares equal to `state`.
	bool hasState(T state)
	{
		std::unique_lock guard(m_mutex);
		return m_state == state;
	}

	// Unconditionally stores `newState` and wakes all waiters.
	void setValue(T newState)
	{
		std::unique_lock guard(m_mutex);
		m_state = newState;
		m_condition.notify_all();
	}

	// Blocks until the state equals `expectedValue`, then replaces it with
	// `newState` and wakes all waiters.
	void setValue(T newState, T expectedValue)
	{
		std::unique_lock guard(m_mutex);
		m_condition.wait(guard, [&] { return !(m_state != expectedValue); });
		m_state = newState;
		m_condition.notify_all();
	}

	// Blocks until the state equals `state`.
	void waitUntilValue(T state)
	{
		std::unique_lock guard(m_mutex);
		m_condition.wait(guard, [&] { return !(m_state != state); });
	}

private:
	std::mutex m_mutex;
	std::condition_variable m_condition;
	T m_state;
};

View file

@ -0,0 +1,353 @@
#pragma once
// Sequential reader over a caller-owned, read-only byte buffer.
// Any read that would run past the end of the buffer sets a sticky error flag
// (see hasError()), moves the cursor to the end and yields 0 / an empty result.
// The buffer is not copied and must outlive the reader.
class MemStreamReader
{
public:
	// data/size describe the buffer to read from; a negative size is treated as an empty buffer
	MemStreamReader(const uint8* data, sint32 size) : m_data(data), m_size(size > 0 ? size : 0)
	{
		m_cursorPos = 0;
	}

	// Fixed-width reads. BE = stored big-endian, LE = stored little-endian.
	template<typename T> T readBE();
	template<typename T> T readLE();

	template<> uint8 readBE()
	{
		if (!reserveReadLength(sizeof(uint8)))
			return 0;
		uint8 v = m_data[m_cursorPos];
		m_cursorPos += sizeof(uint8);
		return v;
	}

	template<> uint16 readBE()
	{
		if (!reserveReadLength(sizeof(uint16)))
			return 0;
		const uint8* p = m_data + m_cursorPos;
		uint16 v;
		std::memcpy(&v, p, sizeof(v)); // memcpy: the source may be unaligned
		v = _BE(v);
		m_cursorPos += sizeof(uint16);
		return v;
	}

	template<> uint32 readBE()
	{
		if (!reserveReadLength(sizeof(uint32)))
			return 0;
		const uint8* p = m_data + m_cursorPos;
		uint32 v;
		std::memcpy(&v, p, sizeof(v));
		v = _BE(v);
		m_cursorPos += sizeof(uint32);
		return v;
	}

	template<> uint64 readBE()
	{
		if (!reserveReadLength(sizeof(uint64)))
			return 0;
		const uint8* p = m_data + m_cursorPos;
		uint64 v;
		std::memcpy(&v, p, sizeof(v));
		v = _BE(v);
		m_cursorPos += sizeof(uint64);
		return v;
	}

	// String layout: big-endian uint32 byte count followed by the raw bytes.
	// Lengths of 32 MiB or more are rejected as an error.
	template<> std::string readBE()
	{
		std::string s;
		uint32 stringSize = readBE<uint32>();
		if (hasError())
			return s;
		if (stringSize >= (32 * 1024 * 1024))
		{
			// out of bounds read or suspiciously large string
			m_hasError = true;
			return std::string();
		}
		s.resize(stringSize);
		readData(s.data(), stringSize);
		return s;
	}

	template<> uint8 readLE()
	{
		return readBE<uint8>();
	}

	template<> uint32 readLE()
	{
		if (!reserveReadLength(sizeof(uint32)))
			return 0;
		const uint8* p = m_data + m_cursorPos;
		uint32 v;
		std::memcpy(&v, p, sizeof(v));
		v = _LE(v);
		m_cursorPos += sizeof(uint32);
		return v;
	}

	template<> uint64 readLE()
	{
		if (!reserveReadLength(sizeof(uint64)))
			return 0;
		const uint8* p = m_data + m_cursorPos;
		uint64 v;
		std::memcpy(&v, p, sizeof(v));
		v = _LE(v);
		m_cursorPos += sizeof(uint64);
		return v;
	}

	// Vector layout: big-endian uint32 element count followed by the raw element bytes.
	// The count is validated against the remaining data BEFORE any memory is
	// allocated, so a corrupt count cannot trigger a huge allocation.
	// On error an empty vector is returned and hasError() becomes true.
	template<typename T>
	std::vector<T> readPODVector()
	{
		uint32 numElements = readBE<uint32>();
		if (hasError())
		{
			return std::vector<T>();
		}
		if ((size_t)numElements > remainingBytes() / sizeof(T))
		{
			markOutOfBounds();
			return std::vector<T>();
		}
		std::vector<T> v(numElements);
		readData(v.data(), v.size() * sizeof(T));
		return v;
	}

	// read string terminated by newline character (or end of stream)
	// will also trim off any carriage return
	// The returned view points into the reader's buffer. Calling this at the
	// end of the stream sets the error flag and returns an empty view.
	std::string_view readLine()
	{
		size_t length = 0;
		if (m_cursorPos >= m_size)
		{
			m_hasError = true;
			return std::basic_string_view((const char*)nullptr, 0);
		}
		// end of line is determined by '\n'
		const char* lineStrBegin = (const char*)(m_data + m_cursorPos);
		const char* lineStrEnd = nullptr;
		while (m_cursorPos < m_size)
		{
			if (m_data[m_cursorPos] == '\n')
			{
				lineStrEnd = (const char*)(m_data + m_cursorPos);
				m_cursorPos++; // skip the newline character
				break;
			}
			m_cursorPos++;
		}
		// no newline found: the line extends to the end of the stream
		if (lineStrEnd == nullptr)
			lineStrEnd = (const char*)(m_data + m_cursorPos);
		// truncate any '\r' at the beginning and end
		while (lineStrBegin < lineStrEnd)
		{
			if (lineStrBegin[0] != '\r')
				break;
			lineStrBegin++;
		}
		while (lineStrEnd > lineStrBegin)
		{
			if (lineStrEnd[-1] != '\r')
				break;
			lineStrEnd--;
		}
		length = (lineStrEnd - lineStrBegin);
		return std::basic_string_view((const char*)lineStrBegin, length);
	}

	// Copies `size` bytes to `ptr` and advances the cursor.
	// Returns false (and sets the error flag) if fewer than `size` bytes remain.
	bool readData(void* ptr, size_t size)
	{
		if (!reserveReadLength(size))
			return false;
		memcpy(ptr, m_data + m_cursorPos, size);
		m_cursorPos += (sint32)size;
		return true;
	}

	// Returns a view of the next `size` bytes inside the reader's buffer and
	// advances the cursor. Returns an empty span (and sets the error flag) if
	// fewer than `size` bytes remain.
	std::span<uint8> readDataNoCopy(size_t size)
	{
		if (!reserveReadLength(size))
			return std::span<uint8>();
		auto r = std::span<uint8>((uint8*)m_data + m_cursorPos, size);
		m_cursorPos += (sint32)size;
		return r;
	}

	// returns true if any of the reads was out of bounds
	bool hasError() const
	{
		return m_hasError;
	}

	bool isEndOfStream() const
	{
		return m_cursorPos == m_size;
	}

private:
	// Number of unread bytes. The cursor never exceeds m_size, so this cannot be negative.
	size_t remainingBytes() const
	{
		return (size_t)(m_size - m_cursorPos);
	}

	// Records an out-of-bounds access: sticky error flag, cursor parked at the end.
	void markOutOfBounds()
	{
		m_cursorPos = m_size;
		m_hasError = true;
	}

	// Checks that `length` more bytes can be read. Compares against the
	// remaining byte count so the check cannot wrap around for huge lengths.
	bool reserveReadLength(size_t length)
	{
		if (length > remainingBytes())
		{
			markOutOfBounds();
			return false;
		}
		return true;
	}

	// Advances the cursor past any run of '\r' / '\n' characters.
	void skipCRLF()
	{
		while (m_cursorPos < m_size)
		{
			if (m_data[m_cursorPos] != '\r' && m_data[m_cursorPos] != '\n')
				break;
			m_cursorPos++;
		}
	}

	const uint8* m_data;       // start of the buffer (not owned)
	sint32 m_size;             // buffer size in bytes, never negative
	sint32 m_cursorPos;        // offset of the next byte to read, 0..m_size
	bool m_hasError{ false };  // sticky: set by the first failed read
};
class MemStreamWriter
{
public:
MemStreamWriter(size_t reservedSize)
{
if (reservedSize > 0)
m_buffer.reserve(reservedSize);
else
m_buffer.reserve(128);
}
void writeData(const void* ptr, size_t size)
{
m_buffer.resize(m_buffer.size() + size);
uint8* p = m_buffer.data() + m_buffer.size() - size;
memcpy(p, ptr, size);
}
template<typename T> void writeBE(const T& v);
template<>
void writeBE<uint64>(const uint64& v)
{
m_buffer.resize(m_buffer.size() + 8);
uint8* p = m_buffer.data() + m_buffer.size() - 8;
uint64 tmp = _BE(v);
std::memcpy(p, &tmp, sizeof(tmp));
}
template<>
void writeBE<uint32>(const uint32& v)
{
m_buffer.resize(m_buffer.size() + 4);
uint8* p = m_buffer.data() + m_buffer.size() - 4;
uint32 tmp = _BE(v);
std::memcpy(p, &tmp, sizeof(tmp));
}
template<>
void writeBE<uint16>(const uint16& v)
{
m_buffer.resize(m_buffer.size() + 2);
uint8* p = m_buffer.data() + m_buffer.size() - 2;
uint16 tmp = _BE(v);
std::memcpy(p, &tmp, sizeof(tmp));
}
template<>
void writeBE<uint8>(const uint8& v)
{
m_buffer.emplace_back(v);
}
template<>
void writeBE<std::string>(const std::string& v)
{
writeBE<uint32>((uint32)v.size());
writeData(v.data(), v.size());
}
template<typename T> void writeLE(const T& v);
template<>
void writeLE<uint64>(const uint64& v)
{
m_buffer.resize(m_buffer.size() + 8);
uint8* p = m_buffer.data() + m_buffer.size() - 8;
uint64 tmp = _LE(v);
std::memcpy(p, &tmp, sizeof(tmp));
}
template<>
void writeLE<uint32>(const uint32& v)
{
m_buffer.resize(m_buffer.size() + 4);
uint8* p = m_buffer.data() + m_buffer.size() - 4;
uint32 tmp = _LE(v);
std::memcpy(p, &tmp, sizeof(tmp));
}
template<typename T>
void writePODVector(const std::vector<T>& v)
{
writeBE<uint32>(v.size());
writeData(v.data(), v.size() * sizeof(T));
}
// get result buffer without copy
// resets internal state
void getResultAndReset(std::vector<uint8>& data)
{
std::swap(m_buffer, data);
m_buffer.clear();
}
std::span<uint8> getResult()
{
return std::span<uint8>(m_buffer.data(), m_buffer.size());
}
private:
std::vector<uint8> m_buffer;
};
// Base class for objects that can be converted to and from a byte vector.
// Derived classes implement serializeImpl()/deserializeImpl(); the public
// wrappers take care of setting up the stream objects.
class SerializerHelper
{
public:
	// virtual: this is a polymorphic base, so deleting a derived object through
	// a SerializerHelper pointer must run the derived destructor
	virtual ~SerializerHelper() = default;

	// Serializes this object. On success `data` is replaced with the result and
	// true is returned; on failure `data` is left untouched.
	bool serialize(std::vector<uint8>& data)
	{
		MemStreamWriter streamWriter(0);
		bool r = serializeImpl(streamWriter);
		if (!r)
			return false;
		streamWriter.getResultAndReset(data);
		return true;
	}

	// Restores this object from `data`. Returns whatever deserializeImpl() reports.
	bool deserialize(std::vector<uint8>& data)
	{
		MemStreamReader memStreamReader(data.data(), (sint32)data.size());
		return deserializeImpl(memStreamReader);
	}

protected:
	// Write the object's state to `streamWriter`; return false to abort serialization.
	virtual bool serializeImpl(MemStreamWriter& streamWriter) = 0;
	// Read the object's state from `streamReader`; return false on malformed input.
	virtual bool deserializeImpl(MemStreamReader& streamReader) = 0;
};

View file

@ -0,0 +1,17 @@
#pragma once
// Base for types with exactly one lazily created instance, handed out by reference.
// The instance is a function-local static created on the first call to instance().
template<typename TType>
class Singleton
{
public:
	// Returns the single TType object.
	static TType& instance() noexcept
	{
		static TType s_instance;
		return s_instance;
	}

protected:
	Singleton() = default;
	// neither copyable nor movable
	Singleton(const Singleton&) = delete;
	Singleton(Singleton&&) noexcept = delete;
};

View file

@ -0,0 +1,86 @@
#pragma once
class StringBuf
{
public:
StringBuf(uint32 bufferSize)
{
this->str = (uint8*)malloc(bufferSize + 4);
this->allocated = true;
this->length = 0;
this->limit = bufferSize;
}
~StringBuf()
{
if (this->allocated)
free(this->str);
}
template<typename TFmt, typename ... TArgs>
void addFmt(const TFmt& format, TArgs&&... args)
{
auto r = fmt::vformat_to_n((char*)(this->str + this->length), (size_t)(this->limit - this->length), fmt::to_string_view(format), fmt::make_args_checked<TArgs...>(format, args...));
this->length += (uint32)r.size;
}
void add(const char* appendedStr)
{
const char* outputStart = (char*)(this->str + this->length);
char* output = (char*)outputStart;
const char* outputEnd = (char*)(this->str + this->limit - 1);
while (output < outputEnd)
{
char c = *appendedStr;
if (c == '\0')
break;
*output = c;
appendedStr++;
output++;
}
this->length += (uint32)(output - outputStart);
*output = '\0';
}
void add(std::string_view appendedStr)
{
size_t remainingLen = this->limit - this->length;
size_t copyLen = appendedStr.size();
if (remainingLen > copyLen)
copyLen = remainingLen;
char* outputStart = (char*)(this->str + this->length);
std::copy(appendedStr.data(), appendedStr.data() + copyLen, outputStart);
outputStart[copyLen] = '\0';
}
void reset()
{
length = 0;
}
uint32 getLen() const
{
return length;
}
const char* c_str() const
{
str[length] = '\0';
return (const char*)str;
}
void shrink_to_fit()
{
if (!this->allocated)
return;
uint32 newLimit = this->length;
this->str = (uint8*)realloc(this->str, newLimit + 4);
this->limit = newLimit;
}
private:
uint8* str;
uint32 length; /* in bytes */
uint32 limit; /* in bytes */
bool allocated;
};

View file

@ -0,0 +1,112 @@
#pragma once
#include "boost/nowide/convert.hpp"
#include <charconv>
namespace StringHelpers
{
// convert Wii U big-endian wchar_t string to utf8 string
// convert Wii U big-endian wchar_t string to utf8 string
// Reads at most `maxLength` 16-bit units starting at `ptr` and stops early at
// the first zero unit. Each unit is widened to one wchar_t before conversion.
static std::string ToUtf8(const uint16be* ptr, size_t maxLength)
{
	std::wstring wide;
	// check the remaining length BEFORE dereferencing so that an unterminated
	// input of exactly maxLength units is never read past its end
	while (maxLength > 0 && *ptr != 0)
	{
		auto c = (uint16)*ptr;
		wide.push_back(static_cast<wchar_t>(c));
		ptr++;
		maxLength--;
	}
	return boost::nowide::narrow(wide);
}
// Span overload: forwards to the pointer overload with the span's element count as maximum length.
static std::string ToUtf8(std::span<uint16be> input)
{
return ToUtf8(input.data(), input.size());
}
// convert utf8 string to Wii U big-endian wchar_t string
static std::basic_string<uint16be> FromUtf8(std::string_view str)
{
std::basic_string<uint16be> tmpStr;
std::wstring w = boost::nowide::widen(str.data(), str.size());
for (auto& c : w)
tmpStr.push_back((uint16)c);
return tmpStr;
}
// Parses `input` as a 32-bit signed integer.
// A leading "0x"/"0X" selects hexadecimal, anything else is parsed as decimal.
// Returns `defaultValue` when no number can be parsed or the value is out of
// range; trailing non-numeric characters are ignored.
static sint32 ToInt(const std::string_view& input, sint32 defaultValue = 0)
{
	const bool hasHexPrefix = input.size() >= 2 && (input[0] == '0' && (input[1] == 'x' || input[1] == 'X'));
	const char* const parseBegin = input.data() + (hasHexPrefix ? 2 : 0);
	const char* const parseEnd = input.data() + input.size();

	sint32 parsed = defaultValue;
	const std::from_chars_result outcome = std::from_chars(parseBegin, parseEnd, parsed, hasHexPrefix ? 16 : 10);
	const bool failed = outcome.ec == std::errc::invalid_argument || outcome.ec == std::errc::result_out_of_range;
	return failed ? defaultValue : parsed;
}
// Parses `input` as a 64-bit signed integer.
// A leading "0x"/"0X" selects hexadecimal, anything else is parsed as decimal.
// Returns `defaultValue` when no number can be parsed or the value is out of
// range; trailing non-numeric characters are ignored.
static sint64 ToInt64(const std::string_view& input, sint64 defaultValue = 0)
{
	const bool hasHexPrefix = input.size() >= 2 && (input[0] == '0' && (input[1] == 'x' || input[1] == 'X'));
	const char* const parseBegin = input.data() + (hasHexPrefix ? 2 : 0);
	const char* const parseEnd = input.data() + input.size();

	sint64 parsed = defaultValue;
	const std::from_chars_result outcome = std::from_chars(parseBegin, parseEnd, parsed, hasHexPrefix ? 16 : 10);
	const bool failed = outcome.ec == std::errc::invalid_argument || outcome.ec == std::errc::result_out_of_range;
	return failed ? defaultValue : parsed;
}
// Decodes pairs of hex digits from `input` into bytes.
//   input           - text consisting of hex digit pairs (upper or lower case)
//   output          - receives the decoded bytes
//   maxOutputLength - capacity of `output` in bytes
// Decoding stops at the first pair containing a non-hex character, when the
// output is full, or when fewer than two characters remain (a trailing single
// digit is ignored). Returns the number of bytes written.
static size_t ParseHexString(std::string_view input, uint8* output, size_t maxOutputLength)
{
	// value of a single hex digit, or -1 if `ch` is not a hex digit
	const auto hexNibble = [](char ch) -> int
	{
		if (ch >= '0' && ch <= '9')
			return ch - '0';
		if (ch >= 'a' && ch <= 'f')
			return ch - 'a' + 10;
		if (ch >= 'A' && ch <= 'F')
			return ch - 'A' + 10;
		return -1;
	};

	size_t parsedLen = 0;
	// `i + 1 < size` instead of `i < size - 1`: the latter wraps around for an empty input
	for (size_t i = 0; i + 1 < input.size() && parsedLen < maxOutputLength; i += 2)
	{
		const int highNibble = hexNibble(input[i + 0]);
		const int lowNibble = hexNibble(input[i + 1]);
		if (highNibble < 0 || lowNibble < 0)
			break;
		output[parsedLen] = (uint8)((highNibble << 4) | lowNibble);
		parsedLen++;
	}
	return parsedLen;
}
};

View file

@ -0,0 +1,252 @@
#pragma once
class StringTokenParser
{
public:
StringTokenParser() : m_str(nullptr), m_len(0) {};
StringTokenParser(const char* input, sint32 inputLen) : m_str(input), m_len(inputLen) {};
StringTokenParser(std::string_view str) : m_str(str.data()), m_len((sint32)str.size()) {};
// skip whitespaces at current ptr position
void skipWhitespaces()
{
m_str = _skipWhitespaces(m_str, m_len);
}
// decrease string length as long as there is a whitespace at the end
void trimWhitespaces()
{
while (m_len > 0)
{
const char c = m_str[m_len - 1];
if (c != ' ' && c != '\t')
break;
m_len--;
}
}
bool isEndOfString()
{
return m_len <= 0;
}
sint32 skipToCharacter(const char c)
{
auto str = m_str;
auto len = m_len;
sint32 idx = 0;
while (len > 0)
{
if (*str == c)
{
m_str = str;
m_len = len;
return idx;
}
len--;
str++;
idx++;
}
return -1;
}
bool matchWordI(const char* word)
{
auto str = m_str;
auto length = m_len;
str = _skipWhitespaces(str, length);
for (sint32 i = 0; i <= length; i++)
{
if (word[i] == '\0')
{
m_str = str + i;
m_len = length - i;
return true;
}
if (i == length)
return false;
char c1 = str[i];
char c2 = word[i];
c1 = _toUpperCase(c1);
c2 = _toUpperCase(c2);
if (c1 != c2)
return false;
}
return false;
}
bool compareCharacter(sint32 relativeIndex, const char c)
{
if (relativeIndex >= m_len)
return false;
return m_str[relativeIndex] == c;
}
bool compareCharacterI(sint32 relativeIndex, const char c)
{
if (relativeIndex >= m_len)
return false;
return _toUpperCase(m_str[relativeIndex]) == _toUpperCase(c);
}
void skipCharacters(sint32 count)
{
if (count > m_len)
count = m_len;
m_str += count;
m_len -= count;
}
bool parseU32(uint32& val)
{
auto str = m_str;
auto length = m_len;
str = _skipWhitespaces(str, length);
uint32 value = 0;
sint32 index = 0;
bool isHex = false;
if (length <= 0)
return false;
if (length >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
{
isHex = true;
index += 2;
}
else if (str[index] == '0')
{
isHex = true;
index++;
}
if (length <= index)
return false;
if (isHex)
{
sint32 firstDigitIndex = index;
for (; index < length; index++)
{
const char c = str[index];
if (c >= '0' && c <= '9')
{
value *= 0x10;
value += (c - '0');
}
else if (c >= 'a' && c <= 'f')
{
value *= 0x10;
value += (c - 'a' + 10);
}
else if (c >= 'A' && c <= 'F')
{
value *= 0x10;
value += (c - 'A' + 10);
}
else
break;
}
if (index == firstDigitIndex)
return false;
m_str = str + index;
m_len = length - index;
}
else
{
sint32 firstDigitIndex = index;
for (; index < length; index++)
{
const char c = str[index];
if (c >= '0' && c <= '9')
{
value *= 10;
value += (c - '0');
}
else
break;
}
if (index == firstDigitIndex)
return false;
m_str = str + index;
m_len = length - index;
}
val = value;
return true;
}
bool parseSymbolName(const char*& symbolStr, sint32& symbolLength)
{
auto str = m_str;
auto length = m_len;
str = _skipWhitespaces(str, length);
// symbols must start with a letter or _
if (length <= 0)
return false;
if (!(str[0] >= 'a' && str[0] <= 'z') &&
!(str[0] >= 'A' && str[0] <= 'Z') &&
!(str[0] == '_'))
return false;
sint32 idx = 1;
while (idx < length)
{
const char c = str[idx];
if (!(c >= 'a' && c <= 'z') &&
!(c >= 'A' && c <= 'Z') &&
!(c >= '0' && c <= '9') &&
!(c == '_') && !(c == '.'))
break;
idx++;
}
symbolStr = str;
symbolLength = idx;
m_str = str + idx;
m_len = length - idx;
return true;
}
const char* getCurrentPtr()
{
return m_str;
}
sint32 getCurrentLen()
{
return m_len;
}
void storeParserState(StringTokenParser* bak)
{
bak->m_str = m_str;
bak->m_len = m_len;
}
void restoreParserState(const StringTokenParser* bak)
{
m_str = bak->m_str;
m_len = bak->m_len;
}
private:
const char* _skipWhitespaces(const char* str, sint32& length)
{
while (length > 0)
{
if (*str != ' ' && *str != '\t')
break;
str++;
length--;
}
return str;
}
char _toUpperCase(const char c)
{
if (c >= 'a' && c <= 'z')
return c + ('A' - 'a');
return c;
}
private:
const char* m_str;
sint32 m_len;
};

View file

@ -0,0 +1,23 @@
#pragma once
#include "util/helpers/helpers.h"
// Exception carrying a system error message plus a numeric error code.
// The message text comes from the matching GetSystemErrorMessage() overload;
// the code is always whatever GetExceptionError() returns at construction time.
// NOTE(review): presumably GetExceptionError() reports the thread's last OS error - confirm in helpers.h
class SystemException : public std::runtime_error
{
public:
// message built from the current system error state
SystemException()
: std::runtime_error(GetSystemErrorMessage().c_str()), m_error_code(GetExceptionError())
{}
// message built from another exception
SystemException(const std::exception& ex)
: std::runtime_error(GetSystemErrorMessage(ex).c_str()), m_error_code(GetExceptionError())
{}
// message built from a std::error_code (the stored code still comes from GetExceptionError(), not from `ec`)
SystemException(const std::error_code& ec)
: std::runtime_error(GetSystemErrorMessage(ec).c_str()), m_error_code(GetExceptionError())
{}
// numeric error code captured at construction
[[nodiscard]] DWORD GetErrorCode() const { return m_error_code; }
private:
DWORD m_error_code;
};

View file

@ -0,0 +1,20 @@
#pragma once
// Scope guard for temporary state changes: runs `ctor` immediately on
// construction and `dtor` when the guard goes out of scope.
// The guard is not copyable, because a copy would run `dtor` a second time.
template<typename TCtor, typename TDtor>
class TempState
{
public:
	// ctor: callable invoked right away to enter the temporary state
	// dtor: callable stored and invoked by the destructor to leave it
	// If `ctor` throws, the guard is never constructed and `dtor` does not run.
	TempState(TCtor ctor, TDtor dtor)
		: m_dtor(std::move(dtor))
	{
		ctor();
	}

	TempState(const TempState&) = delete;
	TempState& operator=(const TempState&) = delete;

	~TempState()
	{
		m_dtor();
	}

private:
	TDtor m_dtor;
};

View file

@ -0,0 +1,15 @@
#pragma once
// std::array that is indexed directly by an enum class.
// expects the enum class (E) to have a value called ENUM_COUNT which is the maximum value + 1
// T is the element type; the array holds ENUM_COUNT elements.
template<typename E, class T>
class enum_array : public std::array<T, static_cast<size_t>(E::ENUM_COUNT)> {
public:
	// element access by enum value (unchecked, like std::array::operator[])
	T& operator[] (E e) {
		const std::size_t slot = (std::size_t)e;
		return std::array<T, static_cast<size_t>(E::ENUM_COUNT)>::operator[](slot);
	}

	const T& operator[] (E e) const {
		const std::size_t slot = (std::size_t)e;
		return std::array<T, static_cast<size_t>(E::ENUM_COUNT)>::operator[](slot);
	}
};

View file

@ -0,0 +1,74 @@
#pragma once
#include<array>
// Unordered list with a compile-time capacity, stored inline in a std::array.
// Removal swaps the last element into the freed slot, so element order is not
// preserved. With checkMaxSize == false the capacity check in add()/addUnique()
// is skipped and the caller must guarantee that the list never overflows.
template<typename T, uint32 maxElements, bool checkMaxSize = true>
class FixedSizeList
{
public:
	std::array<T, maxElements> m_elementArray;
	int count = 0; // number of valid entries at the front of m_elementArray

	// Appends `n`; silently ignored when the list is full (and checking is enabled).
	void add(T n)
	{
		const bool isFull = count >= maxElements;
		if (checkMaxSize && isFull)
			return;
		m_elementArray[count] = n;
		count++;
	}

	// Appends `n` unless an equal element is already present.
	// Silently ignored when the list is full (and checking is enabled).
	void addUnique(T n)
	{
		const bool isFull = count >= maxElements;
		if (checkMaxSize && isFull)
			return;
		int slot = 0;
		while (slot < count)
		{
			if (m_elementArray[slot] == n)
				return; // already present
			slot++;
		}
		m_elementArray[count] = n;
		count++;
	}

	// Removes the first element equal to `n`, if any.
	void remove(T n)
	{
		int slot = 0;
		while (slot < count)
		{
			if (m_elementArray[slot] == n)
			{
				// fill the gap with the last element
				m_elementArray[slot] = m_elementArray[count - 1];
				count--;
				return;
			}
			slot++;
		}
	}

	// Like remove(), but reports whether an element was found and removed.
	bool containsAndRemove(T n)
	{
		int slot = 0;
		while (slot < count)
		{
			if (m_elementArray[slot] == n)
			{
				// fill the gap with the last element
				m_elementArray[slot] = m_elementArray[count - 1];
				count--;
				return true;
			}
			slot++;
		}
		return false;
	}

	// Index of the first element equal to `n`, or -1 if there is none.
	sint32 find(T n)
	{
		int slot = 0;
		while (slot < count)
		{
			if (m_elementArray[slot] == n)
				return slot;
			slot++;
		}
		return -1;
	}
};

View file

@ -0,0 +1,38 @@
#pragma once
// minimal but efficient non-recursive spinlock implementation
#include <atomic>
// Non-recursive spinlock based on a single atomic flag.
class FSpinlock
{
public:
	// Spins until the lock has been taken by the calling thread.
	void acquire()
	{
		// try to grab the flag; exchange() returns the previous value, so
		// `true` means somebody else currently holds the lock
		while (m_lockBool.exchange(true, std::memory_order_acquire))
		{
			// wait with plain loads until the flag reads as free, then retry the exchange
			while (m_lockBool.load(std::memory_order_relaxed))
				_mm_pause();
		}
	}

	// Single attempt to take the lock; returns true on success.
	bool tryAcquire()
	{
		const bool wasLocked = m_lockBool.exchange(true, std::memory_order_acquire);
		return !wasLocked;
	}

	// Releases the lock.
	void release()
	{
		m_lockBool.store(false, std::memory_order_release);
	}

	// True while the lock is held by any thread (not necessarily the caller).
	bool isHolding() const
	{
		return m_lockBool.load(std::memory_order_relaxed);
	}

private:
	std::atomic<bool> m_lockBool = false;
};

View file

@ -0,0 +1,434 @@
#include "helpers.h"
#include <algorithm>
#include <functional>
#include <cctype>
#include <random>
#include <wx/translation.h>
#include "config/ActiveSettings.h"
#if BOOST_OS_WINDOWS
#include <TlHelp32.h>
#endif
// Removes all leading characters that are part of chars. Modifies str in place and returns it
std::string& ltrim(std::string& str, const std::string& chars)
{
	const auto firstKept = str.find_first_not_of(chars);
	// if nothing is kept firstKept is npos, which makes erase() clear the whole string
	str.erase(0, firstKept);
	return str;
}
// Removes all trailing characters that are part of chars. Modifies str in place and returns it
std::string& rtrim(std::string& str, const std::string& chars)
{
	const auto lastKept = str.find_last_not_of(chars);
	// if nothing is kept lastKept is npos and (npos + 1) wraps around to 0, clearing the whole string
	str.erase(lastKept + 1);
	return str;
}
// Removes the characters in chars from both ends of str. Modifies str in place and returns it
std::string& trim(std::string& str, const std::string& chars)
{
	rtrim(str, chars);
	ltrim(str, chars);
	return str;
}
// Shrinks the view so that it no longer starts with characters that are part of chars.
// Only the view is modified, not the characters it refers to
std::string_view& ltrim(std::string_view& str, const std::string& chars)
{
	const auto firstKept = str.find_first_not_of(chars);
	// firstKept is npos if nothing is kept; clamp so that the whole view is dropped in that case
	const auto prefixLength = std::min(firstKept, str.size());
	str.remove_prefix(prefixLength);
	return str;
}
// Shrinks the view so that it no longer ends with characters that are part of chars.
// Only the view is modified, not the characters it refers to
std::string_view& rtrim(std::string_view& str, const std::string& chars)
{
	const auto lastKept = str.find_last_not_of(chars);
	if (lastKept == std::string_view::npos)
	{
		// nothing is kept (also covers the empty view)
		str.remove_suffix(str.size());
	}
	else
	{
		str.remove_suffix(str.size() - lastKept - 1);
	}
	return str;
}
// Shrinks the view on both ends so that it neither starts nor ends with characters that are part of chars
std::string_view& trim(std::string_view& str, const std::string& chars)
{
	rtrim(str, chars);
	ltrim(str, chars);
	return str;
}
#if BOOST_OS_WINDOWS > 0
std::wstring GetSystemErrorMessageW()
{
return GetSystemErrorMessageW(GetLastError());
}
std::wstring GetSystemErrorMessageW(DWORD error_code)
{
if(error_code == ERROR_SUCCESS)
return {};
LPWSTR lpMsgBuf = nullptr;
FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, error_code, 0, (LPWSTR)&lpMsgBuf, 0, nullptr);
if (lpMsgBuf)
{
std::wstring str = fmt::format(L"{}: {}", _("Error").ToStdWstring(), lpMsgBuf); // TRANSLATE
LocalFree(lpMsgBuf);
return str;
}
return fmt::format(L"{}: {:#x}", _("Error code").ToStdWstring(), error_code);
}
std::string GetSystemErrorMessage(DWORD error_code)
{
if(error_code == ERROR_SUCCESS)
return {};
LPSTR lpMsgBuf = nullptr;
FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr, error_code, 0, (LPSTR)&lpMsgBuf, 0, nullptr);
if (lpMsgBuf)
{
std::string str = fmt::format("{}: {}", _("Error").ToStdString(), lpMsgBuf); // TRANSLATE
LocalFree(lpMsgBuf);
return str;
}
return fmt::format("{}: {:#x}", _("Error code").ToStdString(), error_code);
}
std::string GetSystemErrorMessage()
{
return GetSystemErrorMessage(GetLastError());
}
#else
// Variant for builds where BOOST_OS_WINDOWS is not set: no system error lookup is done, the result is always empty
std::string GetSystemErrorMessage()
{
	return std::string{};
}
#endif
// Combines the current system error message with the text of an exception.
// Result: "<system message>\n<ex.what()>", or only ex.what() if there is no system message
std::string GetSystemErrorMessage(const std::exception& ex)
{
	const std::string systemMessage = GetSystemErrorMessage();
	return systemMessage.empty() ? std::string(ex.what()) : fmt::format("{}\n{}", systemMessage, ex.what());
}
// Combines the current system error message with the text of an error code.
// Result: "<system message>\n<ec.message()>", or only ec.message() if there is no system message
std::string GetSystemErrorMessage(const std::error_code& ec)
{
	const std::string systemMessage = GetSystemErrorMessage();
	return systemMessage.empty() ? ec.message() : fmt::format("{}\n{}", systemMessage, ec.message());
}
#if BOOST_OS_WINDOWS > 0
// Exception code that SetThreadName() raises (via RaiseException) to pass a THREADNAME_INFO payload along.
// NOTE(review): this looks like the MSVC convention for reporting thread names to an attached debugger - confirm against the Microsoft documentation
const DWORD MS_VC_EXCEPTION = 0x406D1388;
// The payload is handed over as an array of ULONG_PTR arguments, so the layout is fixed with explicit packing. Do not reorder or resize the members
#pragma pack(push,8)
typedef struct tagTHREADNAME_INFO
{
DWORD dwType; // Must be 0x1000.
LPCSTR szName; // Pointer to name (in user addr space).
DWORD dwThreadID; // Thread ID (-1=caller thread).
DWORD dwFlags; // Reserved for future use, must be zero.
} THREADNAME_INFO;
#pragma pack(pop)
#endif
// Assigns a name to the calling thread.
// Windows: raises MS_VC_EXCEPTION with a THREADNAME_INFO payload and swallows it again. Compiled out when _PUBLIC_RELEASE is defined.
// Other platforms: uses pthread_setname_np. Names are limited to 15 characters plus terminator there and
// longer names are rejected by the call, so the name is truncated instead of being silently dropped.
void SetThreadName(const char* name)
{
#if BOOST_OS_WINDOWS > 0
#ifndef _PUBLIC_RELEASE
	THREADNAME_INFO info;
	info.dwType = 0x1000;
	info.szName = name;
	info.dwThreadID = GetCurrentThreadId();
	info.dwFlags = 0;
#pragma warning(push)
#pragma warning(disable: 6320 6322)
	__try {
		// the structure is passed as an array of ULONG_PTR sized exception arguments
		RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR), (ULONG_PTR*)&info);
	}
	__except (EXCEPTION_EXECUTE_HANDLER) {
	}
#pragma warning(pop)
#endif
#else
	constexpr size_t kMaxNameLength = 15; // 16 bytes including the terminating null byte
	const std::string truncatedName = std::string(name).substr(0, kMaxNameLength);
	pthread_setname_np(pthread_self(), truncatedName.c_str());
#endif
}
#if BOOST_OS_WINDOWS > 0
std::pair<DWORD, DWORD> GetWindowsVersion()
{
using RtlGetVersion_t = LONG(*)(POSVERSIONINFOEXW);
static RtlGetVersion_t pRtlGetVersion = nullptr;
if(!pRtlGetVersion)
pRtlGetVersion = (RtlGetVersion_t)GetProcAddress(GetModuleHandleA("ntdll.dll"), "RtlGetVersion");
cemu_assert(pRtlGetVersion);
OSVERSIONINFOEXW version_info{};
pRtlGetVersion(&version_info);
return { version_info.dwMajorVersion, version_info.dwMinorVersion };
}
bool IsWindows81OrGreater()
{
const auto [major, minor] = GetWindowsVersion();
return major > 6 || (major == 6 && minor >= 3);
}
bool IsWindows10OrGreater()
{
const auto [major, minor] = GetWindowsVersion();
return major >= 10;
}
#endif
// Returns the image path of the parent process of the current process.
// The result is an empty path if the lookup fails. On non-Windows builds the function only triggers assert_dbg() and returns an empty path.
fs::path GetParentProcess()
{
	fs::path result;
#if BOOST_OS_WINDOWS
	HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
	if (hSnapshot == INVALID_HANDLE_VALUE)
		return result;

	const DWORD currentPid = GetCurrentProcessId();
	PROCESSENTRY32 entry{};
	entry.dwSize = sizeof(entry);
	// walk the process list until the entry of the current process is found; it carries the parent's process id
	BOOL hasEntry = Process32First(hSnapshot, &entry);
	while (hasEntry)
	{
		if (entry.th32ProcessID != currentPid)
		{
			hasEntry = Process32Next(hSnapshot, &entry);
			continue;
		}
		HANDLE hParentProcess = OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, FALSE, entry.th32ParentProcessID);
		if (hParentProcess)
		{
			wchar_t imagePath[MAX_PATH];
			DWORD imagePathLength = std::size(imagePath);
			if (QueryFullProcessImageNameW(hParentProcess, 0, imagePath, &imagePathLength) && imagePathLength > 0)
				result = imagePath;
			CloseHandle(hParentProcess);
		}
		break;
	}
	CloseHandle(hSnapshot);
#else
	assert_dbg();
#endif
	return result;
}
// Returns a copy of s with the default trim characters of ltrim() removed from the start
std::string ltrim_copy(const std::string& s)
{
	std::string trimmed(s);
	ltrim(trimmed);
	return trimmed;
}
// Returns a copy of s with the default trim characters of rtrim() removed from the end
std::string rtrim_copy(const std::string& s)
{
	std::string trimmed(s);
	rtrim(trimmed);
	return trimmed;
}
// Returns the number of CPU cores. A non-zero result is computed once and then served from a function-local cache.
// Windows: counts the RelationProcessorCore entries reported by GetLogicalProcessorInformation. The count is only
// used if it is non-zero and smaller than std::thread::hardware_concurrency(), otherwise hardware_concurrency() is returned.
// Other platforms: std::thread::hardware_concurrency().
uint32_t GetPhysicalCoreCount()
{
	static uint32_t s_core_count = 0;
	if (s_core_count != 0)
		return s_core_count;
#if BOOST_OS_WINDOWS
	auto core_count = std::thread::hardware_concurrency();
	// Get physical cores
	// the first call only queries the required buffer size in bytes
	DWORD returnLength = 0;
	GetLogicalProcessorInformation(nullptr, &returnLength);
	if (returnLength > 0)
	{
		constexpr size_t kEntrySize = sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
		// round up so that the buffer is never smaller than the requested byte count
		std::vector<SYSTEM_LOGICAL_PROCESSOR_INFORMATION> buffer((returnLength + kEntrySize - 1) / kEntrySize);
		if (GetLogicalProcessorInformation(buffer.data(), &returnLength))
		{
			const size_t entryCount = returnLength / kEntrySize;
			uint32_t counter = 0;
			for (size_t i = 0; i < entryCount; ++i)
			{
				if (buffer[i].Relationship == RelationProcessorCore)
					++counter;
			}
			if (counter > 0 && counter < core_count)
				core_count = counter;
		}
	}
	s_core_count = core_count;
	return core_count;
#else
	s_core_count = std::thread::hardware_concurrency();
	return s_core_count;
#endif
}
// Checks whether files can be created inside directory p by creating (and removing again) a temporary file
// with a random name. Returns false if p is not an existing directory or if the file could not be created.
// Filesystem errors are reported through the return value, never as exceptions from the filesystem queries.
bool TestWriteAccess(const fs::path& p)
{
	std::error_code ec;
	// must be path and must exist (is_directory also yields false for missing paths and on errors)
	if (!fs::is_directory(p, ec))
		return false;
	// retry 3 times (a retry only happens if the random file name is already taken)
	for (int i = 0; i < 3; ++i)
	{
		const auto filename = p / fmt::format("_{}.tmp", GenerateRandomString(8));
		if (fs::exists(filename, ec))
			continue;
		std::ofstream file(filename);
		if (!file.is_open()) // file couldn't be created
			break;
		file.close();
		// best effort cleanup, a failure to delete does not change the result
		fs::remove(filename, ec);
		return true;
	}
	return false;
}
// Makes path relative to the Cemu directory (ActiveSettings::GetPath()).
// If the conversion throws, path is returned unchanged.
fs::path MakeRelativePath(const fs::path& path)
{
	try
	{
		const fs::path cemuDirectory = ActiveSettings::GetPath();
		return fs::relative(path, cemuDirectory);
	}
	catch (const std::exception&)
	{
		// fall through to returning the input
	}
	return path;
}
#ifdef HAS_DIRECTINPUT
// Parses a GUID from its textual form "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" (hexadecimal digits).
// Returns true and fills guid if all 11 fields were read; otherwise returns false and leaves guid untouched.
bool GUIDFromString(const char* string, GUID& guid)
{
	// %lX / %X store through unsigned long* / unsigned int*, so the temporaries use exactly these types
	unsigned long data1;
	unsigned int data2, data3;
	unsigned int data4[8];
	const sint32 count = sscanf_s(string, "%08lX-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X", &data1, &data2, &data3, &data4[0], &data4[1], &data4[2], &data4[3], &data4[4], &data4[5], &data4[6], &data4[7]);
	if (count != 11)
		return false;
	guid.Data1 = data1;
	guid.Data2 = static_cast<unsigned short>(data2);
	guid.Data3 = static_cast<unsigned short>(data3);
	for (size_t i = 0; i < 8; ++i)
		guid.Data4[i] = static_cast<unsigned char>(data4[i]);
	return true;
}
// Formats guid as "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" using uppercase hexadecimal digits
std::string StringFromGUID(const GUID& guid)
{
	const auto& d4 = guid.Data4;
	char buffer[256];
	sprintf(buffer, "%08lX-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X",
		guid.Data1, guid.Data2, guid.Data3,
		d4[0], d4[1], d4[2], d4[3], d4[4], d4[5], d4[6], d4[7]);
	return buffer;
}
// Wide character variant of StringFromGUID: formats guid as "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"
std::wstring WStringFromGUID(const GUID& guid)
{
	const auto& d4 = guid.Data4;
	wchar_t buffer[256];
	swprintf_s(buffer, L"%08lX-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X",
		guid.Data1, guid.Data2, guid.Data3,
		d4[0], d4[1], d4[2], d4[3], d4[4], d4[5], d4[6], d4[7]);
	return buffer;
}
#endif
// Splits str at every occurrence of delimiter.
// The returned views refer to the memory behind str, so they are only valid as long as that memory is.
// Empty tokens are kept: "a,,b" yields {"a", "", "b"} and an empty input yields a single empty token.
std::vector<std::string_view> TokenizeView(std::string_view str, char delimiter)
{
	std::vector<std::string_view> result;
	size_t tokenStart = 0;
	for (auto index = str.find(delimiter); index != std::string_view::npos; index = str.find(delimiter, index + 1))
	{
		result.emplace_back(str.substr(tokenStart, index - tokenStart));
		tokenStart = index + 1;
	}
	// remainder after the last delimiter. tokenStart never exceeds str.size(), so substr() cannot throw here
	result.emplace_back(str.substr(tokenStart));
	return result;
}
// Splits str at every occurrence of delimiter and returns the tokens as independent strings.
// Empty tokens are kept: "a,,b" yields {"a", "", "b"} and an empty input yields a single empty token.
std::vector<std::string> Tokenize(std::string_view str, char delimiter)
{
	std::vector<std::string> result;
	size_t tokenStart = 0;
	for (auto index = str.find(delimiter); index != std::string_view::npos; index = str.find(delimiter, index + 1))
	{
		result.emplace_back(str.substr(tokenStart, index - tokenStart));
		tokenStart = index + 1;
	}
	// remainder after the last delimiter. tokenStart never exceeds str.size(), so substr() cannot throw here
	result.emplace_back(str.substr(tokenStart));
	return result;
}
// Returns a string of the given length in which every character is picked at random (uniformly) from characters.
// characters must not be empty: this is asserted in debug builds, release builds return an empty string instead
// of indexing out of bounds.
std::string GenerateRandomString(size_t length, std::string_view characters)
{
	assert(!characters.empty());
	if (characters.empty())
		return {};
	std::random_device rd;
	std::mt19937 gen(rd());
	std::uniform_int_distribution<decltype(characters.size())> index_dist(0, characters.size() - 1);
	std::string result;
	result.reserve(length);
	for (size_t i = 0; i < length; ++i)
		result.push_back(characters[index_dist(gen)]);
	return result;
}

// Returns a random string of the given length consisting of the characters a-z, A-Z and 0-9
std::string GenerateRandomString(size_t length)
{
	static constexpr std::string_view kCharacters{
		"abcdefghijklmnopqrstuvwxyz"
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"1234567890" };
	return GenerateRandomString(length, kCharacters);
}

239
src/util/helpers/helpers.h Normal file
View file

@ -0,0 +1,239 @@
#pragma once
#include <charconv>
#include <filesystem>
#include <string_view>
#include "util/math/vector2.h"
#include "util/math/vector3.h"
#ifdef __clang__
#include "Common/linux/fast_float.h"
#endif
// Converts an enum value to a value of its underlying integer type
template <typename TType>
constexpr auto to_underlying(TType v) noexcept
{
	using underlying_t = std::underlying_type_t<TType>;
	return static_cast<underlying_t>(v);
}
// Adapter that makes a range-based for loop walk a container back to front (for use before C++20).
// Only a reference is stored, so the wrapped container has to outlive the adapter.
template<typename T>
class reverse_itr
{
public:
	explicit reverse_itr(T& iterable) : m_target(iterable) {}

	auto begin() const { return std::rbegin(m_target); }
	auto end() const { return std::rend(m_target); }

private:
	T& m_target;
};
// fallback for toolchains whose math header does not provide M_PI
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

// Converts an angle from degrees to radians. All math is done in T
template<typename T>
T deg2rad(T v)
{
	const T pi = static_cast<T>(M_PI);
	const T halfTurnDegrees = static_cast<T>(180);
	return v * pi / halfTurnDegrees;
}

// Converts an angle from radians to degrees. All math is done in T
template<typename T>
T rad2deg(T v)
{
	const T pi = static_cast<T>(M_PI);
	const T halfTurnDegrees = static_cast<T>(180);
	return v * halfTurnDegrees / pi;
}
// Component-wise degree -> radian conversion of a 3D vector
template<typename T>
Vector3<T> deg2rad(const Vector3<T>& v)
{
	return Vector3<T>(deg2rad(v.x), deg2rad(v.y), deg2rad(v.z));
}

// Component-wise radian -> degree conversion of a 3D vector
template<typename T>
Vector3<T> rad2deg(const Vector3<T>& v)
{
	return Vector3<T>(rad2deg(v.x), rad2deg(v.y), rad2deg(v.z));
}

// Component-wise degree -> radian conversion of a 2D vector
template<typename T>
Vector2<T> deg2rad(const Vector2<T>& v)
{
	return Vector2<T>(deg2rad(v.x), deg2rad(v.y));
}

// Component-wise radian -> degree conversion of a 2D vector
template<typename T>
Vector2<T> rad2deg(const Vector2<T>& v)
{
	return Vector2<T>(rad2deg(v.x), rad2deg(v.y));
}
// Number of CPU cores. On Windows the physical core count from GetLogicalProcessorInformation is used when it is
// smaller than std::thread::hardware_concurrency(); otherwise hardware_concurrency() is returned
uint32_t GetPhysicalCoreCount();
// Creates a temporary file to test for write access
bool TestWriteAccess(const fs::path& p);
// Returns path relative to ActiveSettings::GetPath(), or path itself if the conversion throws
fs::path MakeRelativePath(const fs::path& path);
#ifdef HAS_DIRECTINPUT
// Conversion between GUID and its textual form "XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX" (hexadecimal).
// GUIDFromString returns false if the text could not be parsed completely
bool GUIDFromString(const char* string, GUID& guid);
std::string StringFromGUID(const GUID& guid);
std::wstring WStringFromGUID(const GUID& guid);
#endif
// Split string at every delimiter, empty tokens are kept.
// TokenizeView returns views into the input, Tokenize returns independent strings
std::vector<std::string_view> TokenizeView(std::string_view string, char delimiter);
std::vector<std::string> Tokenize(std::string_view string, char delimiter);
// Return a trimmed copy of s (default trim characters of ltrim/rtrim below)
std::string ltrim_copy(const std::string& s);
std::string rtrim_copy(const std::string& s);
// Remove the characters in chars from the start (ltrim), end (rtrim) or both ends (trim).
// The argument is modified in place and returned by reference
std::string& ltrim(std::string& str, const std::string& chars = "\t\n\v\f\r ");
std::string& rtrim(std::string& str, const std::string& chars = "\t\n\v\f\r ");
std::string& trim(std::string& str, const std::string& chars = "\t\n\v\f\r ");
// string_view variants: only the view is narrowed, the underlying characters are not modified
std::string_view& ltrim(std::string_view& str, const std::string& chars = "\t\n\v\f\r ");
std::string_view& rtrim(std::string_view& str, const std::string& chars = "\t\n\v\f\r ");
std::string_view& trim(std::string_view& str, const std::string& chars = "\t\n\v\f\r ");
// Random string of the given length. Without an explicit character set a-z, A-Z and 0-9 are used
std::string GenerateRandomString(size_t length);
std::string GenerateRandomString(size_t length, std::string_view characters);
// Human readable text for a system error code. The overloads without parameter use GetLastError() on Windows;
// on other platforms the parameterless std::string variant returns an empty string
std::wstring GetSystemErrorMessageW();
std::wstring GetSystemErrorMessageW(DWORD error_code);
std::string GetSystemErrorMessage();
std::string GetSystemErrorMessage(DWORD error_code);
// Current system error message followed by a newline and ex.what() / ec.message()
std::string GetSystemErrorMessage(const std::exception& ex);
std::string GetSystemErrorMessage(const std::error_code& ec);
// Combines several callables into one object whose call operator is the overload set of all of them,
// e.g. to pass a set of lambdas to std::visit
template<class... Fs>
struct overloaded : Fs...
{
	using Fs::operator()...;
};

// deduction guide: lets overloaded{ f1, f2, ... } deduce the template arguments from the passed callables
template<class... Fs>
overloaded(Fs...) -> overloaded<Fs...>;
// Type-aware equality check:
// - floating point values are equal if they differ by less than 0.000001 (fixed absolute tolerance; a relative
//   epsilon/ulp based comparison was sketched by the original author but is not in use)
// - const char* values are compared by content using strcmp
// - everything else uses operator==
template<typename T>
bool equals(T v1, T v2)
{
	if constexpr (std::is_floating_point_v<T>)
	{
		const T difference = std::abs(v1 - v2);
		return difference < (T)0.000001;
	}
	else if constexpr (std::is_same_v<T, const char*>)
	{
		return strcmp(v1, v2) == 0;
	}
	else
	{
		return v1 == v2;
	}
}
// Parses an integer of type T from str using the given base. Leading whitespace is skipped.
// For base 16 an optional "0x"/"0X" prefix and an optional '-' in front of the digits are accepted; a negative
// value is converted with wrap-around if T is unsigned.
// Returns a value-initialized T (0) if str is empty, consists only of whitespace or cannot be parsed.
template<typename T>
T ConvertString(std::string_view str, sint32 base)
{
	static_assert(std::is_integral_v<T>);
	ltrim(str);
	// checked after trimming so that whitespace-only input never reaches the str[0] access below
	if (str.empty())
		return {};

	if (base != 16)
	{
		T result{};
		if (std::from_chars(str.data(), str.data() + str.size(), result, base).ec == std::errc())
			return result;
		return {};
	}

	// from_chars cant deal with hex numbers starting with "0x", so sign and prefix are stripped manually
	const bool isNegative = str[0] == '-';
	if (isNegative)
		str.remove_prefix(1);
	if (str.size() >= 2 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X'))
		str.remove_prefix(2);
	// the sign was already consumed; a second one would otherwise be applied twice
	if (str.empty() || str[0] == '-')
		return {};

	T result{};
	if (std::from_chars(str.data(), str.data() + str.size(), result, base).ec != std::errc())
		return {};
	if (isNegative)
	{
		if constexpr (std::is_unsigned_v<T>)
			result = static_cast<T>(T{ 0 } - result); // well-defined modular negation
		else
			result = static_cast<T>(-result);
	}
	return result;
}
// Parses a value of type T from str. Leading whitespace is skipped.
// - bool: true for "1" or (case-insensitive) "true"
// - floating point: leading '+' characters are skipped before parsing
// - enum: parsed as the underlying integer type
// - other (integer) types: base 16 if the digits start with "0x"/"0X" (optionally preceded by '-'), base 10 otherwise
// Returns a value-initialized T if str is empty, consists only of whitespace or cannot be parsed.
template<typename T>
T ConvertString(std::string_view str)
{
	ltrim(str);
	// checked after trimming so that whitespace-only input never reaches the str[0] access below
	if (str.empty())
		return {};

	if constexpr (std::is_same_v<T, bool>)
	{
		return str == "1" || boost::iequals(str, "true");
	}
	else if constexpr(std::is_floating_point_v<T>)
	{
		// from_chars can't deal with float conversion starting with "+"
		ltrim(str, "+");
		T result{};
#ifdef __clang__
		if (fast_float::from_chars(str.data(), str.data() + str.size(), result).ec == std::errc())
			return result;
#else
		if (std::from_chars(str.data(), str.data() + str.size(), result).ec == std::errc())
			return result;
#endif
		return {};
	}
	else if constexpr(std::is_enum_v<T>)
	{
		return (T)ConvertString<std::underlying_type_t<T>>(str);
	}
	else
	{
		// position of a possible "0x" prefix: directly at the start or right after a minus sign
		const size_t prefixPos = str[0] == '-' ? 1 : 0;
		const bool isHex = str.size() >= prefixPos + 2 && str[prefixPos] == '0' && (str[prefixPos + 1] == 'x' || str[prefixPos + 1] == 'X');
		return ConvertString<T>(str, isHex ? 16 : 10);
	}
}
// Converts degrees to radians. The calculation is done in double precision and then converted back to T
template <typename T>
constexpr T DegToRad(T deg)
{
	return static_cast<T>(static_cast<double>(deg) * M_PI / 180);
}

// Converts radians to degrees. The calculation is done in double precision and then converted back to T
template <typename T>
constexpr T RadToDeg(T rad)
{
	return static_cast<T>(static_cast<double>(rad) * 180 / M_PI);
}
// Formats value with operator<< using the "C" locale, so the result does not depend on the global locale
template<typename T>
std::string ToString(T value)
{
	std::ostringstream stream;
	stream.imbue(std::locale::classic());
	stream << value;
	return stream.str();
}
// Parses a T from value with operator>> using the "C" locale, so the result does not depend on the global locale.
// The result starts out value-initialized, so input that cannot be read (e.g. an empty string) yields T{}
// instead of an indeterminate value.
template<typename T>
T FromString(std::string value)
{
	std::istringstream str(value);
	str.imbue(std::locale::classic());
	T tmp{};
	str >> tmp;
	return tmp;
}
// Removes every element that already occurred earlier in vec. The first occurrence of each value stays where
// it is, so the relative order is preserved. Returns the new size of vec.
template<typename T>
size_t RemoveDuplicatesKeepOrder(std::vector<T>& vec)
{
	std::set<T> seen;
	const auto isDuplicate = [&seen](const T& value)
	{
		// insert() reports through .second whether the value was newly added
		return !seen.insert(value).second;
	};
	vec.erase(std::remove_if(vec.begin(), vec.end(), isDuplicate), vec.end());
	return vec.size();
}
// Assigns a name to the calling thread (Windows: MS_VC_EXCEPTION mechanism, otherwise pthread_setname_np)
void SetThreadName(const char* name);
// Builds a 64-bit value from two 32-bit halves: high ends up in bits 32-63, low in bits 0-31
inline uint64 MakeU64(uint32 high, uint32 low)
{
	const uint64 upperHalf = static_cast<uint64>(high) << 32;
	const uint64 lowerHalf = static_cast<uint64>(low);
	return upperHalf | lowerHalf;
}
// Returns the Windows version as reported by RtlGetVersion
// MAJOR; MINOR
std::pair<DWORD, DWORD> GetWindowsVersion();
// Version checks based on GetWindowsVersion(): 8.1 corresponds to version 6.3, 10 to major version 10
bool IsWindows81OrGreater();
bool IsWindows10OrGreater();
// Image path of the parent process of the current process. Empty if it could not be determined (always empty on non-Windows builds)
fs::path GetParentProcess();

View file

@ -0,0 +1,124 @@
#pragma once
#include <mutex>
template<typename T, uint32 elements, typename P = uint32>
class RingBuffer
{
public:
RingBuffer<T, elements, P>();
bool Push(const T& v);
template<class Q = T>
typename std::enable_if< !std::is_array<T>::value, Q >::type
Pop()
{
std::unique_lock<std::mutex> lock(m_mutex);
if (m_readPointer == m_writePointer)
{
return T();
}
const T& tmp = m_data[m_readPointer];
m_readPointer = (m_readPointer + 1) % elements;
return tmp;
}
T& GetSlot();
T& GetSlotAndAdvance();
void Advance();
void Clear();
P GetReadPointer();
P GetWritePointer();
bool HasData();
private:
T m_data[elements];
P m_readPointer;
P m_writePointer;
std::mutex m_mutex;
};
template <typename T, uint32 elements, typename P>
RingBuffer<T, elements, P>::RingBuffer()
: m_readPointer(0), m_writePointer(0)
{
}
template <typename T, uint32 elements, typename P>
bool RingBuffer<T, elements, P>::Push(const T& v)
{
std::unique_lock<std::mutex> lock(m_mutex);
if (m_readPointer == ((m_writePointer + 1) % elements))
{
debugBreakpoint(); // buffer is full
return false;
}
m_data[m_writePointer] = v;
m_writePointer = (m_writePointer + 1) % elements;
return true;
}
template <typename T, uint32 elements, typename P>
T& RingBuffer<T, elements, P>::GetSlot()
{
std::unique_lock<std::mutex> lock(m_mutex);
T& result = m_data[m_writePointer];
m_writePointer = (m_writePointer + 1) % elements;
return result;
}
template <typename T, uint32 elements, typename P>
T& RingBuffer<T, elements, P>::GetSlotAndAdvance()
{
std::unique_lock<std::mutex> lock(m_mutex);
T& result = m_data[m_writePointer];
m_writePointer = (m_writePointer + 1) % elements;
m_readPointer = (m_readPointer + 1) % elements;
return result;
}
template <typename T, uint32 elements, typename P>
void RingBuffer<T, elements, P>::Advance()
{
std::unique_lock<std::mutex> lock(m_mutex);
if (m_readPointer != m_writePointer)
{
m_readPointer = (m_readPointer + 1) % elements;
}
}
template <typename T, uint32 elements, typename P>
void RingBuffer<T, elements, P>::Clear()
{
std::unique_lock<std::mutex> lock(m_mutex);
m_readPointer = 0;
m_writePointer = 0;
}
template <typename T, uint32 elements, typename P>
P RingBuffer<T, elements, P>::GetReadPointer()
{
std::unique_lock<std::mutex> lock(m_mutex);
P tmp = m_readPointer;
return tmp;
}
template <typename T, uint32 elements, typename P>
P RingBuffer<T, elements, P>::GetWritePointer()
{
std::unique_lock<std::mutex> lock(m_mutex);
P tmp = m_writePointer;
return tmp;
}
template <typename T, uint32 elements, typename P>
bool RingBuffer<T, elements, P>::HasData()
{
std::unique_lock<std::mutex> lock(m_mutex);
return m_readPointer != m_writePointer;
}

View file

@ -0,0 +1,34 @@
#include "util/highresolutiontimer/HighResolutionTimer.h"
#include "Common/precompiled.h"
// Captures the current time.
// Windows: raw value of QueryPerformanceCounter. Other platforms: CLOCK_MONOTONIC converted to nanoseconds.
HighResolutionTimer HighResolutionTimer::now()
{
#if BOOST_OS_WINDOWS
	LARGE_INTEGER counter;
	QueryPerformanceCounter(&counter);
	return HighResolutionTimer(counter.QuadPart);
#else
	timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	const uint64 seconds = (uint64)ts.tv_sec;
	const uint64 nanoseconds = (uint64)ts.tv_nsec;
	return HighResolutionTimer(seconds * (uint64)1000000000 + nanoseconds);
#endif
}
// Returns the number of ticks per second (the value m_freq was initialized with)
HRTick HighResolutionTimer::getFrequency()
{
	const HRTick ticksPerSecond = m_freq;
	return ticksPerSecond;
}
// Number of timer ticks per second, determined once during static initialization
uint64 HighResolutionTimer::m_freq = []() -> uint64 {
#if BOOST_OS_WINDOWS
	LARGE_INTEGER freq;
	QueryPerformanceFrequency(&freq);
	return (uint64)(freq.QuadPart);
#else
	// now() converts CLOCK_MONOTONIC to nanoseconds, so one second is always 10^9 ticks no matter which
	// resolution the clock reports (deriving the value from clock_getres would be wrong for coarser clocks)
	return (uint64)1000000000;
#endif
}();

View file

@ -0,0 +1,75 @@
#pragma once
using HRTick = uint64;
class HighResolutionTimer
{
public:
HighResolutionTimer()
{
m_timePoint = 0;
}
HRTick getTick() const
{
return m_timePoint;
}
uint64 getTickInSeconds() const
{
return m_timePoint / m_freq;
}
// return time difference in seconds, this is an utility function mainly intended for debugging/benchmarking purposes. Avoid using doubles for precise timing
static double getTimeDiff(HRTick startTime, HRTick endTime)
{
return (double)(endTime - startTime) / (double)m_freq;
}
// returns tick difference and frequency
static uint64 getTimeDiffEx(HRTick startTime, HRTick endTime, uint64& freq)
{
freq = m_freq;
return endTime - startTime;
}
static HighResolutionTimer now();
static HRTick getFrequency();
private:
HighResolutionTimer(uint64 timePoint) : m_timePoint(timePoint) {};
uint64 m_timePoint;
static uint64 m_freq;
};
// benchmark helper utility
// measures time between Start() and Stop() call
class BenchmarkTimer
{
public:
	// Records the start of the measured interval
	void Start()
	{
		m_startTime = HighResolutionTimer::now().getTick();
	}

	// Records the end of the measured interval
	void Stop()
	{
		m_stopTime = HighResolutionTimer::now().getTick();
	}

	// Time between the last Start() and Stop() call in milliseconds.
	// Both calls must have happened, in that order (checked with debug asserts only)
	double GetElapsedMilliseconds() const
	{
		cemu_assert_debug(m_startTime != 0 && m_stopTime != 0);
		cemu_assert_debug(m_startTime <= m_stopTime);
		uint64 tickDif = m_stopTime - m_startTime;
		// getFrequency() is static, there is no need to query the clock just to call it
		double freq = (double)HighResolutionTimer::getFrequency();
		double elapsedMS = (double)tickDif * 1000.0 / freq;
		return elapsedMS;
	}

private:
	HRTick m_startTime{};
	HRTick m_stopTime{};
};

View file

@ -0,0 +1,74 @@
#include "libusbWrapper.h"
/*
#include "config/ActiveSettings.h"
libusbWrapper::libusbWrapper()
{
}
void libusbWrapper::init()
{
#if BOOST_OS_WINDOWS
if (m_isInitialized)
return;
m_isInitialized = true;
// load module
m_module = LoadLibraryW(L"libusb-1.0.dll");
if (!m_module)
{
const auto path = ActiveSettings::GetPath("resources/libusb-1.0.dll");
m_module = LoadLibraryW(path.generic_wstring().c_str());
if (!m_module)
{
cemuLog_log(LogType::Force, "libusbWrapper: can't load libusb-1.0.dll");
return;
}
}
// grab imports
#define FETCH_IMPORT(__NAME__) p_##__NAME__ = (decltype(&__NAME__))GetProcAddress(m_module, #__NAME__)
FETCH_IMPORT(libusb_init);
FETCH_IMPORT(libusb_exit);
FETCH_IMPORT(libusb_interrupt_transfer);
FETCH_IMPORT(libusb_get_device_list);
FETCH_IMPORT(libusb_get_device_descriptor);
FETCH_IMPORT(libusb_open);
FETCH_IMPORT(libusb_close);
FETCH_IMPORT(libusb_kernel_driver_active);
FETCH_IMPORT(libusb_detach_kernel_driver);
FETCH_IMPORT(libusb_claim_interface);
FETCH_IMPORT(libusb_free_device_list);
FETCH_IMPORT(libusb_get_config_descriptor);
FETCH_IMPORT(libusb_hotplug_register_callback);
FETCH_IMPORT(libusb_hotplug_deregister_callback);
FETCH_IMPORT(libusb_has_capability);
FETCH_IMPORT(libusb_error_name);
FETCH_IMPORT(libusb_get_string_descriptor);
FETCH_IMPORT(libusb_get_string_descriptor_ascii);
FETCH_IMPORT(libusb_free_config_descriptor);
#undef FETCH_IMPORT
// create default context
if (p_libusb_init)
p_libusb_init(nullptr);
#else
cemuLog_log(LogType::Force, "libusbWrapper: Not supported on this OS");
#endif
}
libusbWrapper::~libusbWrapper()
{
#if BOOST_OS_WINDOWS > 0
// destroy default context
if(p_libusb_exit)
p_libusb_exit(nullptr);
// unload module
if(m_module)
FreeLibrary(m_module);
#endif
}
*/

View file

@ -0,0 +1,50 @@
#pragma once
// todo - port to cmake build
/*
#include "util/helpers/ClassWrapper.h"
#pragma warning(disable:4200)
#include "libusb-1.0/libusb.h"
#pragma warning(default:4200)
class libusbWrapper : public SingletonRef<libusbWrapper>
{
public:
libusbWrapper();
~libusbWrapper();
void init();
bool isAvailable() const { return p_libusb_init != nullptr; };
decltype(&libusb_init) p_libusb_init = nullptr;
decltype(&libusb_exit) p_libusb_exit = nullptr;
decltype(&libusb_interrupt_transfer) p_libusb_interrupt_transfer;
decltype(&libusb_get_device_list) p_libusb_get_device_list;
decltype(&libusb_get_device_descriptor) p_libusb_get_device_descriptor;
decltype(&libusb_open) p_libusb_open;
decltype(&libusb_kernel_driver_active) p_libusb_kernel_driver_active;
decltype(&libusb_detach_kernel_driver) p_libusb_detach_kernel_driver;
decltype(&libusb_claim_interface) p_libusb_claim_interface;
decltype(&libusb_free_device_list) p_libusb_free_device_list;
decltype(&libusb_get_config_descriptor) p_libusb_get_config_descriptor;
decltype(&libusb_free_config_descriptor) p_libusb_free_config_descriptor;
decltype(&libusb_close) p_libusb_close;
decltype(&libusb_hotplug_register_callback) p_libusb_hotplug_register_callback;
decltype(&libusb_hotplug_deregister_callback) p_libusb_hotplug_deregister_callback;
decltype(&libusb_has_capability) p_libusb_has_capability;
decltype(&libusb_error_name) p_libusb_error_name;
decltype(&libusb_get_string_descriptor) p_libusb_get_string_descriptor;
decltype(&libusb_get_string_descriptor_ascii) p_libusb_get_string_descriptor_ascii;
private:
#if BOOST_OS_WINDOWS > 0
HMODULE m_module = nullptr;
bool m_isInitialized = false;
#endif
};
*/

36
src/util/math/glm.h Normal file
View file

@ -0,0 +1,36 @@
#pragma once
// Additional quaternion helpers placed in the glm namespace
namespace glm
{
	// Rescales x/y/z so that their length becomes sqrt(1 - w*w); w itself is kept.
	// No special handling exists for x = y = z = 0 or for |w| > 1
	inline quat normalize_xyz(const quat& q)
	{
		const auto xyzTargetLength = std::sqrt(1.0f - q.w * q.w);
		const auto xyzCurrentLength = sqrtf(q.x * q.x + q.y * q.y + q.z * q.z);
		const auto lengthScaler = xyzTargetLength / xyzCurrentLength;
		return quat(q.w, q.x * lengthScaler, q.y * lengthScaler, q.z * lengthScaler);
	}

	// NOTE(review): GetVectorX/Y/Z presumably return the three axes of the rotation described by q (q assumed to be normalized) - confirm row/column convention with the callers
	inline vec3 GetVectorX(const quat& q)
	{
		const auto vx = 2.0f * (q.w * q.w + q.x * q.x) - 1.0f;
		const auto vy = 2.0f * (q.x * q.y - q.w * q.z);
		const auto vz = 2.0f * (q.x * q.z + q.w * q.y);
		return vec3(vx, vy, vz);
	}

	inline vec3 GetVectorY(const quat& q)
	{
		const auto vx = 2.0f * (q.x * q.y + q.w * q.z);
		const auto vy = 2.0f * (q.w * q.w + q.y * q.y) - 1.0f;
		const auto vz = 2.0f * (q.y * q.z - q.w * q.x);
		return vec3(vx, vy, vz);
	}

	inline vec3 GetVectorZ(const quat& q)
	{
		const auto vx = 2.0f * (q.x * q.z - q.w * q.y);
		const auto vy = 2.0f * (q.y * q.z + q.w * q.x);
		const auto vz = 2.0f * (q.w * q.w + q.z * q.z) - 1.0f;
		return vec3(vx, vy, vz);
	}
}

217
src/util/math/quaternion.h Normal file
View file

@ -0,0 +1,217 @@
#pragma once
#include <tuple>
#include <wx/math.h>
#include "util/math/vector3.h"
// Degree <-> radian conversion. The argument is parenthesized so that expressions like DEG2RAD(a + b) are
// converted as a whole. The result has type double because M_PI is a double constant
#define DEG2RAD(d) (((d) * M_PI) / 180.0f)
#define RAD2DEG(r) (((r) * 180.0f) / M_PI)
// Quaternion with public components w, x, y, z of type T.
// Note that several members do their math in single precision (float) regardless of T.
template<typename T>
class Quaternion
{
public:
	T w;
	T x;
	T y;
	T z;

	// all components value-initialized
	Quaternion();
	// component-wise initialization
	Quaternion(const T& w, const T& x, const T& y, const T& z);
	// builds the quaternion from three angles given in degrees
	Quaternion(float x, float y, float z);

	// Overwrites all four components
	void Assign(float w, float x, float y, float z)
	{
		this->w = w;
		this->x = x;
		this->y = y;
		this->z = z;
	}

	static Quaternion FromAngleAxis(float inAngle, float inX, float inY, float inZ);

	// Both functions return three vectors (v1, v2, v3); the results are transposes of each other
	std::tuple<Vector3<T>, Vector3<T>, Vector3<T>> GetRotationMatrix();
	std::tuple<Vector3<T>, Vector3<T>, Vector3<T>> GetTransposedRotationMatrix();

	// normalize but keep W
	// x/y/z are rescaled so that their length becomes sqrt(1 - w*w)
	void NormalizeXYZ()
	{
		const T xyzTargetLength = sqrtf((T)1.0 - w * w);
		const T xyzCurrentLength = sqrtf(x * x + y * y + z * z);
		const T lengthScaler = xyzTargetLength / xyzCurrentLength;
		x *= lengthScaler;
		y *= lengthScaler;
		z *= lengthScaler;
	}

	// Scales all four components so that the quaternion has unit length
	void NormalizeXYZW()
	{
		const T lengthScaler = 1.0f / sqrtf(w * w + x * x + y * y + z * z);
		w *= lengthScaler;
		x *= lengthScaler;
		y *= lengthScaler;
		z *= lengthScaler;
	}

	// NOTE(review): GetVectorX/Y/Z presumably return the axes of the rotation described by this quaternion (assumed to be normalized) - confirm row/column convention with the callers
	Vector3<T> GetVectorX() const
	{
		return Vector3<T>(
			2.0f * (w * w + x * x) - 1.0f,
			2.0f * (x * y - w * z),
			2.0f * (x * z + w * y));
	}

	Vector3<T> GetVectorY() const
	{
		return Vector3<T>(
			2.0f * (x * y + w * z),
			2.0f * (w * w + y * y) - 1.0f,
			2.0f * (y * z - w * x));
	}

	Vector3<T> GetVectorZ() const
	{
		return Vector3<T>(
			2.0f * (x * z - w * y),
			2.0f * (y * z + w * x),
			2.0f * (w * w + z * z) - 1.0f);
	}

	// Quaternion product: *this = *this * rhs
	Quaternion& operator*=(const Quaternion<T>& rhs)
	{
		// all four results are computed from the old components before any of them is overwritten
		const T productW = w * rhs.w - x * rhs.x - y * rhs.y - z * rhs.z;
		const T productX = w * rhs.x + x * rhs.w + y * rhs.z - z * rhs.y;
		const T productY = w * rhs.y - x * rhs.z + y * rhs.w + z * rhs.x;
		const T productZ = w * rhs.z + x * rhs.y - y * rhs.x + z * rhs.w;
		Assign(productW, productX, productY, productZ);
		return *this;
	}

	// Component-wise addition
	Quaternion& operator+=(const Quaternion<T>& rhs)
	{
		w += rhs.w;
		x += rhs.x;
		y += rhs.y;
		z += rhs.z;
		return *this;
	}

	// Quaternion product, implemented on top of operator*=
	friend Quaternion operator*(Quaternion<T> lhs, const Quaternion<T>& rhs)
	{
		lhs *= rhs;
		return lhs;
	}
};
template <typename T>
Quaternion<T>::Quaternion()
: w(), x(), y(), z()
{}
template <typename T>
Quaternion<T>::Quaternion(const T& w, const T& x, const T& y, const T& z)
: w(w), x(x), y(y), z(z)
{}
// Builds the quaternion from three angles in degrees: x is used as pitch, y as yaw and z as roll
template <typename T>
Quaternion<T>::Quaternion(float x, float y, float z)
{
	float pitch = DEG2RAD(x);
	float yaw = DEG2RAD(y);
	float roll = DEG2RAD(z);

	// sine and cosine of the half angles
	const float halfYaw = 0.5f * yaw;
	const float halfPitch = 0.5f * pitch;
	const float halfRoll = 0.5f * roll;
	float cosYaw = cos(halfYaw);
	float cosPitch = cos(halfPitch);
	float cosRoll = cos(halfRoll);
	float sinYaw = sin(halfYaw);
	float sinPitch = sin(halfPitch);
	float sinRoll = sin(halfRoll);

	// yaw/pitch products shared by the component formulas below
	float cosYawCosPitch = cosYaw * cosPitch;
	float sinYawSinPitch = sinYaw * sinPitch;
	float cosYawSinPitch = cosYaw * sinPitch;
	float sinYawCosPitch = sinYaw * cosPitch;

	this->w = cosYawCosPitch * cosRoll + sinYawSinPitch * sinRoll;
	this->x = cosYawSinPitch * cosRoll + sinYawCosPitch * sinRoll;
	this->y = sinYawCosPitch * cosRoll - cosYawSinPitch * sinRoll;
	this->z = cosYawCosPitch * sinRoll - sinYawSinPitch * cosRoll;
}
// Creates the quaternion for a rotation of inAngle (radians) around the axis (inX, inY, inZ).
// The axis does not have to be normalized. The result is (cos(a/2), axis/|axis| * sin(a/2)), which keeps the
// sign of sin(a/2), so negative angles rotate in the opposite direction.
// A zero-length axis has no direction; x/y/z are set to 0 in that case instead of dividing by zero.
template<typename T>
Quaternion<T> Quaternion<T>::FromAngleAxis(float inAngle, float inX, float inY, float inZ)
{
	const float halfAngle = inAngle * 0.5f;
	const float axisLength = sqrtf(inX * inX + inY * inY + inZ * inZ);
	const float axisScale = (axisLength > 0.0f) ? (sinf(halfAngle) / axisLength) : 0.0f;
	return Quaternion<T>(cosf(halfAngle), inX * axisScale, inY * axisScale, inZ * axisScale);
}
// Computes the 3x3 rotation matrix of this quaternion and returns it as three vectors (v1, v2, v3).
// The quaternion does not have to be normalized, all terms are divided by its squared length.
// NOTE(review): whether v1..v3 are the rows or the columns depends on the convention of the callers - confirm
template <typename T>
std::tuple<Vector3<T>, Vector3<T>, Vector3<T>> Quaternion<T>::GetRotationMatrix()
{
	const float ww = w*w;
	const float xx = x*x;
	const float yy = y*y;
	const float zz = z*z;

	// invs (inverse square length) is only required if quaternion is not already normalised
	const float invs = 1.0f / (xx + yy + zz + ww);

	// mixed products of the components
	const float xy = x*y;
	const float zw = z*w;
	const float xz = x*z;
	const float yw = y*w;
	const float yz = y*z;
	const float xw = x*w;

	Vector3<T> v1, v2, v3;

	v1.x = (xx - yy - zz + ww) * invs;
	v1.y = 2.0 * (xy - zw)*invs;
	v1.z = 2.0 * (xz + yw)*invs;

	v2.x = 2.0 * (xy + zw)*invs;
	v2.y = (-xx + yy - zz + ww) * invs;
	v2.z = 2.0 * (yz - xw)*invs;

	v3.x = 2.0 * (xz - yw)*invs;
	v3.y = 2.0 * (yz + xw)*invs;
	v3.z = (-xx - yy + zz + ww) * invs;

	return std::make_tuple(v1, v2, v3);
}
// Computes the transpose of the matrix returned by GetRotationMatrix() and returns it as three vectors (v1, v2, v3).
// The quaternion does not have to be normalized, all terms are divided by its squared length.
template <typename T>
std::tuple<Vector3<T>, Vector3<T>, Vector3<T>> Quaternion<T>::GetTransposedRotationMatrix()
{
	const float ww = w*w;
	const float xx = x*x;
	const float yy = y*y;
	const float zz = z*z;

	// invs (inverse square length) is only required if quaternion is not already normalised
	const float invs = 1.0f / (xx + yy + zz + ww);

	// mixed products of the components
	const float xy = x*y;
	const float zw = z*w;
	const float xz = x*z;
	const float yw = y*w;
	const float yz = y*z;
	const float xw = x*w;

	Vector3<T> v1, v2, v3;

	v1.x = (xx - yy - zz + ww) * invs;
	v1.y = 2.0 * (xy + zw)*invs;
	v1.z = 2.0 * (xz - yw)*invs;

	v2.x = 2.0 * (xy - zw)*invs;
	v2.y = (-xx + yy - zz + ww) * invs;
	v2.z = 2.0 * (yz + xw)*invs;

	v3.x = 2.0 * (xz + yw)*invs;
	v3.y = 2.0 * (yz - xw)*invs;
	v3.z = (-xx - yy + zz + ww) * invs;

	return std::make_tuple(v1, v2, v3);
}
using Quaternionf = Quaternion<float>;

150
src/util/math/vector2.h Normal file
View file

@ -0,0 +1,150 @@
#pragma once
#include <cassert>
// 2D vector with public components x and y.
// Length(), Cross() and Dot() return float regardless of T.
template <typename T>
class Vector2
{
public:
	T x;
	T y;

	// both components value-initialized (0 for arithmetic T)
	Vector2()
		: x{}, y{} {}

	Vector2(T x, T y)
		: x(x), y(y) {}

	// conversion from a vector with another component type (component-wise cast)
	template <typename U = T>
	Vector2(const Vector2<U>& v)
		: x((T)v.x), y((T)v.y) {}

	// Euclidean length
	float Length() const
	{
		return (float)std::sqrt((x * x) + (y * y));
	}

	// z component of the 3D cross product of the two vectors
	float Cross(const Vector2& v) const
	{
		return x * v.y - y * v.x;
	}

	float Dot(const Vector2& v) const
	{
		return x * v.x + y * v.y;
	}

	// Returns the perpendicular vector (y, -x)
	Vector2 Ortho() const
	{
		return Vector2(y, -x);
	}

	// Returns a copy scaled to length 1. A zero-length vector yields the zero vector
	Vector2 Normalized() const
	{
		const auto len = Length();
		if (len == 0)
			return Vector2<T>();

		return Vector2<T>((T)((float)x / len), (T)((float)y / len));
	}

	// Scales this vector to length 1 in place. A zero-length vector is left unchanged
	Vector2& Normalize()
	{
		const auto len = Length();
		if (len != 0)
		{
			x = (T)((float)x / len);
			y = (T)((float)y / len);
		}

		return *this;
	}

	// component-wise maximum / minimum
	static Vector2 Max(const Vector2& v1, const Vector2& v2)
	{
		return Vector2(std::max(v1.x, v2.x), std::max(v1.y, v2.y));
	}

	static Vector2 Min(const Vector2& v1, const Vector2& v2)
	{
		return Vector2(std::min(v1.x, v2.x), std::min(v1.y, v2.y));
	}

	bool operator==(const Vector2& v) const
	{
		return x == v.x && y == v.y;
	}

	bool operator!=(const Vector2& v) const
	{
		return !(*this == v);
	}

	// component-wise arithmetic with another vector
	Vector2& operator+=(const Vector2& v)
	{
		x += v.x;
		y += v.y;
		return *this;
	}

	Vector2& operator-=(const Vector2& v)
	{
		x -= v.x;
		y -= v.y;
		return *this;
	}

	Vector2 operator+(const Vector2& v) const
	{
		return Vector2(x + v.x, y + v.y);
	}

	Vector2 operator-(const Vector2& v) const
	{
		return Vector2(x - v.x, y - v.y);
	}

	// arithmetic with a scalar, applied to both components
	Vector2& operator+=(const T& v)
	{
		x += v;
		y += v;
		return *this;
	}

	Vector2& operator-=(const T& v)
	{
		x -= v;
		y -= v;
		return *this;
	}

	Vector2& operator*=(const T& v)
	{
		x *= v;
		y *= v;
		return *this;
	}

	Vector2& operator/=(const T& v)
	{
		assert(v != 0);
		x /= v;
		y /= v;
		return *this;
	}

	// the non-assigning scalar operators do not modify the vector and are therefore const,
	// matching the vector/vector operators above
	Vector2 operator+(const T& v) const
	{
		return Vector2(x + v, y + v);
	}

	Vector2 operator-(const T& v) const
	{
		return Vector2(x - v, y - v);
	}

	Vector2 operator*(const T& v) const
	{
		return Vector2(x * v, y * v);
	}

	Vector2 operator/(const T& v) const
	{
		assert(v != 0);
		return Vector2(x / v, y / v);
	}
};
// Convenience aliases for the two Vector2 instantiations named here:
// float components and int components.
using Vector2f = Vector2<float>;
using Vector2i = Vector2<int>;

233
src/util/math/vector3.h Normal file
View file

@ -0,0 +1,233 @@
#pragma once
#include <cassert>
/**
 * Minimal 3D vector with public x/y/z components of type T.
 *
 * Arithmetic operators are component-wise. The vector-vector operators are
 * templated so that a Vector3<U> operand is cast to T per component.
 * Length() and Dot() always return float regardless of T.
 */
template <typename T>
class Vector3 {
public:
	T x;
	T y;
	T z;

	// Value-initializes all components.
	Vector3() : x{}, y{}, z{} {}

	Vector3(T x, T y, T z) : x(x), y(y), z(z) {}

	// Converting constructor: casts each component of v to T.
	template <typename U=T>
	Vector3(const Vector3<U>& v)
		: x(static_cast<T>(v.x)), y(static_cast<T>(v.y)), z(static_cast<T>(v.z)) {}

	// Euclidean length, computed as sqrt(x*x + y*y + z*z).
	float Length() const
	{
		return static_cast<float>(std::sqrt((x * x) + (y * y) + (z * z)));
	}

	// Dot product: x*v.x + y*v.y + z*v.z.
	float Dot(const Vector3& v) const
	{
		return x * v.x + y * v.y + z * v.z;
	}

	// Cross product of *this with v.
	Vector3 Cross(const Vector3& v) const
	{
		return Vector3(y * v.z - z * v.y, z * v.x - x * v.z, x * v.y - y * v.x);
	}

	// Returns a copy divided by Length(); a zero-length vector yields a
	// value-initialized vector instead of dividing by zero.
	Vector3 Normalized() const
	{
		const auto len = Length();
		if (len == 0)
			return {};

		return *this / len;
	}

	// In-place variant of Normalized(); leaves a zero-length vector unchanged.
	Vector3& Normalize()
	{
		const auto len = Length();
		if (len != 0)
			*this /= len;

		return *this;
	}

	//Vector3& Scale(const Vector3& v)
	//{
	//	*this *= v;
	//	return *this;
	//}

	// Multiplies every component by s in place.
	void Scale(const float s)
	{
		this->x *= s;
		this->y *= s;
		this->z *= s;
	}

	// In-place rotations about the named axis by theta; theta is passed
	// straight to std::sin/std::cos (i.e. radians). Defined below the class.
	Vector3& RotateX(float theta);
	Vector3& RotateY(float theta);
	Vector3& RotateZ(float theta);

	// Exact component-wise equality (no epsilon for floating-point T).
	// const-qualified so const vectors can be compared.
	bool operator==(const Vector3& v) const
	{
		return x == v.x && y == v.y && z == v.z;
	}

	bool operator!=(const Vector3& v) const
	{
		return !(*this == v);
	}

	// --- vector (component-wise) arithmetic; operand components are cast to T ---

	template <typename U = T>
	Vector3& operator+=(const Vector3<U>& v)
	{
		x += static_cast<T>(v.x);
		y += static_cast<T>(v.y);
		z += static_cast<T>(v.z);
		return *this;
	}

	template <typename U = T>
	Vector3& operator-=(const Vector3<U>& v)
	{
		x -= static_cast<T>(v.x);
		y -= static_cast<T>(v.y);
		z -= static_cast<T>(v.z);
		return *this;
	}

	template <typename U = T>
	Vector3& operator*=(const Vector3<U>& v)
	{
		x *= static_cast<T>(v.x);
		y *= static_cast<T>(v.y);
		z *= static_cast<T>(v.z);
		return *this;
	}

	// Asserts (debug builds only) that no component of the divisor is zero.
	template <typename U = T>
	Vector3& operator/=(const Vector3<U>& v)
	{
		assert(v.x != 0 && v.y != 0 && v.z != 0);
		x /= static_cast<T>(v.x);
		y /= static_cast<T>(v.y);
		z /= static_cast<T>(v.z);
		return *this;
	}

	template <typename U = T>
	Vector3 operator+(const Vector3<U>& v) const
	{
		return Vector3(x + static_cast<T>(v.x), y + static_cast<T>(v.y), z + static_cast<T>(v.z));
	}

	template <typename U = T>
	Vector3 operator-(const Vector3<U>& v) const
	{
		return Vector3(x - static_cast<T>(v.x), y - static_cast<T>(v.y), z - static_cast<T>(v.z));
	}

	template <typename U = T>
	Vector3 operator*(const Vector3<U>& v) const
	{
		return Vector3(x * static_cast<T>(v.x), y * static_cast<T>(v.y), z * static_cast<T>(v.z));
	}

	// Asserts (debug builds only) that no component of the divisor is zero.
	template <typename U = T>
	Vector3 operator/(const Vector3<U>& v) const
	{
		assert(v.x != 0 && v.y != 0 && v.z != 0);
		return Vector3(x / static_cast<T>(v.x), y / static_cast<T>(v.y), z / static_cast<T>(v.z));
	}

	// --- scalar arithmetic: v is applied to every component ---

	Vector3& operator+=(T v)
	{
		x += v;
		y += v;
		z += v;
		return *this;
	}

	Vector3& operator-=(T v)
	{
		x -= v;
		y -= v;
		z -= v;
		return *this;
	}

	Vector3& operator*=(T v)
	{
		x *= v;
		y *= v;
		z *= v;
		return *this;
	}

	// Asserts (debug builds only) that the divisor is non-zero.
	Vector3& operator/=(T v)
	{
		assert(v != 0);
		x /= v;
		y /= v;
		z /= v;
		return *this;
	}

	Vector3 operator+(T v) const
	{
		return Vector3(x + v, y + v, z + v);
	}

	Vector3 operator-(T v) const
	{
		return Vector3(x - v, y - v, z - v);
	}

	Vector3 operator*(T v) const
	{
		return Vector3(x * v, y * v, z * v);
	}

	// Asserts (debug builds only) that the divisor is non-zero.
	Vector3 operator/(T v) const
	{
		assert(v != 0);
		return Vector3(x / v, y / v, z / v);
	}

	// True when every component equals zero.
	bool IsZero() const
	{
		return x == 0 && y == 0 && z == 0;
	}

	// True when at least one component equals zero.
	bool HasZero() const
	{
		return x == 0 || y == 0 || z == 0;
	}
};

// Rotates in place about the X axis:
//   y' = y*cos(theta) - z*sin(theta)
//   z' = y*sin(theta) + z*cos(theta)
template <typename T>
Vector3<T>& Vector3<T>::RotateX(float theta)
{
	const float s = std::sin(theta);
	const float c = std::cos(theta);
	// Both outputs depend on the original y, so snapshot it before y is overwritten.
	const T oldY = y;
	y = static_cast<T>(oldY * c - z * s);
	z = static_cast<T>(oldY * s + z * c);
	return *this;
}

// Rotates in place about the Y axis:
//   x' =  x*cos(theta) + z*sin(theta)
//   z' = -x*sin(theta) + z*cos(theta)
template <typename T>
Vector3<T>& Vector3<T>::RotateY(float theta)
{
	const float s = std::sin(theta);
	const float c = std::cos(theta);
	// Both outputs depend on the original x, so snapshot it before x is overwritten.
	const T oldX = x;
	x = static_cast<T>(oldX * c + z * s);
	z = static_cast<T>(-oldX * s + z * c);
	return *this;
}

// Rotates in place about the Z axis:
//   x' = x*cos(theta) - y*sin(theta)
//   y' = x*sin(theta) + y*cos(theta)
template <typename T>
Vector3<T>& Vector3<T>::RotateZ(float theta)
{
	const float s = std::sin(theta);
	const float c = std::cos(theta);
	// Both outputs depend on the original x, so snapshot it before x is overwritten.
	const T oldX = x;
	x = static_cast<T>(oldX * c - y * s);
	y = static_cast<T>(oldX * s + y * c);
	return *this;
}
// Convenience aliases for the two Vector3 instantiations named here:
// float components and int components.
using Vector3f = Vector3<float>;
using Vector3i = Vector3<int>;

File diff suppressed because it is too large Load diff

2263
src/util/tinyxml2/tinyxml2.h Normal file

File diff suppressed because it is too large Load diff