Cemu/src/Cafe/HW/Latte/LatteAddrLib/LatteAddrLib_Coord.cpp
2025-03-22 18:57:37 +01:00

405 lines
No EOL
15 KiB
C++

#include "Cafe/HW/Latte/Core/Latte.h"
#include "Cafe/HW/Latte/LatteAddrLib/LatteAddrLib.h"
#include "Cafe/OS/libs/gx2/GX2_Surface.h"
using namespace Latte;
namespace LatteAddrLib
{
#if BOOST_OS_LINUX || BOOST_OS_MACOS
// Stand-in for the MSVC _BitScanReverse intrinsic on Linux/macOS builds.
// Writes the zero-based position of the most significant set bit of _Mask to *_Index
// and returns 1. Returns 0 and leaves *_Index untouched when _Mask is zero.
unsigned char _BitScanReverse(uint32* _Index, uint32 _Mask)
{
	if (!_Mask)
		return 0;
	// The mask is treated as a 32-bit value (hence "31 -"), so the `unsigned int` builtin is required.
	// __builtin_clzl works on `unsigned long`, which is 64 bits wide on LP64 Linux/macOS; it would
	// count 32 additional leading zeros and make the subtraction underflow.
	*_Index = 31 - __builtin_clz(_Mask);
	return 1;
}
#endif
// Lookup table XORed into the bank index for bank-swapped tile modes.
// Indexed with (swapIndex & (m_banks - 1)) by the macro-tiled address routines below.
static constexpr uint32 bankSwapOrder[] = { 0, 1, 3, 2 };
// Translates the depth flag into the micro tile type: 1 for depth surfaces, 0 otherwise.
uint32 _GetMicroTileType(bool isDepth)
{
	if (isDepth)
		return 1;
	return 0;
}
// Returns the index of a pixel inside its micro tile.
// Only the low three bits of x and y and the low two bits of z contribute.
// For microTileType 1 the x/y bits are strictly interleaved; for microTileType 0 the
// bit order depends on bpp. z bits are appended (bits 6 and 7) only when the tile mode
// reports a thickness greater than 1.
uint32 _ComputePixelIndexWithinMicroTile(uint32 x, uint32 y, uint32 z, uint32 bpp, E_HWTILEMODE tileMode, uint32 microTileType)
{
	cemu_assert_debug(microTileType == 0 || microTileType == 1);

	// individual coordinate bits
	const uint32 x0 = x & 1;
	const uint32 x1 = (x >> 1) & 1;
	const uint32 x2 = (x >> 2) & 1;
	const uint32 y0 = y & 1;
	const uint32 y1 = (y >> 1) & 1;
	const uint32 y2 = (y >> 2) & 1;

	// assembles index bits 0..5, lowest bit first
	const auto packBits = [](uint32 b0, uint32 b1, uint32 b2, uint32 b3, uint32 b4, uint32 b5) -> uint32
	{
		return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3) | (b4 << 4) | (b5 << 5);
	};

	const uint32 tileThickness = LatteAddrLib::TM_GetThickness(tileMode);

	uint32 index;
	if (microTileType)
	{
		index = packBits(x0, y0, x1, y1, x2, y2);
	}
	else
	{
		switch (bpp)
		{
		case 8:
			index = packBits(x0, x1, x2, y1, y0, y2);
			break;
		case 0x10:
			index = packBits(x0, x1, x2, y0, y1, y2);
			break;
		case 0x40:
			index = packBits(x0, y0, x1, x2, y1, y2);
			break;
		case 0x80:
			index = packBits(y0, x0, x1, x2, y1, y2);
			break;
		case 0x20:
		case 0x60:
		default:
			index = packBits(x0, x1, y0, x2, y1, y2);
			break;
		}
	}

	if (tileThickness > 1)
	{
		// thick tiles: the low two z bits become index bits 6 and 7
		index |= (z & 1) << 6;
		index |= ((z >> 1) & 1) << 7;
	}
	return index;
}
// Derives the pipe index from the coordinate, before any rotation/swizzle is applied.
// Two pipes are assumed, so the result is a single bit: bit 3 of x XOR bit 3 of y.
uint32 _ComputePipeFromCoordWoRotation(uint32 x, uint32 y)
{
	const uint32 xBit3 = (x >> 3) & 1;
	const uint32 yBit3 = (y >> 3) & 1;
	return xBit3 ^ yBit3;
}
// Derives the bank index from the coordinate, before any rotation/swizzle is applied.
// Only the 4-bank configuration is implemented; 8 banks triggers the unimplemented
// assert and, like every other bank count, yields 0.
uint32 _ComputeBankFromCoordWoRotation(uint32 x, uint32 y)
{
	if (m_banks == 8)
	{
		cemu_assert_unimplemented();
		return 0;
	}
	if (m_banks != 4)
		return 0;

	// bits 4 and 5 of y, with their order exchanged
	const uint32 yBits = (y >> 4) & 3;
	const uint32 ySwapped = (yBits >> 1) | (yBits << 1);
	// combine with x (from bit 3 upwards) and keep the two bank bits
	return (ySwapped ^ (x >> 3)) & 3;
}
// Computes the byte offset of an element in a linear (untiled) surface.
// Elements are laid out row by row (pitch elements per row), slices follow each other,
// and each sample occupies its own run of numSlices slices. bpp is the element size in bits.
uint32 ComputeSurfaceAddrFromCoordLinear(uint32 x, uint32 y, uint32 slice, uint32 sample, uint32 bpp, uint32 pitch, uint32 height, uint32 numSlices)
{
	const uint32 linearSlice = slice + numSlices * sample;
	const uint32 sliceStart = linearSlice * height * pitch;
	const uint32 rowStart = pitch * y;
	const uint32 elementIndex = x + rowStart + sliceStart;
	// bits -> bytes
	return (elementIndex * bpp) / 8;
}
// Computes the byte offset of an element in a micro-tiled (1D tiled) surface.
// The result is the sum of three parts: the offset of the slice group, the offset of the
// 8x8 (or 8x8x4 for TM_1D_TILED_THICK) micro tile within it, and the offset of the
// element inside that micro tile. bpp is the element size in bits.
uint32 ComputeSurfaceAddrFromCoordMicroTiled(uint32 x, uint32 y, uint32 slice, uint32 bpp, uint32 pitch, uint32 height, Latte::E_HWTILEMODE tileMode, bool isDepth)
{
	const bool isThick = (tileMode == Latte::E_HWTILEMODE::TM_1D_TILED_THICK);
	const uint32 tileDepth = isThick ? 4 : 1;

	// element position inside the micro tile (bits -> bytes)
	const uint32 texelIndex = _ComputePixelIndexWithinMicroTile(x, y, slice, bpp, tileMode, _GetMicroTileType(isDepth));
	const uint32 texelBitOffset = bpp * texelIndex;
	const uint32 texelByteOffset = texelBitOffset >> 3;

	// micro tile position inside the slice
	const uint32 tilesPerRow = pitch >> 3;
	const uint32 tileColumn = x >> 3;
	const uint32 tileRow = y >> 3;
	const uint32 bytesPerTile = tileDepth * (((bpp << 6) + 7) >> 3);
	const uint32 tileByteOffset = (uint32)(bytesPerTile * (uint64)(tileColumn + tilesPerRow * tileRow));

	// slice position; the size is computed in 64 bit before being narrowed
	const uint32 bytesPerSliceGroup = (uint32)((height * (uint64)pitch * tileDepth * bpp + 7) / 8);
	const uint32 sliceByteOffset = bytesPerSliceGroup * (slice / tileDepth);

	return texelByteOffset + tileByteOffset + sliceByteOffset;
}
// Computes the address offset of one element/sample in a macro-tiled (2D tiled) surface.
// The result is assembled from three parts: the pipe index, the bank index and the
// offset inside the macro tile / slice, with the pipe and bank bits inserted above the
// low m_pipeInterleaveBytesBitcount bits of the offset.
//   x, y, slice, sample      - coordinate being addressed
//   bpp                      - element size in bits (offsets are built in bits and converted with >> 3)
//   pitch, height            - surface dimensions used for the row and slice sizes
//   numSamples               - sample count of the surface
//   tileMode, isDepth        - select the micro tile layout, macro tile shape, rotation and bank swapping
//   pipeSwizzle, bankSwizzle - combined into a swizzle value that is XORed into the bank/pipe index
uint32 ComputeSurfaceAddrFromCoordMacroTiled(uint32 x, uint32 y, uint32 slice, uint32 sample, uint32 bpp, uint32 pitch, uint32 height, uint32 numSamples, Latte::E_HWTILEMODE tileMode, bool isDepth, uint32 pipeSwizzle, uint32 bankSwizzle)
{
uint32 microTileThickness = LatteAddrLib::TM_GetThickness((E_HWTILEMODE)tileMode);
// size of one micro tile: 8x8 elements times thickness, for all samples
uint32 microTileBits = numSamples * bpp * (microTileThickness * (8 * 8));
uint32 microTileBytes = microTileBits >> 3;
uint32 pixelIndex = _ComputePixelIndexWithinMicroTile(x, y, slice, bpp, tileMode, _GetMicroTileType(isDepth));
uint32 sampleOffset, pixelOffset;
if (isDepth)
{
// depth: the samples of one pixel are stored next to each other
sampleOffset = bpp * sample;
pixelOffset = numSamples * bpp * pixelIndex;
}
else
{
// non-depth: each sample has its own contiguous part of the micro tile
sampleOffset = sample * (microTileBits / numSamples);
pixelOffset = bpp * pixelIndex;
}
// offset inside the micro tile, still in bits
uint32 elemOffset = pixelOffset + sampleOffset;
uint32 bytesPerSample = microTileBytes / numSamples;
uint32 sampleSlice, numSampleSplits;
if (numSamples <= 1 || microTileBytes <= m_splitSize)
{
// micro tile fits into the split size: no sample splitting
numSampleSplits = 1;
sampleSlice = 0;
}
else
{
// micro tile is larger than m_splitSize: distribute the samples over several sample slices
uint32 samplesPerSlice = m_splitSize / bytesPerSample;
numSampleSplits = numSamples / samplesPerSlice;
// NOTE: numSamples is overwritten here; sliceBytes and macroTileBytes below use the reduced count
numSamples = samplesPerSlice;
sampleSlice = elemOffset / (microTileBits / numSampleSplits);
elemOffset %= microTileBits / numSampleSplits;
}
// bits to bytes
elemOffset >>= 3;
// pipe and bank from the coordinate, then combined into a single index for swizzling
uint32 pipe = _ComputePipeFromCoordWoRotation(x, y);
uint32 bank = _ComputeBankFromCoordWoRotation(x, y);
uint32 bankPipe = pipe + m_pipes * bank;
uint32 rotation = ComputeSurfaceRotationFromTileMode(tileMode);
uint32 swizzle = pipeSwizzle + m_pipes * bankSwizzle;
uint32 sliceIn = slice;
if (TM_IsThickAndMacroTiled(tileMode))
sliceIn >>= 2;
// XOR in the sample slice term and the swizzle plus per-slice rotation, then wrap to the valid range
bankPipe ^= m_pipes * sampleSlice * ((m_banks >> 1) + 1) ^ (swizzle + sliceIn * rotation);
bankPipe %= m_pipes * m_banks;
pipe = bankPipe % m_pipes;
bank = bankPipe / m_pipes;
// slice size and offset are kept in 64 bit until the final narrowing further down
uint64 sliceBytes = (((uint64)height * pitch * microTileThickness * bpp * numSamples + 7) / 8);
uint64 sliceOffset = sliceBytes * ((sampleSlice + numSampleSplits * slice) / microTileThickness);
// default macro tile shape; the THIN2/THIN4 modes trade pitch for height
uint32 macroTilePitch = 8 * m_banks;
uint32 macroTileHeight = 8 * m_pipes;
switch (tileMode)
{
case Latte::E_HWTILEMODE::TM_2D_TILED_THIN2:
case Latte::E_HWTILEMODE::TM_2B_TILED_THIN2:
macroTilePitch >>= 1;
macroTileHeight <<= 1;
break;
case Latte::E_HWTILEMODE::TM_2D_TILED_THIN4:
case Latte::E_HWTILEMODE::TM_2B_TILED_THIN4:
macroTilePitch >>= 2;
macroTileHeight <<= 2;
break;
default:
break;
}
// macroTilesPerRow and macroTileIndexY are not read below; macroTileOffset recomputes the same terms inline
uint32 macroTilesPerRow = pitch / macroTilePitch;
uint32 macroTileBytes = (numSamples * microTileThickness * bpp * macroTileHeight * macroTilePitch + 7) >> 3;
uint32 macroTileIndexX = x / macroTilePitch;
uint32 macroTileIndexY = y / macroTileHeight;
uint32 macroTileOffset = (x / macroTilePitch + pitch / macroTilePitch * (y / macroTileHeight)) * macroTileBytes;
if (TM_IsBankSwapped(tileMode))
{
// bank-swapped modes: XOR the bank with a table entry selected by the horizontal position
uint32 bankSwapWidth = ComputeSurfaceBankSwappedWidth(tileMode, bpp, numSamples, pitch);
uint32 swapIndex = macroTilePitch * macroTileIndexX / bankSwapWidth;
uint32 bankMask = m_banks - 1;
bank ^= bankSwapOrder[swapIndex & bankMask];
}
// final assembly: the pipe bits sit directly above the interleave bits, the bank bits above the pipe bits
uint32 pipeOffset = (pipe << m_pipeInterleaveBytesBitcount);
uint32 bankOffset = (bank << (m_pipesBitcount + m_pipeInterleaveBytesBitcount));
uint32 numSwizzleBits = (m_banksBitcount + m_pipesBitcount);
// the tile and slice offsets are divided by the number of bank/pipe combinations before the element offset is added
uint32 macroSliceOffset = (uint32)((macroTileOffset + sliceOffset) >> numSwizzleBits);
macroSliceOffset += elemOffset;
// split at the interleave boundary; the high part is shifted up to make room for the pipe/bank bits
uint32 macroSliceOffsetHigh = macroSliceOffset & ~((1 << m_pipeInterleaveBytesBitcount) - 1);
uint32 macroSliceOffsetLow = macroSliceOffset & ((1 << m_pipeInterleaveBytesBitcount) - 1);
uint32 finalMacroTileOffset = (macroSliceOffsetHigh << numSwizzleBits) | macroSliceOffsetLow;
return finalMacroTileOffset | pipeOffset | bankOffset;
}
// Fills a CachedSurfaceAddrInfo with everything that does not depend on x/y, so that
// the ComputeSurfaceAddrFromCoordMacroTiledCached* routines only have to do the
// per-coordinate part of the macro-tiled address computation.
// The input parameters are copied into info first; all derived values are then
// computed from the copies. sample must be 0 (checked by a debug assert).
void SetupCachedSurfaceAddrInfo(CachedSurfaceAddrInfo* info, uint32 slice, uint32 sample, uint32 bpp, uint32 pitch, uint32 height, uint32 depth, uint32 numSamples, E_HWTILEMODE tileMode, int isDepth, uint32 pipeSwizzle, uint32 bankSwizzle)
{
// store the raw parameters
info->slice = slice;
info->sample = sample;
info->bpp = bpp;
info->pitch = pitch;
info->height = height;
info->depth = depth;
info->numSamples = numSamples;
info->tileMode = tileMode;
info->isDepth = isDepth;
info->pipeSwizzle = pipeSwizzle;
info->bankSwizzle = bankSwizzle;
// calculate static info
info->microTileThickness = LatteAddrLib::TM_GetThickness((E_HWTILEMODE)tileMode);
// size of one micro tile: 8x8 elements times thickness, for all samples
info->microTileBits = info->numSamples * info->bpp * (info->microTileThickness * (8 * 8));
info->microTileBytes = info->microTileBits >> 3;
info->microTileType = (info->isDepth != 0) ? 1 : 0;
cemu_assert_debug(sample == 0); // non-zero not supported
info->rotation = ComputeSurfaceRotationFromTileMode((E_HWTILEMODE)tileMode);
// macro tile
// default macro tile shape; the THIN2/THIN4 modes trade pitch for height
info->macroTilePitch = 8 * m_banks;
info->macroTileHeight = 8 * m_pipes;
switch (info->tileMode)
{
case E_HWTILEMODE::TM_2D_TILED_THIN2:
case E_HWTILEMODE::TM_2B_TILED_THIN2:
info->macroTilePitch >>= 1;
info->macroTileHeight <<= 1;
break;
case E_HWTILEMODE::TM_2D_TILED_THIN4:
case E_HWTILEMODE::TM_2B_TILED_THIN4:
info->macroTilePitch >>= 2;
info->macroTileHeight <<= 2;
break;
default:
break;
}
// index of the highest set bit of pitch/height; the cached routines shift by these instead of dividing
// (the return value is ignored, so the fields are only written when the input is non-zero)
_BitScanReverse((DWORD*)&info->macroTilePitchBits, info->macroTilePitch);
_BitScanReverse((DWORD*)&info->macroTileHeightBits, info->macroTileHeight);
info->macroTilesPerRow = info->pitch / info->macroTilePitch;
info->macroTileBytes = (info->numSamples * info->microTileThickness * info->bpp * info->macroTileHeight * info->macroTilePitch + 7) >> 3;
// slice
// the product is evaluated in 64 bit because of the uint64 cast on pitch
info->sliceBytes = (info->height * (uint64)info->pitch * info->microTileThickness * info->bpp * info->numSamples + 7) / 8;
info->sliceIn = info->slice;
if (TM_IsThickAndMacroTiled(tileMode))
info->sliceIn >>= 2;
// bank swap
// bankSwapWidth is only written for bank-swapped tile modes
if (TM_IsBankSwapped(tileMode))
info->bankSwapWidth = ComputeSurfaceBankSwappedWidth(tileMode, info->bpp, info->numSamples, info->pitch);
// pixel offset multiplier
// bits between consecutive pixel indices: depth surfaces include all samples per pixel
if (info->isDepth)
{
info->pixelOffsetMul = info->numSamples * info->bpp;
}
else
{
info->pixelOffsetMul = info->bpp;
}
info->bytesPerPixel = info->pixelOffsetMul >> 3;
// table for micro tile offset calculation (we could pre-generate these)
// 8*8*8 entries, indexed as x + y * 8 + z * 64; each value is stored as uint16
for (sint32 z = 0; z < 8; z++)
{
for (sint32 y = 0; y < 8; y++)
{
for (sint32 x = 0; x < 8; x++)
{
uint16 v = _ComputePixelIndexWithinMicroTile(x, y, z, info->bpp, info->tileMode, info->microTileType);
info->microTilePixelIndexTable[x + y * 8 + z * 8 * 8] = v;
}
}
}
// other constant values
// c0 is the coordinate-independent term that gets XORed into the combined bank/pipe index
uint32 swizzle = info->pipeSwizzle + m_pipes * info->bankSwizzle;
info->c0 = (swizzle + info->sliceIn * info->rotation);
}
// Computes the address offset of the element at (x, y) in a macro-tiled surface using
// the values precomputed by SetupCachedSurfaceAddrInfo. Slice, tile mode, swizzle etc.
// are taken from info. No sample offset is added here (SetupCachedSurfaceAddrInfo
// asserts that sample is 0).
uint32 ComputeSurfaceAddrFromCoordMacroTiledCached(uint32 x, uint32 y, CachedSurfaceAddrInfo* info)
{
uint32 numSamples = info->numSamples;
// pixel index inside the micro tile from the precomputed table (index: x + y * 8 + z * 64)
uint32 pixelIndex = (uint32)info->microTilePixelIndexTable[(x & 7) + ((y & 7) << 3) + ((info->slice & 7) << 6)];
// offset inside the micro tile, in bits
uint32 pixelOffset = pixelIndex * info->pixelOffsetMul;
uint32 bytesPerSample = info->microTileBytes / numSamples;
uint32 sampleSlice, numSampleSplits, samplesPerSlice;
if (numSamples <= 1 || info->microTileBytes <= m_splitSize)
{
// micro tile fits into the split size: no sample splitting
samplesPerSlice = numSamples;
numSampleSplits = 1;
sampleSlice = 0;
}
else
{
// micro tile is larger than m_splitSize: distribute the samples over several sample slices
samplesPerSlice = m_splitSize / bytesPerSample;
numSampleSplits = numSamples / samplesPerSlice;
// numSamples is not read again after this assignment
numSamples = samplesPerSlice;
sampleSlice = pixelOffset / (info->microTileBits / numSampleSplits);
pixelOffset %= info->microTileBits / numSampleSplits;
}
// bits to bytes
pixelOffset >>= 3;
// pipe and bank from the coordinate, combined into one index for swizzling
uint32 pipe = _ComputePipeFromCoordWoRotation(x, y);
uint32 bank = _ComputeBankFromCoordWoRotation(x, y);
uint32 bankPipe = pipe + m_pipes * bank;
// XOR in the sample slice term and the precomputed swizzle/rotation term c0, then wrap to the valid range
bankPipe ^= m_pipes * sampleSlice * ((m_banks >> 1) + 1) ^ info->c0;
bankPipe %= m_pipes * m_banks;
pipe = bankPipe % m_pipes;
bank = bankPipe / m_pipes;
uint32 sliceOffset = info->sliceBytes * ((sampleSlice + numSampleSplits * info->slice) / info->microTileThickness);
// macro tile position via shifts, using the bit counts stored in info
uint32 macroTileIndexX = x >> info->macroTilePitchBits;
uint32 macroTileIndexY = y >> info->macroTileHeightBits;
uint32 macroTileOffset = (macroTileIndexX + (info->pitch >> info->macroTilePitchBits) * macroTileIndexY) * info->macroTileBytes;
if (TM_IsBankSwapped(info->tileMode))
{
// bank-swapped modes: XOR the bank with a table entry selected by the horizontal position
uint32 swapIndex = info->macroTilePitch * macroTileIndexX / info->bankSwapWidth;
bank ^= bankSwapOrder[swapIndex & (m_banks - 1)];
}
// final assembly: the pipe bits sit directly above the interleave bits, the bank bits above the pipe bits
uint32 pipeOffset = (pipe << m_pipeInterleaveBytesBitcount);
uint32 bankOffset = (bank << (m_pipesBitcount + m_pipeInterleaveBytesBitcount));
uint32 numSwizzleBits = (m_banksBitcount + m_pipesBitcount);
uint32 macroSliceOffset = (uint32)((macroTileOffset + sliceOffset) >> numSwizzleBits);
macroSliceOffset += pixelOffset;
// split at the interleave boundary; the high part is shifted up to make room for the pipe/bank bits
uint32 macroSliceOffsetHigh = macroSliceOffset & ~((1 << m_pipeInterleaveBytesBitcount) - 1);
uint32 macroSliceOffsetLow = macroSliceOffset & ((1 << m_pipeInterleaveBytesBitcount) - 1);
uint32 finalMacroTileOffset = (macroSliceOffsetHigh << numSwizzleBits) | macroSliceOffsetLow;
return finalMacroTileOffset | pipeOffset | bankOffset;
}
/*
 * Specialized variant of the cached macro-tiled address computation.
 * It relies on the following assumptions:
 *   tileMode is 4
 *   samples is 1
 * Consequently it performs neither sample splitting nor bank swapping.
 */
uint32 ComputeSurfaceAddrFromCoordMacroTiledCached_tm04_sample1(uint32 x, uint32 y, CachedSurfaceAddrInfo* info)
{
	// element offset inside the micro tile: table lookup, scale to bits, then bits -> bytes
	const uint32 tableIndex = (x & 7) + ((y & 7) << 3) + ((info->slice & 7) << 6);
	const uint32 microTilePixelIndex = (uint32)info->microTilePixelIndexTable[tableIndex];
	uint32 elemOffset = microTilePixelIndex * info->pixelOffsetMul;
	elemOffset >>= 3;

	// pipe: ((y >> 3) ^ (x >> 3)) & 1, bank: based on (x>>3)&3 and (y>>4)&3
	// c0 supplies one swizzle bit for the pipe and the next two for the bank
	uint32 pipeIndex = _ComputePipeFromCoordWoRotation(x, y);
	uint32 bankIndex = _ComputeBankFromCoordWoRotation(x, y);
	pipeIndex ^= info->c0 & 1;
	bankIndex ^= (info->c0 >> 1) & 3;

	// offsets of the slice and of the macro tile containing (x, y)
	const uint32 sliceStart = info->sliceBytes * (info->slice / info->microTileThickness);
	const uint32 tileColumn = x >> info->macroTilePitchBits;
	const uint32 tileRow = y >> info->macroTileHeightBits;
	const uint32 tileStart = (tileColumn + (info->pitch >> info->macroTilePitchBits) * tileRow) * info->macroTileBytes;

	// combine: tile/slice offset is divided among all bank/pipe combinations, element offset added on top
	const uint32 swizzleBitCount = (m_banksBitcount + m_pipesBitcount);
	uint32 combinedOffset = (uint32)((tileStart + sliceStart) >> swizzleBitCount);
	combinedOffset += elemOffset;

	// bits below the interleave boundary stay in place, everything above moves up past the pipe/bank bits
	const uint32 lowPart = combinedOffset & ((1 << m_pipeInterleaveBytesBitcount) - 1);
	const uint32 highPart = combinedOffset & ~((1 << m_pipeInterleaveBytesBitcount) - 1);
	const uint32 spreadOffset = (highPart << swizzleBitCount) | lowPart;

	const uint32 pipeBits = (pipeIndex << m_pipeInterleaveBytesBitcount);
	const uint32 bankBits = (bankIndex << (m_pipesBitcount + m_pipeInterleaveBytesBitcount));
	return spreadOffset | pipeBits | bankBits;
}
};