// Mirror of https://github.com/cemu-project/Cemu.git
// Synced 2025-07-12 01:38:29 +12:00
// 405 lines, 15 KiB, C++, no EOL at end of file
#include "Cafe/HW/Latte/Core/Latte.h"
|
|
#include "Cafe/HW/Latte/LatteAddrLib/LatteAddrLib.h"
|
|
#include "Cafe/OS/libs/gx2/GX2_Surface.h"
|
|
|
|
using namespace Latte;
|
|
|
|
namespace LatteAddrLib
|
|
{
|
|
|
|
#if BOOST_OS_LINUX || BOOST_OS_MACOS
|
|
// Stand-in for the MSVC-style _BitScanReverse on the targets selected by the enclosing #if.
// _Index: receives the zero-based position of the highest set bit of _Mask.
// _Mask:  32-bit value to scan.
// Returns 1 when a bit was found; returns 0 and leaves *_Index untouched when _Mask is zero.
unsigned char _BitScanReverse(uint32* _Index, uint32 _Mask)
{
	if (!_Mask)
		return 0;
	// _Mask is 32 bits wide, so the `unsigned int` builtin must be used. The `long` variant
	// counts leading zeros over 64 bits on LP64 targets, which makes `31 - clz` underflow.
	*_Index = 31 - __builtin_clz(_Mask);
	return 1;
}
|
|
#endif
|
|
|
|
// XOR masks used for bank swapping: the macro-tiled address routines in this file index this
// table with (swapIndex & (m_banks - 1)) and XOR the selected entry into the bank number.
// NOTE(review): only four entries exist, so this presumably relies on m_banks <= 4 — confirm.
static constexpr uint32 bankSwapOrder[] = { 0, 1, 3, 2 };
|
|
|
|
// Maps the depth flag to the micro tile type value used by
// _ComputePixelIndexWithinMicroTile: 1 when isDepth is set, 0 otherwise.
uint32 _GetMicroTileType(bool isDepth)
{
	if (isDepth)
		return 1;
	return 0;
}
|
|
|
|
// Computes the index of a pixel inside its micro tile by interleaving the low three bits of
// x and y and, when the tile mode's thickness is greater than 1, the low two bits of z.
//
// x, y, z:        pixel coordinates; only x[2:0], y[2:0] and z[1:0] are read.
// bpp:            bits per pixel; selects the bit ordering when microTileType is 0.
// tileMode:       only used to look up the thickness via TM_GetThickness.
// microTileType:  must be 0 or 1 (checked by a debug assert); 1 selects the fixed
//                 x0,y0,x1,y1,x2,y2 ordering regardless of bpp.
// Returns the packed index: bits 0-5 come from x/y, bits 6-7 from z for thick modes.
uint32 _ComputePixelIndexWithinMicroTile(uint32 x, uint32 y, uint32 z, uint32 bpp, E_HWTILEMODE tileMode, uint32 microTileType)
{
	cemu_assert_debug(microTileType == 0 || microTileType == 1);
	const uint32 thickness = LatteAddrLib::TM_GetThickness(tileMode);

	// individual coordinate bits
	const uint32 x0 = x & 1;
	const uint32 x1 = (x >> 1) & 1;
	const uint32 x2 = (x >> 2) & 1;
	const uint32 y0 = y & 1;
	const uint32 y1 = (y >> 1) & 1;
	const uint32 y2 = (y >> 2) & 1;

	// packs six single-bit values, lowest bit first
	const auto pack = [](uint32 b0, uint32 b1, uint32 b2, uint32 b3, uint32 b4, uint32 b5) -> uint32
	{
		return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3) | (b4 << 4) | (b5 << 5);
	};

	uint32 pixelIndex;
	if (microTileType != 0)
	{
		pixelIndex = pack(x0, y0, x1, y1, x2, y2);
	}
	else
	{
		switch (bpp)
		{
		case 8:
			pixelIndex = pack(x0, x1, x2, y1, y0, y2);
			break;
		case 0x10:
			pixelIndex = pack(x0, x1, x2, y0, y1, y2);
			break;
		case 0x40:
			pixelIndex = pack(x0, y0, x1, x2, y1, y2);
			break;
		case 0x80:
			pixelIndex = pack(y0, x0, x1, x2, y1, y2);
			break;
		case 0x20:
		case 0x60:
		default:
			// 32 and 96 bpp share the ordering that is also the fallback for any other bpp
			pixelIndex = pack(x0, x1, y0, x2, y1, y2);
			break;
		}
	}

	// thick tiles place the two low z bits directly above the x/y bits
	if (thickness > 1)
		pixelIndex |= (z & 3) << 6;
	return pixelIndex;
}
|
|
|
|
// Returns the pipe index (0 or 1) for a pixel coordinate before any rotation/swizzle is applied.
// Hardcoded to assume 2 pipes: the result is bit 3 of x XOR bit 3 of y.
uint32 _ComputePipeFromCoordWoRotation(uint32 x, uint32 y)
{
	const uint32 tileX = x >> 3;
	const uint32 tileY = y >> 3;
	return (tileX ^ tileY) & 1;
}
|
|
|
|
// Returns the bank index for a pixel coordinate before any rotation/swizzle is applied.
// Only the 4-bank configuration is implemented: the bank is built from bits 4-5 of y (with the
// two bits swapped) XORed with bits 3-4 of x. The 8-bank case raises the "unimplemented"
// assert and yields 0; any other bank count also yields 0.
uint32 _ComputeBankFromCoordWoRotation(uint32 x, uint32 y)
{
	if (m_banks == 8)
	{
		cemu_assert_unimplemented();
		return 0;
	}
	if (m_banks != 4)
		return 0;

	const uint32 yBits = (y >> 4) & 3;
	const uint32 ySwapped = (yBits >> 1) | (yBits << 1); // swap lowest two bits
	return (ySwapped ^ (x >> 3)) & 3;
}
|
|
|
|
// Computes the byte offset of a pixel in a linear (untiled) surface.
//
// x, y:       pixel coordinate inside the slice.
// slice:      slice index.
// sample:     sample index; each sample occupies numSlices consecutive slices.
// bpp:        bits per pixel.
// pitch:      row length in pixels.
// height:     number of rows per slice.
// numSlices:  number of slices per sample.
// Returns the byte offset (bit offset divided by 8, rounded down), truncated to 32 bits.
uint32 ComputeSurfaceAddrFromCoordLinear(uint32 x, uint32 y, uint32 slice, uint32 sample, uint32 bpp, uint32 pitch, uint32 height, uint32 numSlices)
{
	// Use 64-bit intermediates: the bit offset (pixelIndex * bpp) can exceed 32 bits even when
	// the resulting byte offset still fits, and would otherwise wrap before the division.
	const uint64 sliceIndex = static_cast<uint64>(slice) + static_cast<uint64>(numSlices) * sample;
	const uint64 pixelIndex = static_cast<uint64>(x) + static_cast<uint64>(pitch) * y + sliceIndex * height * pitch;
	return static_cast<uint32>((pixelIndex * bpp) / 8);
}
|
|
|
|
// Computes the byte offset of a pixel in a micro-tiled (1D tiled) surface.
//
// x, y:      pixel coordinate.
// slice:     slice index; also passed as the z coordinate for the in-tile pixel index.
// bpp:       bits per pixel.
// pitch:     row length in pixels.
// height:    surface height in pixels.
// tileMode:  TM_1D_TILED_THICK selects a micro tile thickness of 4, every other mode 1.
// isDepth:   selects the micro tile type used for the in-tile pixel ordering.
// Returns the sum of the in-tile pixel offset, the micro tile offset and the slice offset.
uint32 ComputeSurfaceAddrFromCoordMicroTiled(uint32 x, uint32 y, uint32 slice, uint32 bpp, uint32 pitch, uint32 height, Latte::E_HWTILEMODE tileMode, bool isDepth)
{
	uint32 thickness = 1;
	if (tileMode == Latte::E_HWTILEMODE::TM_1D_TILED_THICK)
		thickness = 4;

	// each tile is 8x8 or 8x8x4 pixels
	const uint32 bytesPerMicroTile = thickness * (((bpp << 6) + 7) >> 3);

	// position of the micro tile inside the slice
	const uint32 tilesPerRow = pitch >> 3;
	const uint32 tileColumn = x >> 3;
	const uint32 tileRow = y >> 3;
	const uint32 tileOffset = bytesPerMicroTile * (uint64)(tileColumn + tilesPerRow * tileRow);

	// slices are stored in groups of `thickness`
	const uint32 bytesPerSliceGroup = (height * (uint64)pitch * thickness * bpp + 7) / 8;
	const uint32 sliceOffset = bytesPerSliceGroup * (slice / thickness);

	// position of the pixel inside the micro tile, converted from bits to bytes
	const uint32 pixelIndex = _ComputePixelIndexWithinMicroTile(x, y, slice, bpp, tileMode, _GetMicroTileType(isDepth));
	const uint32 pixelByteOffset = (bpp * pixelIndex) >> 3;

	return pixelByteOffset + tileOffset + sliceOffset;
}
|
|
|
|
// Computes the byte offset of a pixel/sample in a macro-tiled (2D tiled) surface.
//
// x, y:         pixel coordinate.
// slice:        slice index; also passed as the z coordinate for the in-tile pixel index.
// sample:       sample index.
// bpp:          bits per pixel.
// pitch:        row length in pixels.
// height:       surface height in pixels.
// numSamples:   samples per pixel; must be non-zero (it is used as a divisor).
// tileMode:     selects thickness, rotation, macro tile aspect and bank swapping.
// isDepth:      selects the micro tile type and how samples are laid out inside a micro tile.
// pipeSwizzle / bankSwizzle: surface swizzle, combined as pipeSwizzle + m_pipes * bankSwizzle.
// Returns the final offset with the pipe and bank bits inserted above the pipe interleave bits.
uint32 ComputeSurfaceAddrFromCoordMacroTiled(uint32 x, uint32 y, uint32 slice, uint32 sample, uint32 bpp, uint32 pitch, uint32 height, uint32 numSamples, Latte::E_HWTILEMODE tileMode, bool isDepth, uint32 pipeSwizzle, uint32 bankSwizzle)
{
	const uint32 thickness = LatteAddrLib::TM_GetThickness((E_HWTILEMODE)tileMode);
	const uint32 tileBits = numSamples * bpp * (thickness * (8 * 8));
	const uint32 tileBytes = tileBits >> 3;
	const uint32 pixelIndex = _ComputePixelIndexWithinMicroTile(x, y, slice, bpp, tileMode, _GetMicroTileType(isDepth));

	// Bit offset of the element inside the micro tile. For depth the samples of one pixel are
	// adjacent (pixel stride = numSamples * bpp); otherwise each sample gets its own
	// tileBits / numSamples sized region.
	const uint32 sampleBits = isDepth ? (bpp * sample) : (sample * (tileBits / numSamples));
	const uint32 pixelBits = isDepth ? (numSamples * bpp * pixelIndex) : (bpp * pixelIndex);
	uint32 elemOffset = pixelBits + sampleBits;

	// Micro tiles larger than the split size are divided into several "sample slices".
	const uint32 bytesPerSample = tileBytes / numSamples;
	uint32 sampleSlice = 0;
	uint32 numSampleSplits = 1;
	if (numSamples > 1 && tileBytes > m_splitSize)
	{
		const uint32 samplesPerSlice = m_splitSize / bytesPerSample;
		numSampleSplits = numSamples / samplesPerSlice;
		numSamples = samplesPerSlice; // all size calculations below use the per-split sample count
		sampleSlice = elemOffset / (tileBits / numSampleSplits);
		elemOffset %= tileBits / numSampleSplits;
	}
	elemOffset >>= 3; // bits to bytes

	// pipe/bank selection including swizzle and per-slice rotation
	uint32 pipe = _ComputePipeFromCoordWoRotation(x, y);
	uint32 bank = _ComputeBankFromCoordWoRotation(x, y);
	uint32 bankPipe = pipe + m_pipes * bank;
	const uint32 rotation = ComputeSurfaceRotationFromTileMode(tileMode);
	const uint32 swizzle = pipeSwizzle + m_pipes * bankSwizzle;
	uint32 sliceIn = slice;
	if (TM_IsThickAndMacroTiled(tileMode))
		sliceIn >>= 2;
	bankPipe ^= m_pipes * sampleSlice * ((m_banks >> 1) + 1) ^ (swizzle + sliceIn * rotation);
	bankPipe %= m_pipes * m_banks;
	pipe = bankPipe % m_pipes;
	bank = bankPipe / m_pipes;

	// offset of the slice (64-bit, the product can exceed 32 bits)
	const uint64 sliceBytes = (((uint64)height * pitch * thickness * bpp * numSamples + 7) / 8);
	const uint64 sliceOffset = sliceBytes * ((sampleSlice + numSampleSplits * slice) / thickness);

	// macro tile dimensions; the THIN2/THIN4 modes trade pitch for height
	uint32 aspectShift = 0;
	switch (tileMode)
	{
	case Latte::E_HWTILEMODE::TM_2D_TILED_THIN2:
	case Latte::E_HWTILEMODE::TM_2B_TILED_THIN2:
		aspectShift = 1;
		break;
	case Latte::E_HWTILEMODE::TM_2D_TILED_THIN4:
	case Latte::E_HWTILEMODE::TM_2B_TILED_THIN4:
		aspectShift = 2;
		break;
	default:
		break;
	}
	uint32 macroTilePitch = 8 * m_banks;
	uint32 macroTileHeight = 8 * m_pipes;
	macroTilePitch >>= aspectShift;
	macroTileHeight <<= aspectShift;

	// offset of the macro tile inside the slice
	const uint32 macroTileBytes = (numSamples * thickness * bpp * macroTileHeight * macroTilePitch + 7) >> 3;
	const uint32 macroTilesPerRow = pitch / macroTilePitch;
	const uint32 macroTileIndexX = x / macroTilePitch;
	const uint32 macroTileIndexY = y / macroTileHeight;
	const uint32 macroTileOffset = (macroTileIndexX + macroTilesPerRow * macroTileIndexY) * macroTileBytes;

	if (TM_IsBankSwapped(tileMode))
	{
		const uint32 bankSwapWidth = ComputeSurfaceBankSwappedWidth(tileMode, bpp, numSamples, pitch);
		const uint32 swapIndex = macroTilePitch * macroTileIndexX / bankSwapWidth;
		bank ^= bankSwapOrder[swapIndex & (m_banks - 1)];
	}

	// Drop the pipe/bank bits from the linear offset, add the in-tile offset, then re-insert
	// the computed pipe and bank above the pipe interleave bits.
	const uint32 numSwizzleBits = (m_banksBitcount + m_pipesBitcount);
	const uint32 interleaveMask = (1 << m_pipeInterleaveBytesBitcount) - 1;
	uint32 macroSliceOffset = (uint32)((macroTileOffset + sliceOffset) >> numSwizzleBits);
	macroSliceOffset += elemOffset;

	const uint32 offsetHigh = (macroSliceOffset & ~interleaveMask) << numSwizzleBits;
	const uint32 offsetLow = macroSliceOffset & interleaveMask;
	const uint32 pipeOffset = (pipe << m_pipeInterleaveBytesBitcount);
	const uint32 bankOffset = (bank << (m_pipesBitcount + m_pipeInterleaveBytesBitcount));
	return offsetHigh | offsetLow | pipeOffset | bankOffset;
}
|
|
|
|
// Fills `info` with the surface parameters plus every value of the macro-tiled address
// calculation that does not depend on the pixel's x/y coordinate, for use by the
// ComputeSurfaceAddrFromCoordMacroTiledCached* routines.
//
// info:        destination; must not be null.
// slice:       slice the cached info is valid for.
// sample:      must be 0 (checked by a debug assert; non-zero is not supported).
// bpp, pitch, height, depth, numSamples, tileMode, pipeSwizzle, bankSwizzle: stored as given.
// isDepth:     non-zero selects micro tile type 1 and a per-pixel stride of numSamples * bpp.
// Note: info->bankSwapWidth is only written when the tile mode is bank swapped.
void SetupCachedSurfaceAddrInfo(CachedSurfaceAddrInfo* info, uint32 slice, uint32 sample, uint32 bpp, uint32 pitch, uint32 height, uint32 depth, uint32 numSamples, E_HWTILEMODE tileMode, int isDepth, uint32 pipeSwizzle, uint32 bankSwizzle)
{
	CachedSurfaceAddrInfo& ci = *info;

	// surface parameters
	ci.slice = slice;
	ci.sample = sample;
	ci.bpp = bpp;
	ci.pitch = pitch;
	ci.height = height;
	ci.depth = depth;
	ci.numSamples = numSamples;
	ci.tileMode = tileMode;
	ci.isDepth = isDepth;
	ci.pipeSwizzle = pipeSwizzle;
	ci.bankSwizzle = bankSwizzle;

	// micro tile
	ci.microTileThickness = LatteAddrLib::TM_GetThickness((E_HWTILEMODE)tileMode);
	ci.microTileBits = ci.numSamples * ci.bpp * (ci.microTileThickness * (8 * 8));
	ci.microTileBytes = ci.microTileBits >> 3;
	ci.microTileType = (ci.isDepth != 0) ? 1 : 0;
	cemu_assert_debug(sample == 0); // non-zero not supported
	ci.rotation = ComputeSurfaceRotationFromTileMode((E_HWTILEMODE)tileMode);

	// macro tile dimensions; the THIN2/THIN4 modes trade pitch for height
	uint32 aspectShift = 0;
	switch (ci.tileMode)
	{
	case E_HWTILEMODE::TM_2D_TILED_THIN2:
	case E_HWTILEMODE::TM_2B_TILED_THIN2:
		aspectShift = 1;
		break;
	case E_HWTILEMODE::TM_2D_TILED_THIN4:
	case E_HWTILEMODE::TM_2B_TILED_THIN4:
		aspectShift = 2;
		break;
	default:
		break;
	}
	ci.macroTilePitch = 8 * m_banks;
	ci.macroTileHeight = 8 * m_pipes;
	ci.macroTilePitch >>= aspectShift;
	ci.macroTileHeight <<= aspectShift;
	// store the index of the highest set bit of each dimension so lookups can shift instead of divide
	_BitScanReverse((DWORD*)&ci.macroTilePitchBits, ci.macroTilePitch);
	_BitScanReverse((DWORD*)&ci.macroTileHeightBits, ci.macroTileHeight);
	ci.macroTilesPerRow = ci.pitch / ci.macroTilePitch;
	ci.macroTileBytes = (ci.numSamples * ci.microTileThickness * ci.bpp * ci.macroTileHeight * ci.macroTilePitch + 7) >> 3;

	// slice
	ci.sliceBytes = (ci.height * (uint64)ci.pitch * ci.microTileThickness * ci.bpp * ci.numSamples + 7) / 8;
	ci.sliceIn = ci.slice;
	if (TM_IsThickAndMacroTiled(tileMode))
		ci.sliceIn >>= 2;

	// bank swap
	if (TM_IsBankSwapped(tileMode))
		ci.bankSwapWidth = ComputeSurfaceBankSwappedWidth(tileMode, ci.bpp, ci.numSamples, ci.pitch);

	// pixel offset multiplier (bits per pixel index step)
	if (ci.isDepth)
		ci.pixelOffsetMul = ci.numSamples * ci.bpp;
	else
		ci.pixelOffsetMul = ci.bpp;
	ci.bytesPerPixel = ci.pixelOffsetMul >> 3;

	// table for micro tile offset calculation (we could pre-generate these)
	// entry layout: x in bits 0-2, y in bits 3-5, z in bits 6-8 of the table index
	for (sint32 entry = 0; entry < 8 * 8 * 8; entry++)
	{
		const sint32 px = entry & 7;
		const sint32 py = (entry >> 3) & 7;
		const sint32 pz = entry >> 6;
		uint16 v = _ComputePixelIndexWithinMicroTile(px, py, pz, ci.bpp, ci.tileMode, ci.microTileType);
		ci.microTilePixelIndexTable[entry] = v;
	}

	// other constant values: swizzle plus per-slice rotation, XORed into the bank/pipe index later
	const uint32 swizzle = ci.pipeSwizzle + m_pipes * ci.bankSwizzle;
	ci.c0 = (swizzle + ci.sliceIn * ci.rotation);
}
|
|
|
|
// Computes the byte offset of pixel (x, y) in a macro-tiled surface using the values
// precomputed by SetupCachedSurfaceAddrInfo. The slice and all surface parameters come from
// `info`; no sample offset is applied.
// NOTE(review): when a micro tile is split into sample slices, the uncached routine derives
// slice/macro tile sizes from the reduced per-split sample count, while the cached values in
// `info` were derived from info->numSamples — confirm this difference is intended.
uint32 ComputeSurfaceAddrFromCoordMacroTiledCached(uint32 x, uint32 y, CachedSurfaceAddrInfo* info)
{
	const uint32 numSamples = info->numSamples;

	// in-tile pixel offset in bits, looked up by (x, y, slice) within the 8x8x8 table
	const uint32 pixelIndex = (uint32)info->microTilePixelIndexTable[(x & 7) + ((y & 7) << 3) + ((info->slice & 7) << 6)];
	uint32 pixelOffset = pixelIndex * info->pixelOffsetMul;

	// Micro tiles larger than the split size are divided into several "sample slices".
	const uint32 bytesPerSample = info->microTileBytes / numSamples;
	uint32 sampleSlice = 0;
	uint32 numSampleSplits = 1;
	if (numSamples > 1 && info->microTileBytes > m_splitSize)
	{
		const uint32 samplesPerSlice = m_splitSize / bytesPerSample;
		numSampleSplits = numSamples / samplesPerSlice;
		sampleSlice = pixelOffset / (info->microTileBits / numSampleSplits);
		pixelOffset %= info->microTileBits / numSampleSplits;
	}
	pixelOffset >>= 3; // bits to bytes

	// pipe/bank selection; info->c0 carries the swizzle and slice rotation
	uint32 pipe = _ComputePipeFromCoordWoRotation(x, y);
	uint32 bank = _ComputeBankFromCoordWoRotation(x, y);
	uint32 bankPipe = pipe + m_pipes * bank;
	bankPipe ^= m_pipes * sampleSlice * ((m_banks >> 1) + 1) ^ info->c0;
	bankPipe %= m_pipes * m_banks;
	pipe = bankPipe % m_pipes;
	bank = bankPipe / m_pipes;

	// slice and macro tile offsets
	const uint32 sliceOffset = info->sliceBytes * ((sampleSlice + numSampleSplits * info->slice) / info->microTileThickness);
	const uint32 macroTileIndexX = x >> info->macroTilePitchBits;
	const uint32 macroTileIndexY = y >> info->macroTileHeightBits;
	const uint32 macroTileOffset = (macroTileIndexX + (info->pitch >> info->macroTilePitchBits) * macroTileIndexY) * info->macroTileBytes;

	if (TM_IsBankSwapped(info->tileMode))
	{
		const uint32 swapIndex = info->macroTilePitch * macroTileIndexX / info->bankSwapWidth;
		bank ^= bankSwapOrder[swapIndex & (m_banks - 1)];
	}

	// Drop the pipe/bank bits from the linear offset, add the in-tile offset, then re-insert
	// the computed pipe and bank above the pipe interleave bits.
	const uint32 numSwizzleBits = (m_banksBitcount + m_pipesBitcount);
	const uint32 interleaveMask = (1 << m_pipeInterleaveBytesBitcount) - 1;
	uint32 macroSliceOffset = (uint32)((macroTileOffset + sliceOffset) >> numSwizzleBits);
	macroSliceOffset += pixelOffset;

	const uint32 offsetHigh = (macroSliceOffset & ~interleaveMask) << numSwizzleBits;
	const uint32 offsetLow = macroSliceOffset & interleaveMask;
	const uint32 pipeOffset = (pipe << m_pipeInterleaveBytesBitcount);
	const uint32 bankOffset = (bank << (m_pipesBitcount + m_pipeInterleaveBytesBitcount));
	return offsetHigh | offsetLow | pipeOffset | bankOffset;
}
|
|
|
|
/*
|
|
* Optimized routine with following assumptions:
|
|
* tileMode is 4
|
|
* samples is 1
|
|
*/
|
|
uint32 ComputeSurfaceAddrFromCoordMacroTiledCached_tm04_sample1(uint32 x, uint32 y, CachedSurfaceAddrInfo* info)
|
|
{
|
|
uint32 pixelIndex = (uint32)info->microTilePixelIndexTable[(x & 7) + ((y & 7) << 3) + ((info->slice & 7) << 6)];
|
|
uint32 pixelOffset = pixelIndex * info->pixelOffsetMul;
|
|
pixelOffset >>= 3; // bits to bytes
|
|
uint32 pipe = _ComputePipeFromCoordWoRotation(x, y); // pipe = ((y >> 3) ^ (x >> 3)) & 1;
|
|
uint32 bank = _ComputeBankFromCoordWoRotation(x, y); // based on (x>>3)&3 and (y>>4)&3
|
|
pipe ^= (info->c0 >> 0) & 1;
|
|
bank ^= (info->c0 >> 1) & 3;
|
|
uint32 sliceOffset = info->sliceBytes * (info->slice / info->microTileThickness);
|
|
uint32 macroTileIndexX = x >> info->macroTilePitchBits;
|
|
uint32 macroTileIndexY = y >> info->macroTileHeightBits;
|
|
uint32 macroTileOffset = (macroTileIndexX + (info->pitch >> info->macroTilePitchBits) * macroTileIndexY) * info->macroTileBytes;
|
|
uint32 pipeOffset = (pipe << m_pipeInterleaveBytesBitcount);
|
|
uint32 bankOffset = (bank << (m_pipesBitcount + m_pipeInterleaveBytesBitcount));
|
|
uint32 numSwizzleBits = (m_banksBitcount + m_pipesBitcount);
|
|
uint32 macroSliceOffset = (uint32)((macroTileOffset + sliceOffset) >> numSwizzleBits);
|
|
macroSliceOffset += pixelOffset;
|
|
uint32 macroSliceOffsetHigh = macroSliceOffset & ~((1 << m_pipeInterleaveBytesBitcount) - 1);
|
|
uint32 macroSliceOffsetLow = macroSliceOffset & ((1 << m_pipeInterleaveBytesBitcount) - 1);
|
|
uint32 finalMacroTileOffset = (macroSliceOffsetHigh << numSwizzleBits) | macroSliceOffsetLow;
|
|
return finalMacroTileOffset | pipeOffset | bankOffset;
|
|
}
|
|
|
|
}; |