mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-12 09:48:37 +12:00
d3d12: Take alignment into account in streamBuffer
And use it for texture upload
This commit is contained in:
parent
878a116c4a
commit
51d287d9b0
2 changed files with 20 additions and 15 deletions
|
@ -31,8 +31,9 @@ void streamToBuffer(void* dst, void* src, size_t sizeInBytes)
|
||||||
{
|
{
|
||||||
for (unsigned i = 0; i < sizeInBytes / 16; i++)
|
for (unsigned i = 0; i < sizeInBytes / 16; i++)
|
||||||
{
|
{
|
||||||
__m128i *srcPtr = (__m128i*) ((char*)src + i * 16);
|
|
||||||
_mm_stream_si128((__m128i*)((char*)dst + i * 16), *srcPtr);
|
const __m128i &srcPtr = _mm_loadu_si128((__m128i*) ((char*)src + i * 16));
|
||||||
|
_mm_stream_si128((__m128i*)((char*)dst + i * 16), srcPtr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,20 +45,24 @@ inline
|
||||||
void streamBuffer(void* dst, void* src, size_t sizeInBytes)
|
void streamBuffer(void* dst, void* src, size_t sizeInBytes)
|
||||||
{
|
{
|
||||||
// Assume 64 bytes cache line
|
// Assume 64 bytes cache line
|
||||||
assert(powerOf2Align(sizeInBytes, 64));
|
unsigned offset = 0;
|
||||||
for (unsigned i = 0; i < sizeInBytes / 64; i++)
|
bool isAligned = !((size_t)src & 15);
|
||||||
|
for (; (offset + 64) < sizeInBytes; offset += 64)
|
||||||
{
|
{
|
||||||
char *line = (char*)src + i * 64;
|
char *line = (char*)src + offset;
|
||||||
_mm_prefetch(line, _MM_HINT_NTA);
|
char *dstline = (char*)dst + offset;
|
||||||
__m128i *srcPtr = (__m128i*) (line);
|
// prefetch next line
|
||||||
_mm_stream_si128((__m128i*)((char*)dst + i * 64), *srcPtr);
|
_mm_prefetch(line + 16, _MM_HINT_NTA);
|
||||||
srcPtr = (__m128i*) (line + 16);
|
__m128i srcPtr = isAligned ? _mm_load_si128((__m128i *)line) : _mm_loadu_si128((__m128i *)line);
|
||||||
_mm_stream_si128((__m128i*)((char*)dst + i * 64 + 16), *srcPtr);
|
_mm_stream_si128((__m128i*)dstline, srcPtr);
|
||||||
srcPtr = (__m128i*) (line + 32);
|
srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 16)) : _mm_loadu_si128((__m128i *)(line + 16));
|
||||||
_mm_stream_si128((__m128i*)((char*)dst + i * 64 + 32), *srcPtr);
|
_mm_stream_si128((__m128i*)(dstline + 16), srcPtr);
|
||||||
srcPtr = (__m128i*) (line + 48);
|
srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 32)) : _mm_loadu_si128((__m128i *)(line + 32));
|
||||||
_mm_stream_si128((__m128i*)((char*)dst + i * 64 + 48), *srcPtr);
|
_mm_stream_si128((__m128i*)(dstline + 32), srcPtr);
|
||||||
|
srcPtr = isAligned ? _mm_load_si128((__m128i *)(line + 48)) : _mm_loadu_si128((__m128i *)(line + 48));
|
||||||
|
_mm_stream_si128((__m128i*)(dstline + 48), srcPtr);
|
||||||
}
|
}
|
||||||
|
memcpy((char*)dst + offset, (char*)src + offset, sizeInBytes - offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline
|
inline
|
||||||
|
|
|
@ -246,7 +246,7 @@ size_t D3D12GSRender::UploadTextures()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
streamToBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch);
|
streamBuffer((char*)textureData + row * rowPitch, (char*)pixels + row * m_texture_pitch, m_texture_pitch);
|
||||||
}
|
}
|
||||||
Texture->Unmap(0, nullptr);
|
Texture->Unmap(0, nullptr);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue