cellPngDecDecodeData now handles the CELL_PNGDEC_ARGB case much faster.

Profiling was done with two samples on Solar v2.1, from rpcs3 init to the first frame.
Before the optimization, the profiler found rpcs3 in cellPngDecDecodeData 15.3% of the time.
After the optimization, the profiler found rpcs3 in cellPngDecDecodeData 0.33% of the time, a ~50x improvement.
This commit is contained in:
Michael Yu 2014-05-19 02:14:07 -07:00
parent 9bdb12e3da
commit 3aeb0b0f95

View file

@ -201,7 +201,7 @@ int cellPngDecDecodeData(u32 mainHandle, u32 subHandle, mem8_ptr_t data, const m
case CELL_PNGDEC_ARGB:
{
const char nComponents = 4;
const int nComponents = 4;
image_size *= nComponents;
if (bytesPerLine > width * nComponents || flip) //check if we need padding
{
@ -225,13 +225,19 @@ int cellPngDecDecodeData(u32 mainHandle, u32 subHandle, mem8_ptr_t data, const m
}
else
{
for (uint i = 0; i < image_size; i += nComponents)
uint* dest = (uint*)new char[image_size];
uint* source_current = (uint*)&(image.get()[0]);
uint* dest_current = dest;
for (uint i = 0; i < image_size / nComponents; i++)
{
data += image.get()[i + 3];
data += image.get()[i + 0];
data += image.get()[i + 1];
data += image.get()[i + 2];
uint val = *source_current;
*dest_current = (val >> 24) | (val << 8); // set alpha (A8) as leftmost byte
source_current++;
dest_current++;
}
// NOTE: AppendRawBytes has diff side-effect vs Memory.CopyFromReal
data.AppendRawBytes((u8*)dest, image_size);
delete[] dest;
}
}
break;