From 3aeb0b0f9552cbae1856f396189f1dff7d5ca8c6 Mon Sep 17 00:00:00 2001 From: Michael Yu Date: Mon, 19 May 2014 02:14:07 -0700 Subject: [PATCH] cellPngDecDecodeData handles CELL_PNGDEC_ARGB case much faster. Profiling done with two samples on Solar v2.1 from rpcs3 init to first frame. Before optimization, profiler found rpcs3 in cellPngDecDecodeData 15.3% of the time. Post-optimization, profiler finds rpcs3 in cellPngDecDecodeData 0.33% of the time for ~50x improvement. --- rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp b/rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp index d169321c51..31886ab113 100644 --- a/rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp +++ b/rpcs3/Emu/SysCalls/Modules/cellPngDec.cpp @@ -201,7 +201,7 @@ int cellPngDecDecodeData(u32 mainHandle, u32 subHandle, mem8_ptr_t data, const m case CELL_PNGDEC_ARGB: { - const char nComponents = 4; + const int nComponents = 4; image_size *= nComponents; if (bytesPerLine > width * nComponents || flip) //check if we need padding { @@ -225,13 +225,19 @@ int cellPngDecDecodeData(u32 mainHandle, u32 subHandle, mem8_ptr_t data, const m } else { - for (uint i = 0; i < image_size; i += nComponents) + uint* dest = (uint*)new char[image_size]; + uint* source_current = (uint*)&(image.get()[0]); + uint* dest_current = dest; + for (uint i = 0; i < image_size / nComponents; i++) { - data += image.get()[i + 3]; - data += image.get()[i + 0]; - data += image.get()[i + 1]; - data += image.get()[i + 2]; + uint val = *source_current; + *dest_current = (val >> 24) | (val << 8); // set alpha (A8) as leftmost byte + source_current++; + dest_current++; } + // NOTE: AppendRawBytes has diff side-effect vs Memory.CopyFromReal + data.AppendRawBytes((u8*)dest, image_size); + delete[] dest; } } break;