mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-11 01:08:39 +12:00
rsx: Refactor image scaling code; Scale downloaded surfaces with vulkan
This commit is contained in:
parent
c7db322873
commit
e1a75deb25
4 changed files with 244 additions and 164 deletions
|
@ -16,6 +16,8 @@
|
||||||
#include "../../Memory/vm.h"
|
#include "../../Memory/vm.h"
|
||||||
#include "Utilities/Config.h"
|
#include "Utilities/Config.h"
|
||||||
|
|
||||||
|
#include "../rsx_utils.h"
|
||||||
|
|
||||||
class GLGSRender;
|
class GLGSRender;
|
||||||
|
|
||||||
extern cfg::bool_entry g_cfg_rsx_write_color_buffers;
|
extern cfg::bool_entry g_cfg_rsx_write_color_buffers;
|
||||||
|
@ -113,76 +115,6 @@ namespace gl
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
//TODO: Move swscale routines to RSX shared
|
|
||||||
//Generic nearest scaling used when no fixed-sample-count template applies.
//Every source pixel (pixel_size bytes) is copied `samples` times in a row into dst.
//After each source row, dst skips the remaining dst_pitch - src_pitch * samples bytes.
//dst_pitch and src_pitch are in bytes.
//NOTE(review): src is advanced contiguously, so source rows are assumed tightly packed - confirm against callers
void scale_image_fallback(u8* dst, const u8* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
{
	u32 dst_offset = 0;
	u32 src_offset = 0;

	//Unused bytes at the end of every destination row
	const u32 padding = dst_pitch - (src_pitch * samples);

	for (u16 h = 0; h < src_height; ++h)
	{
		for (u16 w = 0; w < src_width; ++w)
		{
			//Repeat the current pixel `samples` times
			for (u8 n = 0; n < samples; ++n)
			{
				memcpy(&dst[dst_offset], &src[src_offset], pixel_size);
				dst_offset += pixel_size;
			}

			//Fetch next pixel
			src_offset += pixel_size;
		}

		//Pad this row
		dst_offset += padding;
	}
}
|
|
||||||
|
|
||||||
//Nearest scaling with a compile-time sample count.
//T - pixel type (one element per pixel)
//N - number of times each source pixel is repeated horizontally
//padding is the number of T elements skipped in dst after every source row.
//NOTE(review): src is read contiguously, so source rows are assumed tightly packed - confirm against callers
template <typename T, int N>
void scale_image_impl(T* dst, const T* src, u16 src_width, u16 src_height, u16 padding)
{
	u32 dst_offset = 0;
	u32 src_offset = 0;

	for (u16 h = 0; h < src_height; ++h)
	{
		for (u16 w = 0; w < src_width; ++w)
		{
			//Counter has the same type as N so the comparison cannot wrap for large N
			for (int n = 0; n < N; ++n)
			{
				dst[dst_offset++] = src[src_offset];
			}

			//Fetch next pixel
			src_offset++;
		}

		//Pad this row
		dst_offset += padding;
	}
}
|
|
||||||
|
|
||||||
template <int N>
|
|
||||||
void scale_image(void *dst, void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding)
|
|
||||||
{
|
|
||||||
switch (pixel_size)
|
|
||||||
{
|
|
||||||
case 1:
|
|
||||||
scale_image_impl<u8, N>((u8*)dst, (u8*)src, current_width, current_height, padding);
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
scale_image_impl<u16, N>((u16*)dst, (u16*)src, current_width, current_height, padding);
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
scale_image_impl<u32, N>((u32*)dst, (u32*)src, current_width, current_height, padding);
|
|
||||||
break;
|
|
||||||
case 8:
|
|
||||||
scale_image_impl<u64, N>((u64*)dst, (u64*)src, current_width, current_height, padding);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
fmt::throw_exception("unsupported rtt format 0x%X" HERE, (u32)format);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void init_buffer()
|
void init_buffer()
|
||||||
{
|
{
|
||||||
if (pbo_id)
|
if (pbo_id)
|
||||||
|
@ -341,36 +273,11 @@ namespace gl
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
//TODO: Use compression hint from the gcm tile information
|
//TODO: Use compression hint from the gcm tile information
|
||||||
//Scale this image by repeating pixel data n times
|
//TODO: Fall back to bilinear filtering if samples > 2
|
||||||
//n = expected_pitch / real_pitch
|
|
||||||
//Use of fixed argument templates for performance reasons
|
|
||||||
|
|
||||||
const u16 pixel_size = get_pixel_size(format, type);
|
const u8 pixel_size = get_pixel_size(format, type);
|
||||||
const u16 dst_width = current_pitch / pixel_size;
|
const u8 samples = current_pitch / real_pitch;
|
||||||
const u16 sample_count = current_pitch / real_pitch;
|
rsx::scale_image_nearest(dst, const_cast<const void*>(data), current_width, current_height, current_pitch, real_pitch, pixel_size, samples);
|
||||||
const u16 padding = dst_width - (current_width * sample_count);
|
|
||||||
|
|
||||||
switch (sample_count)
|
|
||||||
{
|
|
||||||
case 2:
|
|
||||||
scale_image<2>(dst, data, pixel_size, current_width, current_height, padding);
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
scale_image<3>(dst, data, pixel_size, current_width, current_height, padding);
|
|
||||||
break;
|
|
||||||
case 4:
|
|
||||||
scale_image<4>(dst, data, pixel_size, current_width, current_height, padding);
|
|
||||||
break;
|
|
||||||
case 8:
|
|
||||||
scale_image<8>(dst, data, pixel_size, current_width, current_height, padding);
|
|
||||||
break;
|
|
||||||
case 16:
|
|
||||||
scale_image<16>(dst, data, pixel_size, current_width, current_height, padding);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", current_pitch, real_pitch);
|
|
||||||
scale_image_fallback(dst, static_cast<u8*>(data), current_width, current_height, current_pitch, real_pitch, pixel_size, sample_count);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
#include "VKRenderTargets.h"
|
#include "VKRenderTargets.h"
|
||||||
#include "VKGSRender.h"
|
#include "VKGSRender.h"
|
||||||
#include "../Common/TextureUtils.h"
|
#include "../Common/TextureUtils.h"
|
||||||
|
#include "../rsx_utils.h"
|
||||||
|
|
||||||
namespace vk
|
namespace vk
|
||||||
{
|
{
|
||||||
|
@ -195,10 +196,7 @@ namespace vk
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void do_memory_transfer(void *pixels_dst, void *pixels_src)
|
void do_memory_transfer(void *pixels_dst, const void *pixels_src)
|
||||||
{
|
|
||||||
//LOG_ERROR(RSX, "COPY %d -> %d", native_pitch, pitch);
|
|
||||||
if (pitch == native_pitch)
|
|
||||||
{
|
{
|
||||||
if (sizeof T == 1)
|
if (sizeof T == 1)
|
||||||
memcpy(pixels_dst, pixels_src, cpu_address_range);
|
memcpy(pixels_dst, pixels_src, cpu_address_range);
|
||||||
|
@ -213,42 +211,6 @@ namespace vk
|
||||||
typed_dst[px] = typed_src[px];
|
typed_dst[px] = typed_src[px];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
if (sizeof T == 1)
|
|
||||||
{
|
|
||||||
u8 *typed_dst = (u8 *)pixels_dst;
|
|
||||||
u8 *typed_src = (u8 *)pixels_src;
|
|
||||||
|
|
||||||
//TODO: Scaling
|
|
||||||
for (u16 row = 0; row < height; ++row)
|
|
||||||
{
|
|
||||||
memcpy(typed_dst, typed_src, native_pitch);
|
|
||||||
typed_dst += pitch;
|
|
||||||
typed_src += native_pitch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
const u32 src_step = native_pitch / sizeof T;
|
|
||||||
const u32 dst_step = pitch / sizeof T;
|
|
||||||
|
|
||||||
auto typed_dst = (be_t<T> *)pixels_dst;
|
|
||||||
auto typed_src = (T *)pixels_src;
|
|
||||||
|
|
||||||
for (u16 row = 0; row < height; ++row)
|
|
||||||
{
|
|
||||||
for (u16 px = 0; px < width; ++px)
|
|
||||||
{
|
|
||||||
typed_dst[px] = typed_src[px];
|
|
||||||
}
|
|
||||||
|
|
||||||
typed_dst += dst_step;
|
|
||||||
typed_src += src_step;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void flush(vk::render_device& dev, vk::command_buffer& cmd, u32 heap_index, VkQueue submit_queue)
|
void flush(vk::render_device& dev, vk::command_buffer& cmd, u32 heap_index, VkQueue submit_queue)
|
||||||
{
|
{
|
||||||
|
@ -263,13 +225,14 @@ namespace vk
|
||||||
|
|
||||||
protect(utils::protection::rw);
|
protect(utils::protection::rw);
|
||||||
|
|
||||||
//TODO: Image scaling, etc
|
|
||||||
void* pixels_src = dma_buffer->map(0, cpu_address_range);
|
void* pixels_src = dma_buffer->map(0, cpu_address_range);
|
||||||
void* pixels_dst = vm::base(cpu_address_base);
|
void* pixels_dst = vm::base(cpu_address_base);
|
||||||
|
|
||||||
//We have to do our own byte swapping since the driver doesnt do it for us
|
|
||||||
const u8 bpp = native_pitch / width;
|
const u8 bpp = native_pitch / width;
|
||||||
|
|
||||||
|
if (pitch == native_pitch)
|
||||||
|
{
|
||||||
|
//We have to do our own byte swapping since the driver doesn't do it for us
|
||||||
switch (bpp)
|
switch (bpp)
|
||||||
{
|
{
|
||||||
default:
|
default:
|
||||||
|
@ -287,6 +250,15 @@ namespace vk
|
||||||
do_memory_transfer<u64>(pixels_dst, pixels_src);
|
do_memory_transfer<u64>(pixels_dst, pixels_src);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
//Scale image to fit
|
||||||
|
//usually we can just get away with nearest filtering
|
||||||
|
const u8 samples = pitch / native_pitch;
|
||||||
|
|
||||||
|
rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, pitch, native_pitch, bpp, samples, true);
|
||||||
|
}
|
||||||
|
|
||||||
dma_buffer->unmap();
|
dma_buffer->unmap();
|
||||||
//It's highly likely that this surface will be reused, so we just leave resources in place
|
//It's highly likely that this surface will be reused, so we just leave resources in place
|
||||||
|
|
|
@ -132,4 +132,203 @@ namespace rsx
|
||||||
return { blend_color_r / 255.f, blend_color_g / 255.f, blend_color_b / 255.f, blend_color_a / 255.f };
|
return { blend_color_r / 255.f, blend_color_g / 255.f, blend_color_b / 255.f, blend_color_a / 255.f };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Fast image scaling routines
|
||||||
|
* Only uses fast nearest scaling and integral scaling factors
|
||||||
|
* T - Dst type
|
||||||
|
* U - Src type
|
||||||
|
* N - Sample count
|
||||||
|
*/
|
||||||
|
template <typename T, typename U>
|
||||||
|
void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
|
||||||
|
{
|
||||||
|
u32 dst_offset = 0;
|
||||||
|
u32 src_offset = 0;
|
||||||
|
|
||||||
|
u32 padding = (dst_pitch - (src_pitch * samples)) / sizeof T;
|
||||||
|
|
||||||
|
for (u16 h = 0; h < src_height; ++h)
|
||||||
|
{
|
||||||
|
for (u16 w = 0; w < src_width; ++w)
|
||||||
|
{
|
||||||
|
for (u8 n = 0; n < samples; ++n)
|
||||||
|
{
|
||||||
|
dst[dst_offset++] = src[src_offset];
|
||||||
|
}
|
||||||
|
|
||||||
|
src_offset++;
|
||||||
|
}
|
||||||
|
|
||||||
|
dst_offset += padding;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
|
||||||
|
{
|
||||||
|
switch (pixel_size)
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
scale_image_fallback_impl<u16, u16>((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
scale_image_fallback_impl<u32, u32>((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
scale_image_fallback_impl<u64, u64>((u64*)dst, (const u64*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples)
|
||||||
|
{
|
||||||
|
switch (pixel_size)
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
scale_image_fallback_impl<u16, be_t<u16>>((u16*)dst, (const be_t<u16>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
scale_image_fallback_impl<u32, be_t<u32>>((u32*)dst, (const be_t<u32>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
scale_image_fallback_impl<u64, be_t<u64>>((u64*)dst, (const be_t<u64>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U, int N>
|
||||||
|
void scale_image_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 padding)
|
||||||
|
{
|
||||||
|
u32 dst_offset = 0;
|
||||||
|
u32 src_offset = 0;
|
||||||
|
|
||||||
|
for (u16 h = 0; h < src_height; ++h)
|
||||||
|
{
|
||||||
|
for (u16 w = 0; w < src_width; ++w)
|
||||||
|
{
|
||||||
|
for (u8 n = 0; n < N; ++n)
|
||||||
|
{
|
||||||
|
dst[dst_offset++] = src[src_offset];
|
||||||
|
}
|
||||||
|
|
||||||
|
//Fetch next pixel
|
||||||
|
src_offset++;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Pad this row
|
||||||
|
dst_offset += padding;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int N>
|
||||||
|
void scale_image_fast(void *dst, const void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding)
|
||||||
|
{
|
||||||
|
switch (pixel_size)
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
scale_image_impl<u8, u8, N>((u8*)dst, (const u8*)src, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
scale_image_impl<u16, u16, N>((u16*)dst, (const u16*)src, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
scale_image_impl<u32, u32, N>((u32*)dst, (const u32*)src, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
scale_image_impl<u64, u64, N>((u64*)dst, (const u64*)src, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <int N>
|
||||||
|
void scale_image_fast_with_byte_swap(void *dst, const void *src, u8 pixel_size, u16 src_width, u16 src_height, u16 padding)
|
||||||
|
{
|
||||||
|
switch (pixel_size)
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
scale_image_impl<u8, u8, N>((u8*)dst, (const u8*)src, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
scale_image_impl<u16, be_t<u16>, N>((u16*)dst, (const be_t<u16>*)src, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
scale_image_impl<u32, be_t<u32>, N>((u32*)dst, (const be_t<u32>*)src, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
scale_image_impl<u64, be_t<u64>, N>((u64*)dst, (const be_t<u64>*)src, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples, bool swap_bytes)
|
||||||
|
{
|
||||||
|
//Scale this image by repeating pixel data n times
|
||||||
|
//n = expected_pitch / real_pitch
|
||||||
|
//Use of fixed argument templates for performance reasons
|
||||||
|
|
||||||
|
const u16 dst_width = dst_pitch / pixel_size;
|
||||||
|
const u16 padding = dst_width - (src_width * samples);
|
||||||
|
|
||||||
|
if (!swap_bytes)
|
||||||
|
{
|
||||||
|
switch (samples)
|
||||||
|
{
|
||||||
|
case 2:
|
||||||
|
scale_image_fast<2>(dst, src, pixel_size, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
scale_image_fast<3>(dst, src, pixel_size, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
scale_image_fast<4>(dst, src, pixel_size, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
scale_image_fast<8>(dst, src, pixel_size, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
scale_image_fast<16>(dst, src, pixel_size, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", dst_pitch, src_pitch);
|
||||||
|
scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
switch (samples)
|
||||||
|
{
|
||||||
|
case 2:
|
||||||
|
scale_image_fast_with_byte_swap<2>(dst, src, pixel_size, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
scale_image_fast_with_byte_swap<3>(dst, src, pixel_size, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
scale_image_fast_with_byte_swap<4>(dst, src, pixel_size, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
scale_image_fast_with_byte_swap<8>(dst, src, pixel_size, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
scale_image_fast_with_byte_swap<16>(dst, src, pixel_size, src_width, src_height, padding);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", dst_pitch, src_pitch);
|
||||||
|
scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -133,6 +133,8 @@ namespace rsx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples, bool swap_bytes = false);
|
||||||
|
|
||||||
void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
|
void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
|
||||||
const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear);
|
const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue