rsx: wcb scaling fixes

This commit is contained in:
kd-11 2017-11-29 19:08:16 +03:00
parent 9d27ac359b
commit 08b829dc22
6 changed files with 101 additions and 67 deletions

View file

@ -248,6 +248,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
const auto color_locations = get_locations(); const auto color_locations = get_locations();
const auto aa_mode = rsx::method_registers.surface_antialias(); const auto aa_mode = rsx::method_registers.surface_antialias();
const auto bpp = get_format_block_size_in_bytes(surface_format); const auto bpp = get_format_block_size_in_bytes(surface_format);
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
for (int i = 0; i < rsx::limits::color_buffers_count; ++i) for (int i = 0; i < rsx::limits::color_buffers_count; ++i)
{ {
@ -354,7 +355,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
{ {
if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue; if (!m_surface_info[i].address || !m_surface_info[i].pitch) continue;
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
const u32 range = m_surface_info[i].pitch * m_surface_info[i].height * aa_factor; const u32 range = m_surface_info[i].pitch * m_surface_info[i].height * aa_factor;
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface_info[i].address, range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch, m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[i]), m_surface_info[i].address, range, m_surface_info[i].width, m_surface_info[i].height, m_surface_info[i].pitch,
color_format.format, color_format.type, color_format.swap_bytes); color_format.format, color_format.type, color_format.swap_bytes);
@ -370,7 +370,6 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk
u32 pitch = m_depth_surface_info.width * 2; u32 pitch = m_depth_surface_info.width * 2;
if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2; if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2;
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
const u32 range = pitch * m_depth_surface_info.height * aa_factor; const u32 range = pitch * m_depth_surface_info.height * aa_factor;
m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, pitch, m_gl_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_depth_surface_info.width, m_depth_surface_info.height, pitch,
depth_format_gl.format, depth_format_gl.type, true); depth_format_gl.format, depth_format_gl.type, true);

View file

@ -442,12 +442,10 @@ namespace gl
} }
else else
{ {
//TODO: Use compression hint from the gcm tile information
//TODO: Fall back to bilinear filtering if samples > 2
const u8 pixel_size = get_pixel_size(format, type); const u8 pixel_size = get_pixel_size(format, type);
const u8 samples = rsx_pitch / real_pitch; const u8 samples_u = (aa_mode == rsx::surface_antialiasing::center_1_sample) ? 1 : 2;
rsx::scale_image_nearest(dst, const_cast<const void*>(data), width, height, rsx_pitch, real_pitch, pixel_size, samples); const u8 samples_v = (aa_mode == rsx::surface_antialiasing::square_centered_4_samples || aa_mode == rsx::surface_antialiasing::square_rotated_4_samples) ? 2 : 1;
rsx::scale_image_nearest(dst, const_cast<const void*>(data), width, height, rsx_pitch, real_pitch, pixel_size, samples_u, samples_v);
} }
/* switch (gcm_format) /* switch (gcm_format)

View file

@ -2481,6 +2481,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
const auto fbo_height = rsx::apply_resolution_scale(clip_height, true); const auto fbo_height = rsx::apply_resolution_scale(clip_height, true);
const auto aa_mode = rsx::method_registers.surface_antialias(); const auto aa_mode = rsx::method_registers.surface_antialias();
const auto bpp = get_format_block_size_in_bytes(color_fmt); const auto bpp = get_format_block_size_in_bytes(color_fmt);
const u32 aa_factor = (aa_mode == rsx::surface_antialiasing::center_1_sample || aa_mode == rsx::surface_antialiasing::diagonal_centered_2_samples) ? 1 : 2;
if (m_draw_fbo) if (m_draw_fbo)
{ {
@ -2588,8 +2589,8 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
for (u8 index : draw_buffers) for (u8 index : draw_buffers)
{ {
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue; if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height;
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height * aa_factor;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range, m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch, color_fmt_info.first, color_fmt_info.second); m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch, color_fmt_info.first, color_fmt_info.second);
} }
@ -2608,7 +2609,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context)
pitch *= 2; pitch *= 2;
} }
const u32 range = pitch * m_depth_surface_info.height; const u32 range = pitch * m_depth_surface_info.height * aa_factor;
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range, m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range,
m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, gcm_format, true); m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, gcm_format, true);
} }

View file

@ -308,9 +308,20 @@ namespace vk
{ {
//Scale image to fit //Scale image to fit
//usually we can just get away with nearest filtering //usually we can just get away with nearest filtering
const u8 samples = rsx_pitch / real_pitch; u8 samples_u = 1, samples_v = 1;
switch (static_cast<vk::render_target*>(vram_texture)->aa_mode)
{
case rsx::surface_antialiasing::diagonal_centered_2_samples:
samples_u = 2;
break;
case rsx::surface_antialiasing::square_centered_4_samples:
case rsx::surface_antialiasing::square_rotated_4_samples:
samples_u = 2;
samples_v = 2;
break;
}
rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples, pack_unpack_swap_bytes); rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples_u, samples_v, pack_unpack_swap_bytes);
} }
dma_buffer->unmap(); dma_buffer->unmap();

View file

@ -140,18 +140,19 @@ namespace rsx
* N - Sample count * N - Sample count
*/ */
template <typename T, typename U> template <typename T, typename U>
void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples) void scale_image_fallback_impl(T* dst, const U* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v)
{ {
u32 dst_offset = 0; u32 dst_offset = 0;
u32 src_offset = 0; u32 src_offset = 0;
u32 padding = (dst_pitch - (src_pitch * samples)) / sizeof(T); u32 padding = (dst_pitch - (src_pitch * samples_u)) / sizeof(T);
for (u16 h = 0; h < src_height; ++h) for (u16 h = 0; h < src_height; ++h)
{ {
const auto row_start = dst_offset;
for (u16 w = 0; w < src_width; ++w) for (u16 w = 0; w < src_width; ++w)
{ {
for (u8 n = 0; n < samples; ++n) for (u8 n = 0; n < samples_u; ++n)
{ {
dst[dst_offset++] = src[src_offset]; dst[dst_offset++] = src[src_offset];
} }
@ -160,51 +161,57 @@ namespace rsx
} }
dst_offset += padding; dst_offset += padding;
for (int n = 1; n < samples_v; ++n)
{
memcpy(&dst[dst_offset], &dst[row_start], dst_pitch);
dst_offset += dst_pitch;
}
} }
} }
void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples) void scale_image_fallback(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v)
{ {
switch (pixel_size) switch (pixel_size)
{ {
case 1: case 1:
scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break; break;
case 2: case 2:
scale_image_fallback_impl<u16, u16>((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); scale_image_fallback_impl<u16, u16>((u16*)dst, (const u16*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break; break;
case 4: case 4:
scale_image_fallback_impl<u32, u32>((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); scale_image_fallback_impl<u32, u32>((u32*)dst, (const u32*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break; break;
case 8: case 8:
scale_image_fallback_impl<u64, u64>((u64*)dst, (const u64*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); scale_image_fallback_impl<u64, u64>((u64*)dst, (const u64*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break; break;
case 16: case 16:
scale_image_fallback_impl<u128, u128>((u128*)dst, (const u128*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); scale_image_fallback_impl<u128, u128>((u128*)dst, (const u128*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break; break;
default: default:
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size); fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
} }
} }
void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples) void scale_image_fallback_with_byte_swap(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v)
{ {
switch (pixel_size) switch (pixel_size)
{ {
case 1: case 1:
scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); scale_image_fallback_impl<u8, u8>((u8*)dst, (const u8*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break; break;
case 2: case 2:
scale_image_fallback_impl<u16, be_t<u16>>((u16*)dst, (const be_t<u16>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); scale_image_fallback_impl<u16, be_t<u16>>((u16*)dst, (const be_t<u16>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break; break;
case 4: case 4:
scale_image_fallback_impl<u32, be_t<u32>>((u32*)dst, (const be_t<u32>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); scale_image_fallback_impl<u32, be_t<u32>>((u32*)dst, (const be_t<u32>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break; break;
case 8: case 8:
scale_image_fallback_impl<u64, be_t<u64>>((u64*)dst, (const be_t<u64>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); scale_image_fallback_impl<u64, be_t<u64>>((u64*)dst, (const be_t<u64>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break; break;
case 16: case 16:
scale_image_fallback_impl<u128, be_t<u128>>((u128*)dst, (const be_t<u128>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); scale_image_fallback_impl<u128, be_t<u128>>((u128*)dst, (const be_t<u128>*)src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
break; break;
default: default:
fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size); fmt::throw_exception("unsupported pixel size %d" HERE, pixel_size);
@ -279,19 +286,24 @@ namespace rsx
} }
} }
void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples, bool swap_bytes) void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v, bool swap_bytes)
{ {
//Scale this image by repeating pixel data n times //Scale this image by repeating pixel data n times
//n = expected_pitch / real_pitch //n = expected_pitch / real_pitch
//Use of fixed argument templates for performance reasons //Use of fixed argument templates for performance reasons
const u16 dst_width = dst_pitch / pixel_size; const u16 dst_width = dst_pitch / pixel_size;
const u16 padding = dst_width - (src_width * samples); const u16 padding = dst_width - (src_width * samples_u);
if (!swap_bytes) if (!swap_bytes)
{ {
switch (samples) if (samples_v == 1)
{ {
switch (samples_u)
{
case 1:
scale_image_fast<1>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 2: case 2:
scale_image_fast<2>(dst, src, pixel_size, src_width, src_height, padding); scale_image_fast<2>(dst, src, pixel_size, src_width, src_height, padding);
break; break;
@ -308,14 +320,23 @@ namespace rsx
scale_image_fast<16>(dst, src, pixel_size, src_width, src_height, padding); scale_image_fast<16>(dst, src, pixel_size, src_width, src_height, padding);
break; break;
default: default:
LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", dst_pitch, src_pitch); scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, 1);
scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples);
} }
} }
else else
{ {
switch (samples) scale_image_fallback(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
}
}
else
{ {
if (samples_v == 1)
{
switch (samples_u)
{
case 1:
scale_image_fast_with_byte_swap<1>(dst, src, pixel_size, src_width, src_height, padding);
break;
case 2: case 2:
scale_image_fast_with_byte_swap<2>(dst, src, pixel_size, src_width, src_height, padding); scale_image_fast_with_byte_swap<2>(dst, src, pixel_size, src_width, src_height, padding);
break; break;
@ -332,8 +353,12 @@ namespace rsx
scale_image_fast_with_byte_swap<16>(dst, src, pixel_size, src_width, src_height, padding); scale_image_fast_with_byte_swap<16>(dst, src, pixel_size, src_width, src_height, padding);
break; break;
default: default:
LOG_ERROR(RSX, "Unsupported RTT scaling factor: dst_pitch=%d src_pitch=%d", dst_pitch, src_pitch); scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, 1);
scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples); }
}
else
{
scale_image_fallback_with_byte_swap(dst, src, src_width, src_height, dst_pitch, src_pitch, pixel_size, samples_u, samples_v);
} }
} }
} }

View file

@ -143,7 +143,7 @@ namespace rsx
} }
} }
void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples, bool swap_bytes = false); void scale_image_nearest(void* dst, const void* src, u16 src_width, u16 src_height, u16 dst_pitch, u16 src_pitch, u8 pixel_size, u8 samples_u, u8 samples_v, bool swap_bytes = false);
void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch, void convert_scale_image(u8 *dst, AVPixelFormat dst_format, int dst_width, int dst_height, int dst_pitch,
const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear); const u8 *src, AVPixelFormat src_format, int src_width, int src_height, int src_pitch, int src_slice_h, bool bilinear);