mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-10 00:41:26 +12:00
rsx/blit: Remove workarounds/hacks added for master. Start implementation/stubs for blit engine rotations in GPU
This commit is contained in:
parent
745f8f9627
commit
17c49d21a5
2 changed files with 59 additions and 33 deletions
|
@ -2119,8 +2119,7 @@ namespace rsx
|
||||||
texaddr, tex.format(), tex_width, tex_height, depth, tex_pitch, slice_h,
|
texaddr, tex.format(), tex_width, tex_height, depth, tex_pitch, slice_h,
|
||||||
extended_dimension, tex.remap(), tex.decoded_remap(), _pool);
|
extended_dimension, tex.remap(), tex.decoded_remap(), _pool);
|
||||||
|
|
||||||
if (!result.external_subresource_desc.sections_to_copy.empty() &&
|
if (!result.external_subresource_desc.sections_to_copy.empty() && result.atlas_covers_target_area())
|
||||||
(_pool == 0 || result.atlas_covers_target_area()))
|
|
||||||
{
|
{
|
||||||
// TODO: Investigate why a full re-upload can cause problems in some games (yellow flicker in SCV)
|
// TODO: Investigate why a full re-upload can cause problems in some games (yellow flicker in SCV)
|
||||||
// Unimplemented readback formats?
|
// Unimplemented readback formats?
|
||||||
|
@ -2162,8 +2161,8 @@ namespace rsx
|
||||||
template <typename surface_store_type, typename blitter_type, typename ...Args>
|
template <typename surface_store_type, typename blitter_type, typename ...Args>
|
||||||
blit_op_result upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, commandbuffer_type& cmd, surface_store_type& m_rtts, blitter_type& blitter, Args&&... extras)
|
blit_op_result upload_scaled_image(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate, commandbuffer_type& cmd, surface_store_type& m_rtts, blitter_type& blitter, Args&&... extras)
|
||||||
{
|
{
|
||||||
//Since we will have dst in vram, we can 'safely' ignore the swizzle flag
|
// Since we will have dst in vram, we can 'safely' ignore the swizzle flag
|
||||||
//TODO: Verify correct behavior
|
// TODO: Verify correct behavior
|
||||||
bool src_is_render_target = false;
|
bool src_is_render_target = false;
|
||||||
bool dst_is_render_target = false;
|
bool dst_is_render_target = false;
|
||||||
bool dst_is_argb8 = (dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8);
|
bool dst_is_argb8 = (dst.format == rsx::blit_engine::transfer_destination_format::a8r8g8b8);
|
||||||
|
@ -2176,11 +2175,21 @@ namespace rsx
|
||||||
const u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0));
|
const u32 src_address = (u32)((u64)src.pixels - (u64)vm::base(0));
|
||||||
const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0));
|
const u32 dst_address = (u32)((u64)dst.pixels - (u64)vm::base(0));
|
||||||
|
|
||||||
f32 scale_x = dst.scale_x;
|
const f32 scale_x = fabsf(dst.scale_x);
|
||||||
f32 scale_y = dst.scale_y;
|
const f32 scale_y = fabsf(dst.scale_y);
|
||||||
|
|
||||||
//Offset in x and y for src is 0 (it is already accounted for when getting pixels_src)
|
if (dst.scale_y < 0.f)
|
||||||
//Reproject final clip onto source...
|
{
|
||||||
|
// TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dst.scale_x < 0.f)
|
||||||
|
{
|
||||||
|
// TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
// Offset in x and y for src is 0 (it is already accounted for when getting pixels_src)
|
||||||
|
// Reproject final clip onto source...
|
||||||
u16 src_w = (u16)((f32)dst.clip_width / scale_x);
|
u16 src_w = (u16)((f32)dst.clip_width / scale_x);
|
||||||
u16 src_h = (u16)((f32)dst.clip_height / scale_y);
|
u16 src_h = (u16)((f32)dst.clip_height / scale_y);
|
||||||
|
|
||||||
|
@ -2210,6 +2219,32 @@ namespace rsx
|
||||||
if (!g_cfg.video.use_gpu_texture_scaling && !(src_is_render_target || dst_is_render_target))
|
if (!g_cfg.video.use_gpu_texture_scaling && !(src_is_render_target || dst_is_render_target))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// Check if trivial memcpy can perform the same task
|
||||||
|
// Used to copy programs and arbitrary data to the GPU in some cases
|
||||||
|
if (!src_is_render_target && !dst_is_render_target && dst_is_argb8 == src_is_argb8 && !dst.swizzled)
|
||||||
|
{
|
||||||
|
if ((src.slice_h == 1 && dst.clip_height == 1) ||
|
||||||
|
(dst.clip_width == src.width && dst.clip_height == src.slice_h && src.pitch == dst.pitch))
|
||||||
|
{
|
||||||
|
if (dst.scale_x > 0.f && dst.scale_y > 0.f)
|
||||||
|
{
|
||||||
|
const u8 bpp = dst_is_argb8 ? 4 : 2;
|
||||||
|
const u32 memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height;
|
||||||
|
|
||||||
|
std::lock_guard lock(m_cache_mutex);
|
||||||
|
invalidate_range_impl_base(cmd, address_range::start_length(src_address, memcpy_bytes_length), invalidation_cause::read, std::forward<Args>(extras)...);
|
||||||
|
invalidate_range_impl_base(cmd, address_range::start_length(dst_address, memcpy_bytes_length), invalidation_cause::write, std::forward<Args>(extras)...);
|
||||||
|
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Rotation transform applied, use fallback
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (src_is_render_target)
|
if (src_is_render_target)
|
||||||
{
|
{
|
||||||
src_subres.surface->read_barrier(cmd);
|
src_subres.surface->read_barrier(cmd);
|
||||||
|
@ -2260,41 +2295,32 @@ namespace rsx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//Check if trivial memcpy can perform the same task
|
|
||||||
//Used to copy programs to the GPU in some cases
|
|
||||||
if (!src_is_render_target && !dst_is_render_target && dst_is_argb8 == src_is_argb8 && !dst.swizzled)
|
|
||||||
{
|
|
||||||
if ((src.slice_h == 1 && dst.clip_height == 1) ||
|
|
||||||
(dst.clip_width == src.width && dst.clip_height == src.slice_h && src.pitch == dst.pitch))
|
|
||||||
{
|
|
||||||
const u8 bpp = dst_is_argb8 ? 4 : 2;
|
|
||||||
const u32 memcpy_bytes_length = dst.clip_width * bpp * dst.clip_height;
|
|
||||||
|
|
||||||
std::lock_guard lock(m_cache_mutex);
|
|
||||||
invalidate_range_impl_base(cmd, address_range::start_length(src_address, memcpy_bytes_length), invalidation_cause::read, std::forward<Args>(extras)...);
|
|
||||||
invalidate_range_impl_base(cmd, address_range::start_length(dst_address, memcpy_bytes_length), invalidation_cause::write, std::forward<Args>(extras)...);
|
|
||||||
memcpy(dst.pixels, src.pixels, memcpy_bytes_length);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
section_storage_type* cached_dest = nullptr;
|
section_storage_type* cached_dest = nullptr;
|
||||||
u16 max_dst_width = dst.width;
|
u16 max_dst_width = dst.width;
|
||||||
u16 max_dst_height = dst.height;
|
u16 max_dst_height = dst.height;
|
||||||
areai src_area = { 0, 0, src_w, src_h };
|
areai src_area = { 0, 0, src_w, src_h };
|
||||||
areai dst_area = { 0, 0, dst_w, dst_h };
|
areai dst_area = { 0, 0, dst_w, dst_h };
|
||||||
|
|
||||||
// 1024 height is a hack (for ~720p buffers)
|
|
||||||
// It is possible to have a large buffer that goes up to around 4kx4k but anything above 1280x720 is rare
|
|
||||||
// RSX only handles 512x512 tiles so texture 'stitching' will eventually be needed to be completely accurate
|
|
||||||
// Sections will be submitted as (512x512 + 512x512 + 256x512 + 512x208 + 512x208 + 256x208) to blit a 720p surface to the backbuffer for example
|
|
||||||
size2i dst_dimensions = { dst.pitch / (dst_is_argb8 ? 4 : 2), dst.height };
|
size2i dst_dimensions = { dst.pitch / (dst_is_argb8 ? 4 : 2), dst.height };
|
||||||
if (src_is_render_target)
|
if (src_is_render_target)
|
||||||
{
|
{
|
||||||
if (dst_dimensions.width == src_subres.surface->get_surface_width())
|
if (dst_dimensions.width == src_subres.surface->get_surface_width())
|
||||||
|
{
|
||||||
dst_dimensions.height = std::max(src_subres.surface->get_surface_height(), dst.height);
|
dst_dimensions.height = std::max(src_subres.surface->get_surface_height(), dst.height);
|
||||||
|
}
|
||||||
else if (dst.max_tile_h > dst.height)
|
else if (dst.max_tile_h > dst.height)
|
||||||
dst_dimensions.height = std::min((s32)dst.max_tile_h, 1024);
|
{
|
||||||
|
// Optimizations table based on common width/height pairings. If we guess wrong, the upload resolver will fix it anyway
|
||||||
|
// TODO: Add more entries based on empirical data
|
||||||
|
if (LIKELY(dst.width == 1280))
|
||||||
|
{
|
||||||
|
dst_dimensions.height = std::max<s32>(dst.height, 720);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
dst_dimensions.height = std::min((s32)dst.max_tile_h, 1024);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
reader_lock lock(m_cache_mutex);
|
reader_lock lock(m_cache_mutex);
|
||||||
|
@ -2435,7 +2461,7 @@ namespace rsx
|
||||||
{
|
{
|
||||||
// TODO: Rejecting unlocked blit_engine dst causes stutter in SCV
|
// TODO: Rejecting unlocked blit_engine dst causes stutter in SCV
|
||||||
// Surfaces marked as dirty have already been removed, leaving only flushed blit_dst data
|
// Surfaces marked as dirty have already been removed, leaving only flushed blit_dst data
|
||||||
// continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto this_address = surface->get_section_base();
|
const auto this_address = surface->get_section_base();
|
||||||
|
|
|
@ -988,7 +988,7 @@ namespace rsx
|
||||||
dst_info.max_tile_h = static_cast<u16>((dst_region.tile->size - dst_region.base) / out_pitch);
|
dst_info.max_tile_h = static_cast<u16>((dst_region.tile->size - dst_region.base) / out_pitch);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!g_cfg.video.force_cpu_blit_processing && (dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER || src_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER) && scale_x > 0 && scale_y > 0)
|
if (!g_cfg.video.force_cpu_blit_processing && (dst_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER || src_dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER))
|
||||||
{
|
{
|
||||||
//For now, only use this for actual scaled images, there are use cases that should not go through 3d engine, e.g program ucode transfer
|
//For now, only use this for actual scaled images, there are use cases that should not go through 3d engine, e.g program ucode transfer
|
||||||
//TODO: Figure out more instances where we can use this without problems
|
//TODO: Figure out more instances where we can use this without problems
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue