mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-14 18:58:36 +12:00
rsx: Fix and improve fp program data invalidation
This commit is contained in:
parent
2ed370093e
commit
c2c5005278
6 changed files with 46 additions and 23 deletions
|
@ -51,10 +51,7 @@ namespace rsx
|
||||||
std::unordered_set<u64>& mem_changes = frame_capture.replay_commands.back().memory_state;
|
std::unordered_set<u64>& mem_changes = frame_capture.replay_commands.back().memory_state;
|
||||||
|
|
||||||
// capture fragment shader mem
|
// capture fragment shader mem
|
||||||
const u32 shader_program = method_registers.shader_program_address();
|
const auto [program_offset, program_location] = method_registers.shader_program_address();
|
||||||
|
|
||||||
const u32 program_location = (shader_program & 0x3) - 1;
|
|
||||||
const u32 program_offset = (shader_program & ~0x3);
|
|
||||||
|
|
||||||
const u32 addr = get_address(program_offset, program_location, HERE);
|
const u32 addr = get_address(program_offset, program_location, HERE);
|
||||||
const auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(vm::base(addr));
|
const auto program_info = program_hash_util::fragment_program_utils::analyse_fragment_program(vm::base(addr));
|
||||||
|
|
|
@ -230,6 +230,7 @@ struct RSXFragmentProgram
|
||||||
void *addr;
|
void *addr;
|
||||||
u32 offset;
|
u32 offset;
|
||||||
u32 ucode_length;
|
u32 ucode_length;
|
||||||
|
u32 total_length;
|
||||||
u32 ctrl;
|
u32 ctrl;
|
||||||
u16 unnormalized_coords;
|
u16 unnormalized_coords;
|
||||||
u16 redirected_textures;
|
u16 redirected_textures;
|
||||||
|
|
|
@ -1585,10 +1585,7 @@ namespace rsx
|
||||||
m_graphics_state &= ~(rsx::pipeline_state::fragment_program_dirty);
|
m_graphics_state &= ~(rsx::pipeline_state::fragment_program_dirty);
|
||||||
auto &result = current_fragment_program = {};
|
auto &result = current_fragment_program = {};
|
||||||
|
|
||||||
const u32 shader_program = rsx::method_registers.shader_program_address();
|
const auto [program_offset, program_location] = method_registers.shader_program_address();
|
||||||
|
|
||||||
const u32 program_location = (shader_program & 0x3) - 1;
|
|
||||||
const u32 program_offset = (shader_program & ~0x3);
|
|
||||||
|
|
||||||
result.addr = vm::base(rsx::get_address(program_offset, program_location, HERE));
|
result.addr = vm::base(rsx::get_address(program_offset, program_location, HERE));
|
||||||
current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr);
|
current_fp_metadata = program_hash_util::fragment_program_utils::analyse_fragment_program(result.addr);
|
||||||
|
@ -1596,6 +1593,7 @@ namespace rsx
|
||||||
result.addr = (static_cast<u8*>(result.addr) + current_fp_metadata.program_start_offset);
|
result.addr = (static_cast<u8*>(result.addr) + current_fp_metadata.program_start_offset);
|
||||||
result.offset = program_offset + current_fp_metadata.program_start_offset;
|
result.offset = program_offset + current_fp_metadata.program_start_offset;
|
||||||
result.ucode_length = current_fp_metadata.program_ucode_length;
|
result.ucode_length = current_fp_metadata.program_ucode_length;
|
||||||
|
result.total_length = result.ucode_length + current_fp_metadata.program_start_offset;
|
||||||
result.valid = true;
|
result.valid = true;
|
||||||
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
|
result.ctrl = rsx::method_registers.shader_control() & (CELL_GCM_SHADER_CONTROL_32_BITS_EXPORTS | CELL_GCM_SHADER_CONTROL_DEPTH_EXPORT);
|
||||||
result.texcoord_control_mask = rsx::method_registers.texcoord_control_mask();
|
result.texcoord_control_mask = rsx::method_registers.texcoord_control_mask();
|
||||||
|
@ -1737,6 +1735,22 @@ namespace rsx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool thread::invalidate_fragment_program(u32 dst_dma, u32 dst_offset, u32 size)
|
||||||
|
{
|
||||||
|
const auto [shader_offset, shader_dma] = rsx::method_registers.shader_program_address();
|
||||||
|
|
||||||
|
if ((dst_dma & CELL_GCM_LOCATION_MAIN) == shader_dma &&
|
||||||
|
address_range::start_length(shader_offset, current_fragment_program.total_length).overlaps(
|
||||||
|
address_range::start_length(dst_offset, size))) [[unlikely]]
|
||||||
|
{
|
||||||
|
// Data overlaps
|
||||||
|
m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
void thread::reset()
|
void thread::reset()
|
||||||
{
|
{
|
||||||
rsx::method_registers.reset();
|
rsx::method_registers.reset();
|
||||||
|
|
|
@ -711,6 +711,8 @@ namespace rsx
|
||||||
* returns whether surface is a render target and surface pitch in native format
|
* returns whether surface is a render target and surface pitch in native format
|
||||||
*/
|
*/
|
||||||
void get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors);
|
void get_current_fragment_program(const std::array<std::unique_ptr<rsx::sampled_image_descriptor_base>, rsx::limits::fragment_textures_count>& sampler_descriptors);
|
||||||
|
public:
|
||||||
|
bool invalidate_fragment_program(u32 dst_dma, u32 dst_offset, u32 size);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
u64 target_rsx_flip_time = 0;
|
u64 target_rsx_flip_time = 0;
|
||||||
|
|
|
@ -876,16 +876,22 @@ namespace rsx
|
||||||
{
|
{
|
||||||
// Move last 32 bits
|
// Move last 32 bits
|
||||||
reinterpret_cast<u32*>(dst)[0] = reinterpret_cast<const u32*>(src)[count - 1];
|
reinterpret_cast<u32*>(dst)[0] = reinterpret_cast<const u32*>(src)[count - 1];
|
||||||
}
|
rsx->invalidate_fragment_program(dst_dma, dst_offset, 4);
|
||||||
else if (dst_dma & CELL_GCM_LOCATION_MAIN)
|
|
||||||
{
|
|
||||||
// May overlap
|
|
||||||
std::memmove(dst, src, data_length);
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// Never overlaps
|
if (dst_dma & CELL_GCM_LOCATION_MAIN)
|
||||||
std::memcpy(dst, src, data_length);
|
{
|
||||||
|
// May overlap
|
||||||
|
std::memmove(dst, src, data_length);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Never overlaps
|
||||||
|
std::memcpy(dst, src, data_length);
|
||||||
|
}
|
||||||
|
|
||||||
|
rsx->invalidate_fragment_program(dst_dma, dst_offset, count * 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -912,6 +918,7 @@ namespace rsx
|
||||||
{
|
{
|
||||||
// Move last 16 bits
|
// Move last 16 bits
|
||||||
dst[0] = convert(src[count - 1]);
|
dst[0] = convert(src[count - 1]);
|
||||||
|
rsx->invalidate_fragment_program(dst_dma, dst_offset, 2);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -920,6 +927,7 @@ namespace rsx
|
||||||
dst[i] = convert(src[i]);
|
dst[i] = convert(src[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rsx->invalidate_fragment_program(dst_dma, dst_offset, count * 2);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
@ -930,12 +938,6 @@ namespace rsx
|
||||||
|
|
||||||
//res->release(0);
|
//res->release(0);
|
||||||
|
|
||||||
if (!(dst_dma & CELL_GCM_LOCATION_MAIN))
|
|
||||||
{
|
|
||||||
// Set this flag on LOCAL memory transfer
|
|
||||||
rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip "handled methods"
|
// Skip "handled methods"
|
||||||
rsx->fifo_ctrl->skip_methods(count - 1);
|
rsx->fifo_ctrl->skip_methods(count - 1);
|
||||||
}
|
}
|
||||||
|
@ -1086,6 +1088,8 @@ namespace rsx
|
||||||
const u32 nb_lines = std::min(clip_h, in_h);
|
const u32 nb_lines = std::min(clip_h, in_h);
|
||||||
const u32 data_length = nb_lines * src_line_length;
|
const u32 data_length = nb_lines * src_line_length;
|
||||||
|
|
||||||
|
rsx->invalidate_fragment_program(dst_dma, dst_offset, data_length);
|
||||||
|
|
||||||
if (const auto result = rsx->read_barrier(src_address, data_length, false);
|
if (const auto result = rsx->read_barrier(src_address, data_length, false);
|
||||||
result == rsx::result_zcull_intr)
|
result == rsx::result_zcull_intr)
|
||||||
{
|
{
|
||||||
|
@ -1099,6 +1103,8 @@ namespace rsx
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const u32 data_length = in_pitch * (in_h - 1) + src_line_length;
|
const u32 data_length = in_pitch * (in_h - 1) + src_line_length;
|
||||||
|
|
||||||
|
rsx->invalidate_fragment_program(dst_dma, dst_offset, data_length);
|
||||||
rsx->read_barrier(src_address, data_length, true);
|
rsx->read_barrier(src_address, data_length, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1437,6 +1443,8 @@ namespace rsx
|
||||||
const auto write_address = get_address(dst_offset, dst_dma, HERE);
|
const auto write_address = get_address(dst_offset, dst_dma, HERE);
|
||||||
const auto data_length = in_pitch * (line_count - 1) + line_length;
|
const auto data_length = in_pitch * (line_count - 1) + line_length;
|
||||||
|
|
||||||
|
rsx->invalidate_fragment_program(dst_dma, dst_offset, data_length);
|
||||||
|
|
||||||
if (const auto result = rsx->read_barrier(read_address, data_length, !is_block_transfer);
|
if (const auto result = rsx->read_barrier(read_address, data_length, !is_block_transfer);
|
||||||
result == rsx::result_zcull_intr)
|
result == rsx::result_zcull_intr)
|
||||||
{
|
{
|
||||||
|
|
|
@ -1331,9 +1331,10 @@ namespace rsx
|
||||||
return decode<NV4097_SET_VERTEX_DATA_BASE_INDEX>().vertex_data_base_index();
|
return decode<NV4097_SET_VERTEX_DATA_BASE_INDEX>().vertex_data_base_index();
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 shader_program_address() const
|
std::pair<u32, u32> shader_program_address() const
|
||||||
{
|
{
|
||||||
return decode<NV4097_SET_SHADER_PROGRAM>().shader_program_address();
|
const u32 shader_address = decode<NV4097_SET_SHADER_PROGRAM>().shader_program_address();
|
||||||
|
return { shader_address & ~3, (shader_address & 3) - 1 };
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 transform_program_start() const
|
u32 transform_program_start() const
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue