mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-05 14:31:24 +12:00
rsx: Separate register context from RSX thread
This commit is contained in:
parent
0231902a69
commit
10fe14e783
21 changed files with 2149 additions and 1834 deletions
66
rpcs3/Emu/RSX/NV47/common.cpp
Normal file
66
rpcs3/Emu/RSX/NV47/common.cpp
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
#include "stdafx.h"
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#include "Emu/RSX/RSXThread.h"
|
||||||
|
|
||||||
|
// Accessor shorthand: yields the rsxthr pointer stored in the given context.
#define RSX(ctx) ctx->rsxthr
|
||||||
|
// Accessor shorthand for the register file. In this translation unit the argument is ignored
// and the macro always yields the address of the global rsx::method_registers.
#define REGS(ctx) (&rsx::method_registers)
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
namespace util
|
||||||
|
{
|
||||||
|
// Stores one channel of an immediate-mode vertex attribute in the register file.
// While a begin/end pair is open, the value is additionally appended to the push buffer.
//
// ctx            - handler context (thread + register state)
// attrib_index   - index into register_vertex_info
// channel_select - index of the data slot that receives 'value'
// count          - written to the attribute's 'size' field
// vtype          - written to the attribute's 'type' field
// value          - raw 32-bit payload
void push_vertex_data(rsx::context* ctx, u32 attrib_index, u32 channel_select, int count, rsx::vertex_base_type vtype, u32 value)
{
    auto* const renderer = RSX(ctx);

    if (renderer->in_begin_end)
    {
        // Immediate mode register/array update.
        // Push buffers still behave like register writes: attributes do not have to be
        // respecified for every vertex, the register contents are referenced instead
        // (classic OpenGL 1.x style behavior).
        renderer->append_to_push_buffer(attrib_index, count, channel_select, vtype, value);
    }

    // The register copy is refreshed regardless of begin/end state
    auto& attrib = REGS(ctx)->register_vertex_info[attrib_index];
    attrib.type = vtype;
    attrib.size = count;
    attrib.frequency = 0;
    attrib.stride = 0;
    attrib.data[channel_select] = value;
}
|
||||||
|
|
||||||
|
// Defers a draw-parameter register change that arrives in the middle of a draw.
//
// Nothing is done when the written value equals the latched value, when no begin/end
// pair is open, or when the current draw clause is empty. Otherwise the register is
// rolled back to the latched value and a command barrier of the requested kind is
// inserted into the current draw clause so the new value can be reapplied later.
//
// ctx  - handler context (thread + register state)
// type - kind of barrier to insert
// reg  - register index being written
// arg  - value being written
void push_draw_parameter_change(rsx::context* ctx, rsx::command_barrier_type type, u32 reg, u32 arg)
{
    if (REGS(ctx)->latch == arg ||
        !RSX(ctx)->in_begin_end ||
        REGS(ctx)->current_draw_clause.empty())
    {
        return;
    }

    // Defer the change. Rollback...
    REGS(ctx)->decode(reg, REGS(ctx)->latch);

    // Insert barrier to reinsert the value later.
    // The barrier kind comes from the caller; it used to be hard-coded to
    // index_base_modifier_barrier, which mislabelled every other kind of change.
    REGS(ctx)->current_draw_clause.insert_command_barrier(type, arg);
}
|
||||||
|
|
||||||
|
// Resolves a report offset to an address using the currently selected report DMA context.
// Returns a zero address when the DMA context is not one of the three handled values.
u32 get_report_data_impl(rsx::context* ctx, u32 offset)
{
    using dma_t = blit_engine::context_dma;

    const dma_t report_dma = REGS(ctx)->context_dma_report();
    u32 location = 0;

    if (report_dma == dma_t::to_memory_get_report)
    {
        location = CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_LOCAL;
    }
    else if (report_dma == dma_t::report_location_main)
    {
        location = CELL_GCM_CONTEXT_DMA_REPORT_LOCATION_MAIN;
    }
    else if (report_dma == dma_t::memory_host_buffer)
    {
        location = CELL_GCM_CONTEXT_DMA_MEMORY_HOST_BUFFER;
    }
    else
    {
        // Unhandled DMA context
        return vm::addr_t(0);
    }

    return vm::cast(get_address(offset, location));
}
|
||||||
|
}
|
||||||
|
}
|
60
rpcs3/Emu/RSX/NV47/common.h
Normal file
60
rpcs3/Emu/RSX/NV47/common.h
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <util/types.hpp>
|
||||||
|
#include "context.h"
|
||||||
|
#include "context_accessors.define.h"
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
enum command_barrier_type : u32;
|
||||||
|
// NOTE(review): this is an opaque declaration of an unscoped enum without a fixed underlying
// type, which ISO C++ does not permit (an enum-base is required) - confirm it matches the
// real definition of vertex_base_type and add the enum-key/underlying type accordingly.
enum vertex_base_type;
|
||||||
|
|
||||||
|
namespace util
|
||||||
|
{
|
||||||
|
// Translates a report offset into an address based on the report DMA context register;
// yields a zero address when that DMA context is not recognized.
u32 get_report_data_impl(rsx::context* ctx, u32 offset);
|
||||||
|
|
||||||
|
// Writes one channel of an immediate-mode vertex attribute into the register file and,
// while inside a begin/end pair, also appends it to the push buffer.
void push_vertex_data(rsx::context* ctx, u32 attrib_index, u32 channel_select, int count, rsx::vertex_base_type vtype, u32 value);
|
||||||
|
|
||||||
|
// Rolls back a register write made during an open, non-empty draw clause and queues a
// command barrier carrying 'arg' so the value can be reapplied later.
void push_draw_parameter_change(rsx::context* ctx, rsx::command_barrier_type type, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
template <bool FlushDMA, bool FlushPipe>
|
||||||
|
void write_gcm_label(context* ctx, u32 address, u32 data)
|
||||||
|
{
|
||||||
|
const bool is_flip_sema = (address == (RSX(ctx)->label_addr + 0x10) || address == (RSX(ctx)->device_addr + 0x30));
|
||||||
|
if (!is_flip_sema)
|
||||||
|
{
|
||||||
|
// First, queue the GPU work. If it flushes the queue for us, the following routines will be faster.
|
||||||
|
const bool handled = RSX(ctx)->get_backend_config().supports_host_gpu_labels && RSX(ctx)->release_GCM_label(address, data);
|
||||||
|
|
||||||
|
if (vm::_ref<RsxSemaphore>(address).val == data)
|
||||||
|
{
|
||||||
|
// It's a no-op to write the same value (although there is a delay in real-hw so it's more accurate to allow GPU label in this case)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (FlushDMA)
|
||||||
|
{
|
||||||
|
// If the backend handled the request, this call will basically be a NOP
|
||||||
|
g_fxo->get<rsx::dma_manager>().sync();
|
||||||
|
}
|
||||||
|
|
||||||
|
if constexpr (FlushPipe)
|
||||||
|
{
|
||||||
|
// Manually flush the pipeline.
|
||||||
|
// It is possible to stream report writes using the host GPU, but that generates too much submit traffic.
|
||||||
|
RSX(ctx)->sync();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (handled)
|
||||||
|
{
|
||||||
|
// Backend will handle it, nothing to write.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vm::_ref<RsxSemaphore>(address).val = data;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#include "context_accessors.undef.h"
|
30
rpcs3/Emu/RSX/NV47/context.h
Normal file
30
rpcs3/Emu/RSX/NV47/context.h
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <util/types.hpp>
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
class thread;
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
// TODO: Separate GRAPH context from RSX state
|
||||||
|
struct GRAPH_context
|
||||||
|
{
|
||||||
|
u32 id;
|
||||||
|
std::array<u32, 0x10000 / 4> registers;
|
||||||
|
|
||||||
|
GRAPH_context(u32 ctx_id)
|
||||||
|
: id(ctx_id)
|
||||||
|
{
|
||||||
|
std::fill(registers.begin(), registers.end(), 0);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
struct context
|
||||||
|
{
|
||||||
|
thread* rsxthr;
|
||||||
|
// GRAPH_context* graph;
|
||||||
|
rsx_state* register_state;
|
||||||
|
};
|
||||||
|
}
|
3
rpcs3/Emu/RSX/NV47/context_accessors.define.h
Normal file
3
rpcs3/Emu/RSX/NV47/context_accessors.define.h
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
#define RSX(ctx) ctx->rsxthr
|
||||||
|
#define REGS(ctx) ctx->register_state
|
||||||
|
// Calls capture_frame(name) on the RSX thread when capture_current_frame is set.
// Relies on a variable literally named 'ctx' at the expansion site, and expands to a bare
// if-statement - a following 'else' at the call site would bind to this 'if'.
#define RSX_CAPTURE_EVENT(name) if (RSX(ctx)->capture_current_frame) { RSX(ctx)->capture_frame(name); }
|
3
rpcs3/Emu/RSX/NV47/context_accessors.undef.h
Normal file
3
rpcs3/Emu/RSX/NV47/context_accessors.undef.h
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
#undef RSX
|
||||||
|
#undef REGS
|
||||||
|
#undef RSX_CAPTURE_EVENT
|
655
rpcs3/Emu/RSX/NV47/nv3089.cpp
Normal file
655
rpcs3/Emu/RSX/NV47/nv3089.cpp
Normal file
|
@ -0,0 +1,655 @@
|
||||||
|
#include "stdafx.h"
|
||||||
|
#include "nv3089.h"
|
||||||
|
|
||||||
|
#include "Emu/RSX/RSXThread.h"
|
||||||
|
|
||||||
|
#include "context_accessors.define.h"
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
namespace nv3089
|
||||||
|
{
|
||||||
|
// Decodes the NV3089 scaled-image transfer registers into a source and a destination descriptor.
//
// Returns { ok, src, dst }. When 'ok' is false the transfer must not be performed and both
// descriptors are left value-initialized. Several rejection paths also ask the RSX thread to
// recover the FIFO. Empty input dimensions and unsupported color formats raise an exception.
static std::tuple<bool, blit_src_info, blit_dst_info> decode_transfer_registers(context* ctx)
{
    using decode_result = std::tuple<bool, blit_src_info, blit_dst_info>;

    blit_src_info src_desc = {};
    blit_dst_info dst_desc = {};

    // All rejection paths hand back the untouched descriptors
    const auto rejected = [&]() -> decode_result
    {
        return { false, src_desc, dst_desc };
    };

    const auto regs = REGS(ctx);
    const auto thr = RSX(ctx);

    const rsx::blit_engine::transfer_operation transfer_op = regs->blit_engine_operation();

    const u16 out_x = regs->blit_engine_output_x();
    const u16 out_y = regs->blit_engine_output_y();
    const u16 out_w = regs->blit_engine_output_width();
    const u16 out_h = regs->blit_engine_output_height();

    const u16 in_w = regs->blit_engine_input_width();
    const u16 in_h = regs->blit_engine_input_height();

    const blit_engine::transfer_origin in_origin = regs->blit_engine_input_origin();
    auto src_format = regs->blit_engine_src_color_format();

    const f32 scale_x = regs->blit_engine_ds_dx();
    const f32 scale_y = regs->blit_engine_dt_dy();

    // Clipping: the clip rectangle is limited to the output dimensions
    const u16 clip_w = std::min(regs->blit_engine_clip_width(), out_w);
    const u16 clip_h = std::min(regs->blit_engine_clip_height(), out_h);

    // Covers both the clip dimensions and the destination dimensions
    if (clip_w == 0 || clip_h == 0)
    {
        rsx_log.warning("NV3089_IMAGE_IN: Operation NOPed out due to empty regions");
        return rejected();
    }

    if (in_w == 0 || in_h == 0)
    {
        // An empty input region is never valid
        fmt::throw_exception("NV3089_IMAGE_IN_SIZE: Invalid blit dimensions passed (in_w=%d, in_h=%d)", in_w, in_h);
    }

    u16 clip_x = regs->blit_engine_clip_x();
    u16 clip_y = regs->blit_engine_clip_y();

    // Drop the clip origin when the clip rectangle would not fit onto the destination
    if (clip_x != 0 && (out_x + clip_x + clip_w) > out_w)
    {
        clip_x = 0;
    }

    if (clip_y != 0 && (out_y + clip_y + clip_h) > out_h)
    {
        clip_y = 0;
    }

    u16 in_pitch = regs->blit_engine_input_pitch();

    if (in_origin != blit_engine::transfer_origin::corner &&
        in_origin != blit_engine::transfer_origin::center)
    {
        rsx_log.warning("NV3089_IMAGE_IN_SIZE: unknown origin (%d)", static_cast<u8>(in_origin));
    }

    if (transfer_op != rsx::blit_engine::transfer_operation::srccopy)
    {
        rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown operation (0x%x)", regs->registers[NV3089_SET_OPERATION]);
        thr->recover_fifo();
        return rejected();
    }

    if (!src_format)
    {
        rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown src color format (0x%x)", regs->registers[NV3089_SET_COLOR_FORMAT]);
        thr->recover_fifo();
        return rejected();
    }

    const u32 src_offset = regs->blit_engine_input_offset();
    const u32 src_dma = regs->blit_engine_input_location();

    u32 dst_offset;
    u32 dst_dma = 0;
    rsx::blit_engine::transfer_destination_format dst_format;
    u32 out_pitch = 0;
    [[maybe_unused]] u32 out_alignment = 64;
    bool block_transfer = false;

    // The destination parameters come from a different register set depending on the surface kind
    switch (regs->blit_engine_context_surface())
    {
    case blit_engine::context_surface::surface2d:
    {
        dst_dma = regs->blit_engine_output_location_nv3062();
        dst_offset = regs->blit_engine_output_offset_nv3062();
        out_pitch = regs->blit_engine_output_pitch_nv3062();
        out_alignment = regs->blit_engine_output_alignment_nv3062();
        block_transfer = fcmp(scale_x, 1.f) && fcmp(scale_y, 1.f);

        auto dst_fmt = regs->blit_engine_nv3062_color_format();
        if (!dst_fmt)
        {
            rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown NV3062 dst color format (0x%x)", regs->registers[NV3062_SET_COLOR_FORMAT]);
            thr->recover_fifo();
            return rejected();
        }

        dst_format = dst_fmt;
        break;
    }
    case blit_engine::context_surface::swizzle2d:
    {
        dst_dma = regs->blit_engine_nv309E_location();
        dst_offset = regs->blit_engine_nv309E_offset();

        auto dst_fmt = regs->blit_engine_output_format_nv309E();
        if (!dst_fmt)
        {
            rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown NV309E dst color format (0x%x)", regs->registers[NV309E_SET_FORMAT]);
            thr->recover_fifo();
            return rejected();
        }

        dst_format = dst_fmt;
        break;
    }
    default:
        rsx_log.error("NV3089_IMAGE_IN_SIZE: unknown m_context_surface (0x%x)", static_cast<u8>(regs->blit_engine_context_surface()));
        return rejected();
    }

    // Bytes per pixel on either side
    const u32 in_bpp = (src_format == rsx::blit_engine::transfer_source_format::r5g6b5) ? 2 : 4;
    const u32 out_bpp = (dst_format == rsx::blit_engine::transfer_destination_format::r5g6b5) ? 2 : 4;

    // A pitch of zero means "tightly packed"
    if (out_pitch == 0)
    {
        out_pitch = out_bpp * out_w;
    }

    if (in_pitch == 0)
    {
        in_pitch = in_bpp * in_w;
    }

    // A raw block copy is impossible when the texel sizes differ
    block_transfer = block_transfer && (in_bpp == out_bpp);

    f32 start_x = regs->blit_engine_in_x();
    f32 start_y = regs->blit_engine_in_y();

    if (in_origin == blit_engine::transfer_origin::center)
    {
        // Convert to normal u,v addressing. Under this scheme an offset of 1 is actually half-way inside pixel 0
        start_x = std::max(start_x, 0.5f) - 0.5f;
        start_y = std::max(start_y, 0.5f) - 0.5f;
    }

    u16 in_x = static_cast<u16>(std::floor(start_x));
    u16 in_y = static_cast<u16>(std::floor(start_y));

    // Subpixel addressing: when downscaling, snap the start to a whole destination texel
    if (scale_x < 1.f)
    {
        const float snapped_x = in_x * scale_x;
        in_x = static_cast<u16>(std::floor(snapped_x) / scale_x);
    }

    if (scale_y < 1.f)
    {
        const float snapped_y = in_y * scale_y;
        in_y = static_cast<u16>(std::floor(snapped_y) / scale_y);
    }

    const u32 in_offset = in_x * in_bpp + in_pitch * in_y;
    const u32 out_offset = out_x * out_bpp + out_pitch * out_y;

    const u32 src_row_bytes = (in_w * in_bpp);

    u32 src_address = 0;
    const u32 dst_address = get_address(dst_offset, dst_dma, 1); // TODO: Add size

    if (block_transfer && (clip_h == 1 || (in_pitch == out_pitch && src_row_bytes == in_pitch)))
    {
        const u32 line_count = std::min(clip_h, in_h);
        const u32 byte_count = line_count * src_row_bytes;

        src_address = get_address(src_offset, src_dma, byte_count);
        if (!src_address || !dst_address)
        {
            thr->recover_fifo();
            return rejected();
        }

        thr->invalidate_fragment_program(dst_dma, dst_offset, byte_count);

        const auto barrier_result = thr->read_barrier(src_address, byte_count, false);
        if (barrier_result == rsx::result_zcull_intr &&
            thr->copy_zcull_stats(src_address, byte_count, dst_address) == byte_count)
        {
            // All writes deferred
            return rejected();
        }
    }
    else
    {
        const u16 read_h = std::min(static_cast<u16>(clip_h / scale_y), in_h);
        const u32 byte_count = in_pitch * (read_h - 1) + src_row_bytes;

        src_address = get_address(src_offset, src_dma, byte_count);
        if (!src_address || !dst_address)
        {
            thr->recover_fifo();
            return rejected();
        }

        thr->invalidate_fragment_program(dst_dma, dst_offset, byte_count);
        thr->read_barrier(src_address, byte_count, true);
    }

    const bool moves_onto_itself =
        src_address == dst_address &&
        in_w == clip_w && in_h == clip_h &&
        in_pitch == out_pitch &&
        rsx::fcmp(scale_x, 1.f) && rsx::fcmp(scale_y, 1.f);

    if (moves_onto_itself)
    {
        // NULL operation
        rsx_log.warning("NV3089_IMAGE_IN: Operation writes memory onto itself with no modification (move-to-self). Will ignore.");
        return rejected();
    }

    u8* src_ptr = vm::_ptr<u8>(src_address + in_offset);
    u8* dst_ptr = vm::_ptr<u8>(dst_address + out_offset);

    if (dst_format != rsx::blit_engine::transfer_destination_format::r5g6b5 &&
        dst_format != rsx::blit_engine::transfer_destination_format::a8r8g8b8)
    {
        fmt::throw_exception("NV3089_IMAGE_IN_SIZE: unknown dst_color_format (%d)", static_cast<u8>(dst_format));
    }

    if (src_format == rsx::blit_engine::transfer_source_format::x8r8g8b8)
    {
        // Alpha has no meaning in both formats, so treat x8r8g8b8 as a8r8g8b8
        src_format = rsx::blit_engine::transfer_source_format::a8r8g8b8;
    }
    else if (src_format != rsx::blit_engine::transfer_source_format::r5g6b5 &&
        src_format != rsx::blit_engine::transfer_source_format::a8r8g8b8)
    {
        // TODO: Support more formats
        fmt::throw_exception("NV3089_IMAGE_IN_SIZE: unknown src_color_format (%d)", static_cast<u8>(*src_format));
    }

    const u32 scaled_w = static_cast<u32>(std::abs(scale_x) * in_w);
    const u32 scaled_h = static_cast<u32>(std::abs(scale_y) * in_h);

    if (scaled_w == 0 || scaled_h == 0)
    {
        rsx_log.error("NV3089_IMAGE_IN: Invalid dimensions or scaling factor. Request ignored (ds_dx=%f, dt_dy=%f)",
            regs->blit_engine_ds_dx(), regs->blit_engine_dt_dy());
        return rejected();
    }

    // Source descriptor
    src_desc.format = src_format;
    src_desc.origin = in_origin;
    src_desc.width = in_w;
    src_desc.height = in_h;
    src_desc.pitch = in_pitch;
    src_desc.bpp = in_bpp;
    src_desc.offset_x = in_x;
    src_desc.offset_y = in_y;
    src_desc.dma = src_dma;
    src_desc.rsx_address = src_address;
    src_desc.pixels = src_ptr;

    // Destination descriptor
    dst_desc.format = dst_format;
    dst_desc.width = scaled_w;
    dst_desc.height = scaled_h;
    dst_desc.clip_x = clip_x;
    dst_desc.clip_y = clip_y;
    dst_desc.clip_width = clip_w;
    dst_desc.clip_height = clip_h;
    dst_desc.offset_x = out_x;
    dst_desc.offset_y = out_y;
    dst_desc.pitch = out_pitch;
    dst_desc.bpp = out_bpp;
    dst_desc.scale_x = scale_x;
    dst_desc.scale_y = scale_y;
    dst_desc.dma = dst_dma;
    dst_desc.rsx_address = dst_address;
    dst_desc.pixels = dst_ptr;
    dst_desc.swizzled = (regs->blit_engine_context_surface() == blit_engine::context_surface::swizzle2d);

    return { true, src_desc, dst_desc };
}
|
||||||
|
|
||||||
|
void linear_copy(
|
||||||
|
const blit_dst_info& dst,
|
||||||
|
const blit_src_info& src,
|
||||||
|
u16 out_w,
|
||||||
|
u16 out_h,
|
||||||
|
u32 slice_h,
|
||||||
|
AVPixelFormat ffmpeg_src_format,
|
||||||
|
AVPixelFormat ffmpeg_dst_format,
|
||||||
|
bool need_convert,
|
||||||
|
bool need_clip,
|
||||||
|
bool src_is_modified,
|
||||||
|
bool interpolate)
|
||||||
|
{
|
||||||
|
std::vector<u8> temp2;
|
||||||
|
|
||||||
|
if (!need_convert) [[ likely ]]
|
||||||
|
{
|
||||||
|
const bool is_overlapping = !src_is_modified && dst.dma == src.dma && [&]() -> bool
|
||||||
|
{
|
||||||
|
const auto src_range = utils::address_range::start_length(src.rsx_address, src.pitch * (src.height - 1) + (src.bpp * src.width));
|
||||||
|
const auto dst_range = utils::address_range::start_length(dst.rsx_address, dst.pitch * (dst.clip_height - 1) + (dst.bpp * dst.clip_width));
|
||||||
|
return src_range.overlaps(dst_range);
|
||||||
|
}();
|
||||||
|
|
||||||
|
if (is_overlapping) [[ unlikely ]]
|
||||||
|
{
|
||||||
|
if (need_clip)
|
||||||
|
{
|
||||||
|
temp2.resize(dst.pitch * dst.clip_height);
|
||||||
|
clip_image_may_overlap(dst.pixels, src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch, temp2.data());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dst.pitch != src.pitch || dst.pitch != dst.bpp * out_w)
|
||||||
|
{
|
||||||
|
const u32 buffer_pitch = dst.bpp * out_w;
|
||||||
|
temp2.resize(buffer_pitch * out_h);
|
||||||
|
std::add_pointer_t<u8> buf = temp2.data(), pixels = src.pixels;
|
||||||
|
|
||||||
|
// Read the whole buffer from source
|
||||||
|
for (u32 y = 0; y < out_h; ++y)
|
||||||
|
{
|
||||||
|
std::memcpy(buf, pixels, buffer_pitch);
|
||||||
|
pixels += src.pitch;
|
||||||
|
buf += buffer_pitch;
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = temp2.data(), pixels = dst.pixels;
|
||||||
|
|
||||||
|
// Write to destination
|
||||||
|
for (u32 y = 0; y < out_h; ++y)
|
||||||
|
{
|
||||||
|
std::memcpy(pixels, buf, buffer_pitch);
|
||||||
|
pixels += dst.pitch;
|
||||||
|
buf += buffer_pitch;
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::memmove(dst.pixels, src.pixels, dst.pitch * out_h);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (need_clip) [[ unlikely ]]
|
||||||
|
{
|
||||||
|
clip_image(dst.pixels, src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dst.pitch != src.pitch || dst.pitch != dst.bpp * out_w) [[ unlikely ]]
|
||||||
|
{
|
||||||
|
u8* dst_pixels = dst.pixels, * src_pixels = src.pixels;
|
||||||
|
|
||||||
|
for (u32 y = 0; y < out_h; ++y)
|
||||||
|
{
|
||||||
|
std::memcpy(dst_pixels, src_pixels, out_w * dst.bpp);
|
||||||
|
dst_pixels += dst.pitch;
|
||||||
|
src_pixels += src.pitch;
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::memcpy(dst.pixels, src.pixels, dst.pitch * out_h);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (need_clip) [[ unlikely ]]
|
||||||
|
{
|
||||||
|
temp2.resize(dst.pitch * std::max<u32>(dst.height, dst.clip_height));
|
||||||
|
|
||||||
|
convert_scale_image(temp2.data(), ffmpeg_dst_format, dst.width, dst.height, dst.pitch,
|
||||||
|
src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h, interpolate);
|
||||||
|
|
||||||
|
clip_image(dst.pixels, temp2.data(), dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, dst.pitch, dst.pitch);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
convert_scale_image(dst.pixels, ffmpeg_dst_format, out_w, out_h, dst.pitch,
|
||||||
|
src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h,
|
||||||
|
interpolate);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<u8> swizzled_copy_1(
|
||||||
|
const blit_dst_info& dst,
|
||||||
|
const blit_src_info& src,
|
||||||
|
u16 out_w,
|
||||||
|
u16 out_h,
|
||||||
|
u32 slice_h,
|
||||||
|
AVPixelFormat ffmpeg_src_format,
|
||||||
|
AVPixelFormat ffmpeg_dst_format,
|
||||||
|
bool need_convert,
|
||||||
|
bool need_clip,
|
||||||
|
bool interpolate)
|
||||||
|
{
|
||||||
|
std::vector<u8> temp2, temp3;
|
||||||
|
|
||||||
|
if (need_clip)
|
||||||
|
{
|
||||||
|
temp3.resize(dst.pitch * dst.clip_height);
|
||||||
|
|
||||||
|
if (need_convert)
|
||||||
|
{
|
||||||
|
temp2.resize(dst.pitch * std::max<u32>(dst.height, dst.clip_height));
|
||||||
|
|
||||||
|
convert_scale_image(temp2.data(), ffmpeg_dst_format, dst.width, dst.height, dst.pitch,
|
||||||
|
src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h,
|
||||||
|
interpolate);
|
||||||
|
|
||||||
|
clip_image(temp3.data(), temp2.data(), dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, dst.pitch, dst.pitch);
|
||||||
|
return temp3;
|
||||||
|
}
|
||||||
|
|
||||||
|
clip_image(temp3.data(), src.pixels, dst.clip_x, dst.clip_y, dst.clip_width, dst.clip_height, dst.bpp, src.pitch, dst.pitch);
|
||||||
|
return temp3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (need_convert)
|
||||||
|
{
|
||||||
|
temp3.resize(dst.pitch * out_h);
|
||||||
|
|
||||||
|
convert_scale_image(temp3.data(), ffmpeg_dst_format, out_w, out_h, dst.pitch,
|
||||||
|
src.pixels, ffmpeg_src_format, src.width, src.height, src.pitch, slice_h,
|
||||||
|
interpolate);
|
||||||
|
|
||||||
|
return temp3;
|
||||||
|
}
|
||||||
|
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second stage of a transfer into a swizzled destination: converts a linear image into
// swizzled layout. The swizzle dimensions are the output dimensions rounded up via
// next_pow2; an image that is not already of that size is padded into a temporary first.
// Only texel sizes of 1, 2 and 4 bytes are processed.
void swizzled_copy_2(
    u8* linear_pixels,
    u8* swizzled_pixels,
    u32 linear_pitch,
    u16 out_w,
    u16 out_h,
    u8 out_bpp)
{
    // TODO: Validate these claims. Are the registers always correctly initialized? Should we trust them at all?
    // It looks like rsx may ignore the requested swizzle size and just always
    // round up to nearest power of 2.
    // The disabled alternative read nv309e_sw_width_log2() / nv309e_sw_height_log2() from the
    // register state (treating a height log2 of 0 as 1) and swizzled with
    // (1 << width_log2) x (1 << height_log2).

    const u32 sw_width = next_pow2(out_w);
    const u32 sw_height = next_pow2(out_h);

    // Non power-of-2 output has to be padded out before swizzling
    const bool needs_padding = (sw_width != out_w || sw_height != out_h);

    std::vector<u8> padded;
    if (needs_padding)
    {
        padded.resize(out_bpp * sw_width * sw_height);
    }

    // The argument only selects the texel type; its value is unused
    const auto swizzle_as = [&](auto texel_tag)
    {
        using texel_t = decltype(texel_tag);

        if (needs_padding)
        {
            pad_texture<texel_t>(linear_pixels, padded.data(), out_w, out_h, sw_width, sw_height);
            linear_pixels = padded.data();
        }

        convert_linear_swizzle<texel_t, false>(linear_pixels, swizzled_pixels, sw_width, sw_height, linear_pitch);
    };

    switch (out_bpp)
    {
    case 1:
        swizzle_as(u8{});
        break;
    case 2:
        swizzle_as(u16{});
        break;
    case 4:
        swizzle_as(u32{});
        break;
    }
}
|
||||||
|
|
||||||
|
std::vector<u8> _mirror_transform(const blit_src_info& src, bool flip_x, bool flip_y)
|
||||||
|
{
|
||||||
|
std::vector<u8> temp1;
|
||||||
|
if (!flip_x && !flip_y)
|
||||||
|
{
|
||||||
|
return temp1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const u32 packed_pitch = src.width * src.bpp;
|
||||||
|
temp1.resize(packed_pitch * src.height);
|
||||||
|
|
||||||
|
const s32 stride_y = (flip_y ? -1 : 1) * static_cast<s32>(src.pitch);
|
||||||
|
|
||||||
|
for (u32 y = 0; y < src.height; ++y)
|
||||||
|
{
|
||||||
|
u8* dst_pixels = temp1.data() + (packed_pitch * y);
|
||||||
|
u8* src_pixels = src.pixels + (static_cast<s32>(y) * stride_y);
|
||||||
|
|
||||||
|
if (flip_x)
|
||||||
|
{
|
||||||
|
if (src.bpp == 4) [[ likely ]]
|
||||||
|
{
|
||||||
|
rsx::memcpy_r<u32>(dst_pixels, src_pixels, src.width);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
rsx::memcpy_r<u16>(dst_pixels, src_pixels, src.width);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::memcpy(dst_pixels, src_pixels, packed_pitch);
|
||||||
|
}
|
||||||
|
|
||||||
|
return temp1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handler for the NV3089 image-in method: performs a scaled image transfer.
//
// The transfer registers are decoded first; a rejected decode ends the call. The backend
// gets the first chance to perform the blit when either side is in the frame buffer DMA
// object and CPU blit processing is not forced. Otherwise the transfer runs on the CPU:
// optional mirroring, linear or swizzled copy, and re-tiling when the destination lies in
// a tiled memory region. The register index and argument are unused.
void image_in(context* ctx, u32 /*reg*/, u32 /*arg*/)
{
    auto [decoded, src, dst] = decode_transfer_registers(ctx);
    if (!decoded)
    {
        return;
    }

    const auto thr = RSX(ctx);
    const auto regs = REGS(ctx);

    // Decode extra params before locking
    const bool interpolate = regs->blit_engine_input_inter() == blit_engine::transfer_interpolator::foh;
    const u16 out_w = regs->blit_engine_output_width();
    const u16 out_h = regs->blit_engine_output_height();

    // Lock here. RSX cannot execute any locking operations from this point, including ZCULL read barriers
    auto res = ::rsx::reservation_lock<true>(
        dst.rsx_address, dst.pitch * dst.clip_height,
        src.rsx_address, src.pitch * src.height);

    const bool touches_framebuffer =
        dst.dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER ||
        src.dma == CELL_GCM_CONTEXT_DMA_MEMORY_FRAME_BUFFER;

    if (!g_cfg.video.force_cpu_blit_processing &&
        touches_framebuffer &&
        thr->scaled_image_from_memory(src, dst, interpolate))
    {
        // HW-accelerated blit
        return;
    }

    // A negative scale factor means the source has to be mirrored along that axis
    const bool flip_x = dst.scale_x < 0;
    const bool flip_y = dst.scale_y < 0;
    const bool src_is_temp = flip_y || flip_x;

    std::vector<u8> mirror_tmp;
    if (src_is_temp)
    {
        // Work from a tightly packed, mirrored copy of the source
        mirror_tmp = _mirror_transform(src, flip_x, flip_y);
        src.pixels = mirror_tmp.data();
        src.pitch = src.width * src.bpp;
    }

    const AVPixelFormat in_format = (src.format == rsx::blit_engine::transfer_source_format::r5g6b5) ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;
    const AVPixelFormat out_format = (dst.format == rsx::blit_engine::transfer_destination_format::r5g6b5) ? AV_PIX_FMT_RGB565BE : AV_PIX_FMT_ARGB;

    const bool need_clip =
        dst.clip_width != src.width ||
        dst.clip_height != src.height ||
        dst.clip_x > 0 || dst.clip_y > 0 ||
        dst.width != out_w || dst.height != out_h;

    const bool need_convert = out_format != in_format || !rsx::fcmp(fabsf(dst.scale_x), 1.f) || !rsx::fcmp(fabsf(dst.scale_y), 1.f);
    const u32 slice_h = static_cast<u32>(std::ceil(static_cast<f32>(dst.clip_height + dst.clip_y) / dst.scale_y));

    // When the destination is tiled, the copy targets a staging buffer that is re-tiled afterwards
    const auto real_dst = dst.pixels;
    const auto tiled_region = thr->get_tiled_memory_region(utils::address_range::start_length(dst.rsx_address, dst.pitch * dst.clip_height));
    std::vector<u8> tmp;

    if (tiled_region)
    {
        tmp.resize(tiled_region.tile->size);
        dst.pixels = tmp.data();
    }

    if (regs->blit_engine_context_surface() == blit_engine::context_surface::swizzle2d)
    {
        const auto swz_temp = swizzled_copy_1(dst, src, out_w, out_h, slice_h, in_format, out_format, need_convert, need_clip, interpolate);

        // An empty result means the source pixels can be swizzled as they are
        auto pixels_src = swz_temp.empty() ? src.pixels : swz_temp.data();

        swizzled_copy_2(const_cast<u8*>(pixels_src), dst.pixels, src.pitch, out_w, out_h, dst.bpp);
    }
    else
    {
        linear_copy(dst, src, out_w, out_h, slice_h, in_format, out_format, need_convert, need_clip, src_is_temp, interpolate);
    }

    if (tiled_region)
    {
        const auto tile_func = dst.bpp == 4
            ? rsx::tile_texel_data32
            : rsx::tile_texel_data16;

        // Write the staged image into the real destination in tiled layout
        tile_func(
            real_dst,
            dst.pixels,
            tiled_region.base_address,
            dst.rsx_address - tiled_region.base_address,
            tiled_region.tile->size,
            tiled_region.tile->bank,
            tiled_region.tile->pitch,
            dst.clip_width,
            dst.clip_height
        );
    }
}
|
||||||
|
}
|
||||||
|
}
|
10
rpcs3/Emu/RSX/NV47/nv3089.h
Normal file
10
rpcs3/Emu/RSX/NV47/nv3089.h
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
#pragma once
|
||||||
|
#include "context.h"
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
namespace nv3089
|
||||||
|
{
|
||||||
|
void image_in(context* ctx, u32 reg, u32 arg);
|
||||||
|
}
|
||||||
|
}
|
159
rpcs3/Emu/RSX/NV47/nv308a.cpp
Normal file
159
rpcs3/Emu/RSX/NV47/nv308a.cpp
Normal file
|
@ -0,0 +1,159 @@
|
||||||
|
#include "stdafx.h"
|
||||||
|
#include "nv308a.h"
|
||||||
|
|
||||||
|
#include "Emu/RSX/RSXThread.h"
|
||||||
|
|
||||||
|
#include "context_accessors.define.h"
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
namespace nv308a
|
||||||
|
{
|
||||||
|
// Handler for the NV308A_COLOR method range.
// Copies the inline FIFO arguments of the current command to the NV3062 output
// location, starting at pixel (nv308a_x + index, nv308a_y).
// The third parameter (the written value) is unused; data is read from the FIFO span.
void color::impl(context* ctx, u32 reg, u32)
{
	const u32 out_x_max = REGS(ctx)->nv308a_size_out_x();
	const u32 index = reg - NV308A_COLOR;

	if (index >= out_x_max)
	{
		// Skip
		return;
	}

	// Get position of the current command arg
	[[maybe_unused]] const u32 src_offset = RSX(ctx)->fifo_ctrl->get_pos();

	// FIFO args count including this one
	const u32 fifo_args_cnt = RSX(ctx)->fifo_ctrl->get_remaining_args_count() + 1;

	// The range of methods this function is responsible for
	const u32 method_range = std::min<u32>(0x700 - index, out_x_max - index);

	// Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min)
	const u32 fifo_read_limit = static_cast<u32>(((RSX(ctx)->ctrl->put & ~3ull) - (RSX(ctx)->fifo_ctrl->get_pos())) / 4);

	u32 count = std::min<u32>({ fifo_args_cnt, fifo_read_limit, method_range });

	const u32 dst_dma = REGS(ctx)->blit_engine_output_location_nv3062();
	const u32 dst_offset = REGS(ctx)->blit_engine_output_offset_nv3062();
	const u32 out_pitch = REGS(ctx)->blit_engine_output_pitch_nv3062();

	const u32 x = REGS(ctx)->nv308a_x() + index;
	const u32 y = REGS(ctx)->nv308a_y();

	const auto fifo_span = RSX(ctx)->fifo_ctrl->get_current_arg_ptr();

	// Never read more words than the FIFO span actually exposes
	if (fifo_span.size() < count)
	{
		count = ::size32(fifo_span);
	}

	// Skip "handled methods"
	RSX(ctx)->fifo_ctrl->skip_methods(count - 1);

	// 308A::COLOR can be used to create custom sync primitives.
	// Hide this behind strict mode due to the potential performance implications.
	if (count == 1 && g_cfg.video.strict_rendering_mode && !g_cfg.video.relaxed_zcull_sync)
	{
		RSX(ctx)->sync();
	}

	switch (*REGS(ctx)->blit_engine_nv3062_color_format())
	{
	case blit_engine::transfer_destination_format::a8r8g8b8:
	case blit_engine::transfer_destination_format::y32:
	{
		// Bit cast - optimize to mem copy

		const u32 data_length = count * 4;

		const auto dst_address = get_address(dst_offset + (x * 4) + (out_pitch * y), dst_dma, data_length);

		if (!dst_address)
		{
			RSX(ctx)->recover_fifo();
			return;
		}

		const auto dst = vm::_ptr<u8>(dst_address);
		const auto src = reinterpret_cast<const u8*>(fifo_span.data());

		rsx::reservation_lock<true> rsx_lock(dst_address, data_length);

		if (RSX(ctx)->fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) [[unlikely]]
		{
			// Move last 32 bits
			reinterpret_cast<u32*>(dst)[0] = reinterpret_cast<const u32*>(src)[count - 1];
			RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, 4);
		}
		else
		{
			if (dst_dma & CELL_GCM_LOCATION_MAIN)
			{
				// May overlap
				std::memmove(dst, src, data_length);
			}
			else
			{
				// Never overlaps
				std::memcpy(dst, src, data_length);
			}

			RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, count * 4);
		}

		break;
	}
	case blit_engine::transfer_destination_format::r5g6b5:
	{
		const auto data_length = count * 2;

		const auto dst_address = get_address(dst_offset + (x * 2) + (y * out_pitch), dst_dma, data_length);

		if (!dst_address)
		{
			RSX(ctx)->recover_fifo();
			return;
		}

		// Only form the destination pointer once the address is known to be valid
		const auto dst = vm::_ptr<u16>(dst_address);
		const auto src = utils::bless<const be_t<u32>>(fifo_span.data());

		rsx::reservation_lock<true> rsx_lock(dst_address, data_length);

		auto convert = [](u32 input) -> u16
		{
			// Input is considered to be ARGB8
			u32 r = (input >> 16) & 0xFF;
			u32 g = (input >> 8) & 0xFF;
			u32 b = input & 0xFF;

			// Rescale to the maximum value of each field (5/6/5 bits).
			// Scaling by 32/64 would map 255 to 32/64, which overflows the field
			// and corrupts the neighbouring channel.
			r = (r * 31) / 255;
			g = (g * 63) / 255;
			b = (b * 31) / 255;
			return static_cast<u16>((r << 11) | (g << 5) | b);
		};

		if (RSX(ctx)->fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) [[unlikely]]
		{
			// Move last 16 bits
			dst[0] = convert(src[count - 1]);
			RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, 2);
			break;
		}

		for (u32 i = 0; i < count; i++)
		{
			dst[i] = convert(src[i]);
		}

		RSX(ctx)->invalidate_fragment_program(dst_dma, dst_offset, count * 2);
		break;
	}
	default:
	{
		fmt::throw_exception("Unreachable");
	}
	}
}
|
||||||
|
}
|
||||||
|
}
|
14
rpcs3/Emu/RSX/NV47/nv308a.h
Normal file
14
rpcs3/Emu/RSX/NV47/nv308a.h
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "context.h"
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
namespace nv308a
|
||||||
|
{
|
||||||
|
struct color
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
125
rpcs3/Emu/RSX/NV47/nv406e.cpp
Normal file
125
rpcs3/Emu/RSX/NV47/nv406e.cpp
Normal file
|
@ -0,0 +1,125 @@
|
||||||
|
#include "stdafx.h"
|
||||||
|
#include "nv406e.h"
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#include "Emu/RSX/RSXThread.h"
|
||||||
|
|
||||||
|
#include "context_accessors.define.h"
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
namespace nv406e
|
||||||
|
{
|
||||||
|
// Synchronizes the RSX thread, then publishes the current FIFO position and the
// new reference value to the DMA control block.
void set_reference(context* ctx, u32 /*reg*/, u32 arg)
{
	RSX(ctx)->sync();

	auto& control = vm::_ref<RsxDmaControl>(RSX(ctx)->dma_address);

	// GET is written here together with REF (it will be written again with the same value at command end)
	control.get.release(RSX(ctx)->fifo_ctrl->get_pos());
	control.ref.store(arg);
}
|
||||||
|
|
||||||
|
// Waits until the semaphore addressed by the NV406E semaphore registers equals `arg`.
// The wait is abandoned when the thread is stopped or when the configured driver
// recovery timeout elapses.
void semaphore_acquire(context* ctx, u32 /*reg*/, u32 arg)
{
	RSX(ctx)->sync_point_request.release(true);
	const u32 addr = get_address(REGS(ctx)->semaphore_offset_406e(), REGS(ctx)->semaphore_context_dma_406e());

	const auto& sema = vm::_ref<RsxSemaphore>(addr).val;

	if (sema == arg)
	{
		// Already signaled. Flip semaphore doesn't need wake-up delay
		if (addr != RSX(ctx)->label_addr + 0x10)
		{
			RSX(ctx)->flush_fifo();
			RSX(ctx)->fifo_wake_delay(2);
		}

		return;
	}

	RSX(ctx)->flush_fifo();

	u64 wait_begin = rsx::uclock();
	u64 prev_sample = wait_begin;

	while (sema != arg)
	{
		if (RSX(ctx)->test_stopped())
		{
			RSX(ctx)->state += cpu_flag::again;
			return;
		}

		const auto tdr = static_cast<u64>(g_cfg.video.driver_recovery_timeout);

		if (tdr)
		{
			const u64 now = rsx::uclock();
			const u64 since_prev = now - prev_sample;

			if (since_prev > 20'000)
			{
				// Suspicious amount of time has passed
				// External pause such as debuggers' pause or operating system sleep may have taken place
				// Ignore it by shifting the start of the wait forward
				wait_begin += since_prev;
			}

			prev_sample = now;

			if ((now - wait_begin) > tdr)
			{
				// If longer than driver timeout force exit
				rsx_log.error("nv406e::semaphore_acquire has timed out. semaphore_address=0x%X", addr);
				break;
			}
		}

		RSX(ctx)->cpu_wait({});
	}

	RSX(ctx)->fifo_wake_delay();
	RSX(ctx)->performance_counters.idle_time += (rsx::uclock() - wait_begin);
}
|
||||||
|
|
||||||
|
// Writes `arg` to the semaphore addressed by the NV406E semaphore registers.
// Unaligned offsets are ignored; addresses outside the label region trigger FIFO recovery.
void semaphore_release(context* ctx, u32 /*reg*/, u32 arg)
{
	const u32 offset = REGS(ctx)->semaphore_offset_406e();

	if ((offset % 4) != 0)
	{
		rsx_log.warning("NV406E semaphore release is using unaligned semaphore, ignoring. (offset=0x%x)", offset);
		return;
	}

	const u32 ctxt = REGS(ctx)->semaphore_context_dma_406e();

	// By avoiding doing this on flip's semaphore release
	// We allow last gcm's registers reset to occur in case of a crash
	const bool is_flip_sema = (offset == 0x10 && ctxt == CELL_GCM_CONTEXT_DMA_SEMAPHORE_R);

	if (!is_flip_sema)
	{
		RSX(ctx)->sync_point_request.release(true);
	}

	const u32 addr = get_address(offset, ctxt);

	// TODO: Check if possible to write on reservations
	const bool same_region = (RSX(ctx)->label_addr >> 28 == addr >> 28);

	if (!same_region)
	{
		rsx_log.error("NV406E semaphore unexpected address. Please report to the developers. (offset=0x%x, addr=0x%x)", offset, addr);
		RSX(ctx)->recover_fifo();
		return;
	}

	if (!arg && addr == RSX(ctx)->device_addr + 0x30)
	{
		// HW flip synchronization related, 1 is not written without display queue command (TODO: make it behave as real hw)
		arg = 1;
	}

	util::write_gcm_label<false, true>(ctx, addr, arg);
}
|
||||||
|
}
|
||||||
|
}
|
15
rpcs3/Emu/RSX/NV47/nv406e.h
Normal file
15
rpcs3/Emu/RSX/NV47/nv406e.h
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "context.h"
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
namespace nv406e
|
||||||
|
{
|
||||||
|
void set_reference(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void semaphore_acquire(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void semaphore_release(context* ctx, u32 reg, u32 arg);
|
||||||
|
}
|
||||||
|
}
|
629
rpcs3/Emu/RSX/NV47/nv4097.cpp
Normal file
629
rpcs3/Emu/RSX/NV47/nv4097.cpp
Normal file
|
@ -0,0 +1,629 @@
|
||||||
|
#include "stdafx.h"
|
||||||
|
#include "nv4097.h"
|
||||||
|
|
||||||
|
#include "Emu/RSX/RSXThread.h"
|
||||||
|
#include "Emu/RSX/Common/BufferUtils.h"
|
||||||
|
|
||||||
|
#define RSX(ctx) ctx->rsxthr
|
||||||
|
#define REGS(ctx) (&rsx::method_registers)
|
||||||
|
#define RSX_CAPTURE_EVENT(name) if (RSX(ctx)->capture_current_frame) { RSX(ctx)->capture_frame(name); }
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
template<typename Type> struct vertex_data_type_from_element_type;
|
||||||
|
template<> struct vertex_data_type_from_element_type<float> { static const vertex_base_type type = vertex_base_type::f; };
|
||||||
|
template<> struct vertex_data_type_from_element_type<f16> { static const vertex_base_type type = vertex_base_type::sf; };
|
||||||
|
template<> struct vertex_data_type_from_element_type<u8> { static const vertex_base_type type = vertex_base_type::ub; };
|
||||||
|
template<> struct vertex_data_type_from_element_type<u16> { static const vertex_base_type type = vertex_base_type::s32k; };
|
||||||
|
template<> struct vertex_data_type_from_element_type<s16> { static const vertex_base_type type = vertex_base_type::s1; };
|
||||||
|
|
||||||
|
namespace nv4097
|
||||||
|
{
|
||||||
|
///// Program management
|
||||||
|
|
||||||
|
// Flags the fragment program ucode as dirty. Register index and value are not inspected.
void set_shader_program_dirty(context* ctx, u32 /*reg*/, u32 /*arg*/)
{
	RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_program_ucode_dirty;
}
|
||||||
|
|
||||||
|
// Handler for the NV4097_SET_TRANSFORM_CONSTANT method range.
// Copies as many consecutive FIFO arguments as possible into the transform constants,
// starting at the slot selected by transform_constant_load() and the register index,
// and flags the constants dirty when any value changed.
void set_transform_constant::impl(context* ctx, u32 reg, u32 arg)
{
	const u32 index = reg - NV4097_SET_TRANSFORM_CONSTANT;
	const u32 constant_id = index / 4;
	const u8 subreg = index % 4;

	// FIFO args count including this one
	const u32 fifo_args_cnt = RSX(ctx)->fifo_ctrl->get_remaining_args_count() + 1;

	// The range of methods this function is responsible for
	const u32 method_range = 32 - index;

	// Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min)
	const u32 fifo_read_limit = static_cast<u32>(((RSX(ctx)->ctrl->put & ~3ull) - (RSX(ctx)->fifo_ctrl->get_pos())) / 4);

	const u32 count = std::min<u32>({ fifo_args_cnt, fifo_read_limit, method_range });

	const u32 load = REGS(ctx)->transform_constant_load();

	u32 rcount = count;
	if (const u32 max = (load + constant_id) * 4 + count + subreg, limit = 468 * 4; max > limit)
	{
		// Ignore addresses outside the usable [0, 467] range
		rsx_log.warning("Invalid transform register index (load=%u, index=%u, count=%u)", load, index, count);

		// Keep only the words that still land inside the usable range
		if ((max - count) < limit)
			rcount -= max - limit;
		else
			rcount = 0;
	}

	const auto values = &REGS(ctx)->transform_constants[load + constant_id][subreg];

	const auto fifo_span = RSX(ctx)->fifo_ctrl->get_current_arg_ptr();

	// Never read more words than the FIFO span actually exposes
	if (fifo_span.size() < rcount)
	{
		rcount = ::size32(fifo_span);
	}

	if (RSX(ctx)->m_graphics_state & rsx::pipeline_state::transform_constants_dirty)
	{
		// Minor optimization: don't compare values if we already know we need invalidation
		copy_data_swap_u32(values, fifo_span.data(), rcount);
	}
	else
	{
		if (copy_data_swap_u32_cmp(values, fifo_span.data(), rcount))
		{
			// Transform constants invalidation is expensive (~8k bytes per update)
			RSX(ctx)->m_graphics_state |= rsx::pipeline_state::transform_constants_dirty;
		}
	}

	RSX(ctx)->fifo_ctrl->skip_methods(rcount - 1);
}
|
||||||
|
|
||||||
|
// Handler for the NV4097_SET_TRANSFORM_PROGRAM method range.
// Copies as many consecutive FIFO arguments as possible into the transform program
// buffer at the current load position, advances the load position and flags the
// vertex program ucode dirty.
void set_transform_program::impl(context* ctx, u32 reg, u32 arg)
{
	const u32 index = reg - NV4097_SET_TRANSFORM_PROGRAM;

	// FIFO args count including this one
	const u32 fifo_args_cnt = RSX(ctx)->fifo_ctrl->get_remaining_args_count() + 1;

	// The range of methods this function is responsible for
	const u32 method_range = 32 - index;

	// Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min)
	const u32 fifo_read_limit = static_cast<u32>(((RSX(ctx)->ctrl->put & ~3ull) - (RSX(ctx)->fifo_ctrl->get_pos())) / 4);

	const u32 count = std::min<u32>({ fifo_args_cnt, fifo_read_limit, method_range });

	const u32 load_pos = REGS(ctx)->transform_program_load();

	u32 rcount = count;

	if (const u32 max = load_pos * 4 + rcount + (index % 4), limit = max_vertex_program_instructions * 4;
		max > limit)
	{
		rsx_log.warning("Program buffer overflow! Attempted to write %u VP instructions.", max / 4);

		// Drop the words that fall past the end of the program buffer.
		// If even the first word is out of range nothing can be written; without this
		// check the subtraction would wrap around as u32.
		if ((max - count) < limit)
			rcount -= max - limit;
		else
			rcount = 0;
	}

	const auto fifo_span = RSX(ctx)->fifo_ctrl->get_current_arg_ptr();

	// Never read more words than the FIFO span actually exposes
	if (fifo_span.size() < rcount)
	{
		rcount = ::size32(fifo_span);
	}

	if (rcount)
	{
		// Skipped when empty so that no out-of-range element is ever indexed
		copy_data_swap_u32(&REGS(ctx)->transform_program[load_pos * 4 + index % 4], fifo_span.data(), rcount);
	}

	RSX(ctx)->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty;
	REGS(ctx)->transform_program_load_set(load_pos + ((rcount + index % 4) / 4));
	RSX(ctx)->fifo_ctrl->skip_methods(rcount - 1);
}
|
||||||
|
|
||||||
|
///// Texture management
|
||||||
|
|
||||||
|
///// Surface management
|
||||||
|
|
||||||
|
// Reacts to a changed surface register: raises the register-specific pipeline flag
// (if any) and marks the render target configuration dirty.
void set_surface_dirty_bit(context* ctx, u32 reg, u32 arg)
{
	// Value did not change, nothing to invalidate
	if (arg == REGS(ctx)->latch)
		return;

	if (reg == NV4097_SET_SURFACE_COLOR_TARGET)
	{
		RSX(ctx)->m_graphics_state |= rsx::pipeline_state::pipeline_config_dirty;
	}
	else if (reg == NV4097_SET_SURFACE_CLIP_VERTICAL || reg == NV4097_SET_SURFACE_CLIP_HORIZONTAL)
	{
		RSX(ctx)->m_graphics_state |= rsx::pipeline_state::vertex_state_dirty;
	}

	RSX(ctx)->m_graphics_state.set(rtt_config_dirty);
	RSX(ctx)->m_graphics_state.clear(rtt_config_contested);
}
|
||||||
|
|
||||||
|
// Handles a write to NV4097_SET_SURFACE_FORMAT. Only the low 16 bits are compared
// against the previous value; a change dirties the pipeline config and, when the
// antialias mode or integer-ness of the color format changed, the fragment state too.
void set_surface_format(context* ctx, u32 reg, u32 arg)
{
	// The high bits of this register are just log2(dimension), ignore them
	const bool unchanged = (arg & 0xFFFF) == (REGS(ctx)->latch & 0xFFFF);
	if (unchanged)
		return;

	// The important parameters have changed (format, type, antialias)
	RSX(ctx)->m_graphics_state |= rsx::pipeline_state::pipeline_config_dirty;

	// Check if we need to also update fragment state
	const auto new_format = REGS(ctx)->decode<NV4097_SET_SURFACE_FORMAT>(arg);
	const auto old_format = REGS(ctx)->decode<NV4097_SET_SURFACE_FORMAT>(REGS(ctx)->latch);

	// Antialias control or the type of color format changed: ROP parameters need an update
	if (*new_format.antialias() != *old_format.antialias() ||
		new_format.is_integer_color_format() != old_format.is_integer_color_format())
	{
		RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_state_dirty;
	}

	set_surface_dirty_bit(ctx, reg, arg);
}
|
||||||
|
|
||||||
|
// Notifies the RSX thread about a changed framebuffer option register and
// marks the pipeline config dirty. Does nothing when the value is unchanged.
void set_surface_options_dirty_bit(context* ctx, u32 reg, u32 arg)
{
	if (arg == REGS(ctx)->latch)
		return;

	RSX(ctx)->on_framebuffer_options_changed(reg);
	RSX(ctx)->m_graphics_state |= rsx::pipeline_config_dirty;
}
|
||||||
|
|
||||||
|
// Handles a write to NV4097_SET_COLOR_MASK. An invalid mask is rejected by
// re-decoding the previous (latched) value; a valid one is forwarded as a
// framebuffer option change.
void set_color_mask(context* ctx, u32 reg, u32 arg)
{
	if (arg == REGS(ctx)->latch)
		return;

	const bool rejected = REGS(ctx)->decode<NV4097_SET_COLOR_MASK>(arg).is_invalid();

	if (rejected) [[unlikely]]
	{
		REGS(ctx)->decode(reg, REGS(ctx)->latch);
		return;
	}

	set_surface_options_dirty_bit(ctx, reg, arg);
}
|
||||||
|
|
||||||
|
// Handles a stencil-op register write. Values that to_stencil_op() cannot map are
// rejected by re-decoding the previous (latched) value.
void set_stencil_op(context* ctx, u32 reg, u32 arg)
{
	if (arg == REGS(ctx)->latch)
		return;

	const auto typed = to_stencil_op(arg);

	if (!typed) [[unlikely]]
	{
		REGS(ctx)->decode(reg, REGS(ctx)->latch);
		return;
	}

	set_surface_options_dirty_bit(ctx, reg, arg);
}
|
||||||
|
|
||||||
|
///// Draw call setup (vertex, etc)
|
||||||
|
|
||||||
|
// Appends the two 16-bit values packed in `arg` (low half first) as array elements.
// Ignored outside of a begin/end pair.
void set_array_element16(context* ctx, u32 /*reg*/, u32 arg)
{
	if (!RSX(ctx)->in_begin_end)
		return;

	RSX(ctx)->append_array_element(arg & 0xFFFF);
	RSX(ctx)->append_array_element(arg >> 16);
}
|
||||||
|
|
||||||
|
// Appends `arg` as a single array element. Ignored outside of a begin/end pair.
void set_array_element32(context* ctx, u32 /*reg*/, u32 arg)
{
	if (!RSX(ctx)->in_begin_end)
		return;

	RSX(ctx)->append_array_element(arg);
}
|
||||||
|
|
||||||
|
void draw_arrays(context* /*rsx*/, u32 /*reg*/, u32 arg)
|
||||||
|
{
|
||||||
|
REGS(ctx)->current_draw_clause.command = rsx::draw_command::array;
|
||||||
|
rsx::registers_decoder<NV4097_DRAW_ARRAYS>::decoded_type v(arg);
|
||||||
|
|
||||||
|
REGS(ctx)->current_draw_clause.append(v.start(), v.count());
|
||||||
|
}
|
||||||
|
|
||||||
|
void draw_index_array(context* /*rsx*/, u32 /*reg*/, u32 arg)
|
||||||
|
{
|
||||||
|
REGS(ctx)->current_draw_clause.command = rsx::draw_command::indexed;
|
||||||
|
rsx::registers_decoder<NV4097_DRAW_INDEX_ARRAY>::decoded_type v(arg);
|
||||||
|
|
||||||
|
REGS(ctx)->current_draw_clause.append(v.start(), v.count());
|
||||||
|
}
|
||||||
|
|
||||||
|
void draw_inline_array(context* /*rsx*/, u32 /*reg*/, u32 arg)
|
||||||
|
{
|
||||||
|
arg = std::bit_cast<u32, be_t<u32>>(arg);
|
||||||
|
REGS(ctx)->current_draw_clause.command = rsx::draw_command::inlined_array;
|
||||||
|
REGS(ctx)->current_draw_clause.inline_vertex_array.push_back(arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flags the vertex program ucode dirty when the stored register value differs
// from the previous (latched) one.
void set_transform_program_start(context* ctx, u32 reg, u32 /*arg*/)
{
	const bool changed = REGS(ctx)->registers[reg] != REGS(ctx)->latch;
	if (!changed)
		return;

	RSX(ctx)->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty;
}
|
||||||
|
|
||||||
|
// Flags the vertex program state dirty when the stored register value differs
// from the previous (latched) one.
void set_vertex_attribute_output_mask(context* ctx, u32 reg, u32 /*arg*/)
{
	const bool changed = REGS(ctx)->registers[reg] != REGS(ctx)->latch;
	if (!changed)
		return;

	RSX(ctx)->m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty;
}
|
||||||
|
|
||||||
|
// Forwards a vertex base offset write as a draw parameter change
// tagged with vertex_base_modifier_barrier.
void set_vertex_base_offset(context* ctx, u32 reg, u32 arg)
{
	constexpr auto barrier = vertex_base_modifier_barrier;
	util::push_draw_parameter_change(ctx, barrier, reg, arg);
}
|
||||||
|
|
||||||
|
// Forwards an index base offset write as a draw parameter change
// tagged with index_base_modifier_barrier.
void set_index_base_offset(context* ctx, u32 reg, u32 arg)
{
	constexpr auto barrier = index_base_modifier_barrier;
	util::push_draw_parameter_change(ctx, barrier, reg, arg);
}
|
||||||
|
|
||||||
|
// Validates a write to NV4097_SET_INDEX_ARRAY_DMA. Any bit outside the location bit
// and the index type field makes the value invalid: the register is restored to its
// previous value, the FIFO is recovered and an error is logged.
void check_index_array_dma(context* ctx, u32 reg, u32 arg)
{
	// Check if either location or index type are invalid
	const bool has_invalid_bits = (arg & ~(CELL_GCM_LOCATION_MAIN | (CELL_GCM_DRAW_INDEX_ARRAY_TYPE_16 << 4))) != 0;

	if (!has_invalid_bits)
		return;

	// Ignore invalid value, recover
	REGS(ctx)->registers[reg] = REGS(ctx)->latch;
	RSX(ctx)->recover_fifo();

	rsx_log.error("Invalid NV4097_SET_INDEX_ARRAY_DMA value: 0x%x", arg);
}
|
||||||
|
|
||||||
|
///// Drawing
|
||||||
|
|
||||||
|
// Handles NV4097_SET_BEGIN_END.
// A non-zero low byte starts a draw with that primitive type; zero ends the current
// draw, compiling and submitting the accumulated draw clause if it is not empty.
void set_begin_end(context* ctx, u32 /*reg*/, u32 arg)
{
	// Ignore upper bits
	const u8 prim = static_cast<u8>(arg);

	if (prim != 0)
	{
		const auto primitive_type = to_primitive_type(prim);

		if (primitive_type)
		{
			REGS(ctx)->current_draw_clause.reset(primitive_type);
			RSX(ctx)->begin();
			return;
		}

		RSX(ctx)->in_begin_end = true;

		rsx_log.warning("Invalid NV4097_SET_BEGIN_END value: 0x%x", arg);
		return;
	}

	// Check if we have immediate mode vertex data in a driver-local buffer
	if (REGS(ctx)->current_draw_clause.command != rsx::draw_command::none)
	{
		REGS(ctx)->current_draw_clause.is_immediate_draw = false;
	}
	else
	{
		const u32 pushed_vertices = RSX(ctx)->get_push_buffer_vertex_count();
		const u32 pushed_indices = RSX(ctx)->get_push_buffer_index_count();

		// Need to set this flag since it overrides some register contents
		REGS(ctx)->current_draw_clause.is_immediate_draw = true;

		// Indices take precedence over plain vertices
		if (pushed_indices)
		{
			REGS(ctx)->current_draw_clause.command = rsx::draw_command::indexed;
			REGS(ctx)->current_draw_clause.append(0, pushed_indices);
		}
		else if (pushed_vertices)
		{
			REGS(ctx)->current_draw_clause.command = rsx::draw_command::array;
			REGS(ctx)->current_draw_clause.append(0, pushed_vertices);
		}
	}

	if (REGS(ctx)->current_draw_clause.empty())
	{
		// Nothing to draw, just leave the begin/end pair
		RSX(ctx)->in_begin_end = false;
	}
	else
	{
		REGS(ctx)->current_draw_clause.compile();

		if (g_cfg.video.disable_video_output)
		{
			// Base-class end() is called explicitly here
			RSX(ctx)->execute_nop_draw();
			RSX(ctx)->rsx::thread::end();
			return;
		}

		RSX(ctx)->end();
	}

	if (RSX(ctx)->pause_on_draw && RSX(ctx)->pause_on_draw.exchange(false))
	{
		RSX(ctx)->state -= cpu_flag::dbg_step;
		RSX(ctx)->state += cpu_flag::dbg_pause;
		RSX(ctx)->check_state();
	}
}
|
||||||
|
|
||||||
|
// Clears the surface using `arg` as the clear argument, then records a
// "clear" capture event when frame capture is active.
void clear(context* ctx, u32 /*reg*/, u32 arg)
{
	RSX(ctx)->clear_surface(arg);
	RSX_CAPTURE_EVENT("clear");
}
|
||||||
|
|
||||||
|
// Only records a capture event when frame capture is active; no other work is done here.
void clear_zcull(context* ctx, u32 /*reg*/, u32 /*arg*/)
{
	RSX_CAPTURE_EVENT("clear zcull memory");
}
|
||||||
|
|
||||||
|
// Handles NV4097_SET_CULL_FACE / NV4097_SET_FRONT_FACE writes.
// A valid value dirties the pipeline config; an invalid value (or an unexpected
// register) restores the previous register value.
void set_face_property(context* ctx, u32 reg, u32 arg)
{
	// Compare the written value (not the register index) with the previous value,
	// matching the other handlers in this file
	if (arg == REGS(ctx)->latch)
	{
		return;
	}

	bool valid;
	switch (reg)
	{
	case NV4097_SET_CULL_FACE:
		valid = !!to_cull_face(arg); break;
	case NV4097_SET_FRONT_FACE:
		valid = !!to_front_face(arg); break;
	default:
		valid = false; break;
	}

	if (valid) [[ likely ]]
	{
		RSX(ctx)->m_graphics_state |= rsx::pipeline_config_dirty;
	}
	else
	{
		// Reject: put the previous value back
		REGS(ctx)->registers[reg] = REGS(ctx)->latch;
	}
}
|
||||||
|
|
||||||
|
// Handles a blend equation register write. `arg` packs two 16-bit equations;
// both must be recognized by to_blend_equation(), otherwise the previous
// (latched) value is re-decoded.
void set_blend_equation(context* ctx, u32 reg, u32 arg)
{
	// Compare the written value (not the register index) with the previous value,
	// matching the other handlers in this file
	if (arg == REGS(ctx)->latch)
	{
		return;
	}

	if (to_blend_equation(arg & 0xFFFF) &&
		to_blend_equation((arg >> 16) & 0xFFFF)) [[ likely ]]
	{
		RSX(ctx)->m_graphics_state |= rsx::pipeline_config_dirty;
	}
	else
	{
		// Reject: re-decode the previous value
		REGS(ctx)->decode(reg, REGS(ctx)->latch);
	}
}
|
||||||
|
|
||||||
|
// Handles a blend factor register write. `arg` packs two 16-bit factors;
// both must be recognized by to_blend_factor(), otherwise the previous
// (latched) value is re-decoded.
void set_blend_factor(context* ctx, u32 reg, u32 arg)
{
	// Compare the written value (not the register index) with the previous value,
	// matching the other handlers in this file
	if (arg == REGS(ctx)->latch)
	{
		return;
	}

	if (to_blend_factor(arg & 0xFFFF) &&
		to_blend_factor((arg >> 16) & 0xFFFF)) [[ likely ]]
	{
		RSX(ctx)->m_graphics_state |= rsx::pipeline_config_dirty;
	}
	else
	{
		// Reject: re-decode the previous value
		REGS(ctx)->decode(reg, REGS(ctx)->latch);
	}
}
|
||||||
|
|
||||||
|
///// Reports
|
||||||
|
|
||||||
|
// Handles NV4097_GET_REPORT. The top byte of `arg` selects the report type and the
// low 24 bits the report offset. Zcull/zpass types are forwarded to get_zcull_stats();
// any other type is logged and only gets a timestamp written.
void get_report(context* ctx, u32 /*reg*/, u32 arg)
{
	const u8 type = arg >> 24;
	const u32 offset = arg & 0xffffff;

	const auto address_ptr = util::get_report_data_impl(ctx, offset);

	if (!address_ptr)
	{
		rsx_log.error("Bad argument passed to NV4097_GET_REPORT, arg=0x%X", arg);
		return;
	}

	const bool is_zcull_report =
		type == CELL_GCM_ZPASS_PIXEL_CNT ||
		type == CELL_GCM_ZCULL_STATS ||
		type == CELL_GCM_ZCULL_STATS1 ||
		type == CELL_GCM_ZCULL_STATS2 ||
		type == CELL_GCM_ZCULL_STATS3;

	if (is_zcull_report)
	{
		RSX(ctx)->get_zcull_stats(type, vm::cast(address_ptr));
		return;
	}

	rsx_log.error("NV4097_GET_REPORT: Bad type %d", type);

	vm::_ref<atomic_t<CellGcmReportData>>(address_ptr).atomic_op([&](CellGcmReportData& data)
	{
		data.timer = RSX(ctx)->timestamp();
		data.padding = 0;
	});
}
|
||||||
|
|
||||||
|
// Handles NV4097_CLEAR_REPORT_VALUE. Unknown types are logged, but
// clear_zcull_stats() is called with `arg` in every case.
void clear_report_value(context* ctx, u32 /*reg*/, u32 arg)
{
	const bool known_type = (arg == CELL_GCM_ZPASS_PIXEL_CNT || arg == CELL_GCM_ZCULL_STATS);

	if (!known_type)
	{
		rsx_log.error("NV4097_CLEAR_REPORT_VALUE: Bad type: %d", arg);
	}

	RSX(ctx)->clear_zcull_stats(arg);
}
|
||||||
|
|
||||||
|
// Handles the conditional rendering mode register. The top byte of `arg` is the mode:
// 1 disables conditional rendering, 2 enables it using the report at the offset in the
// low 24 bits, anything else is logged and ignored.
void set_render_mode(context* ctx, u32 /*reg*/, u32 arg)
{
	const u32 mode = arg >> 24;

	if (mode == 1)
	{
		RSX(ctx)->disable_conditional_rendering();
		return;
	}

	if (mode != 2)
	{
		rsx_log.error("Unknown render mode %d", mode);
		return;
	}

	const u32 offset = arg & 0xffffff;
	const auto address_ptr = util::get_report_data_impl(ctx, offset);

	if (!address_ptr)
	{
		rsx_log.error("Bad argument passed to NV4097_SET_RENDER_ENABLE, arg=0x%X", arg);
		return;
	}

	// Defer conditional render evaluation
	RSX(ctx)->enable_conditional_rendering(vm::cast(address_ptr));
}
|
||||||
|
|
||||||
|
// Tells the RSX thread that zcull info changed; register index and value are not inspected.
void set_zcull_render_enable(context* ctx, u32 /*reg*/, u32 /*arg*/)
{
	RSX(ctx)->notify_zcull_info_changed();
}
|
||||||
|
|
||||||
|
// Tells the RSX thread that zcull info changed; register index and value are not inspected.
void set_zcull_stats_enable(context* ctx, u32 /*reg*/, u32 /*arg*/)
{
	RSX(ctx)->notify_zcull_info_changed();
}
|
||||||
|
|
||||||
|
// Tells the RSX thread that zcull info changed; register index and value are not inspected.
void set_zcull_pixel_count_enable(context* ctx, u32 /*reg*/, u32 /*arg*/)
{
	RSX(ctx)->notify_zcull_info_changed();
}
|
||||||
|
|
||||||
|
///// Misc (sync objects, etc)
|
||||||
|
|
||||||
|
// Writes a notification record (current timestamp followed by zero) to the slot
// selected by the notify context DMA. Contexts outside the
// CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_0 group are ignored.
void set_notify(context* ctx, u32 /*reg*/, u32 /*arg*/)
{
	const u32 location = REGS(ctx)->context_dma_notify();

	// The low three bits pick the slot, in reversed order
	const u32 index = (location & 0x7) ^ 0x7;

	const bool valid_context = (location & ~7) == (CELL_GCM_CONTEXT_DMA_NOTIFY_MAIN_0 & ~7);

	if (!valid_context)
	{
		if (rsx_log.trace)
		{
			rsx_log.trace("NV4097_NOTIFY: invalid context = 0x%x", REGS(ctx)->context_dma_notify());
		}

		return;
	}

	const u32 addr = RSX(ctx)->iomap_table.get_addr(0xf100000 + (index * 0x40));

	ensure(addr != umax);

	vm::_ref<atomic_t<RsxNotify>>(addr).store(
	{
		RSX(ctx)->timestamp(),
		0
	});
}
|
||||||
|
|
||||||
|
// Handler for the NV4097 texture-read semaphore release.
// Writes `arg` to the label addressed by the 4097 semaphore offset/context registers.
// An offset that is not 16-byte aligned triggers FIFO recovery and nothing is written.
void texture_read_semaphore_release(context* ctx, u32 /*reg*/, u32 arg)
{
	// Pipeline barrier seems to be equivalent to a SHADER_READ stage barrier.
	// Ideally the GPU only needs to have cached all textures declared up to this point before writing the label.

	// lle-gcm likes to inject system reserved semaphores, presumably for system/vsh usage
	// Avoid calling render to avoid any havoc(flickering) they may cause from invalid flush/write
	const u32 sema_offset = REGS(ctx)->semaphore_offset_4097();

	if ((sema_offset % 16) != 0)
	{
		rsx_log.error("NV4097 semaphore using unaligned offset, recovering. (offset=0x%x)", sema_offset);
		RSX(ctx)->recover_fifo();
		return;
	}

	const u32 sema_addr = get_address(sema_offset, REGS(ctx)->semaphore_context_dma_4097());

	// Only a diagnostic: the write below still happens when the top nibbles differ
	const bool same_region = (RSX(ctx)->label_addr >> 28) == (sema_addr >> 28);

	if (!same_region)
	{
		rsx_log.error("NV4097 semaphore unexpected address. Please report to the developers. (offset=0x%x, addr=0x%x)", sema_offset, sema_addr);
	}

	// The second template argument follows the strict rendering mode setting
	if (g_cfg.video.strict_rendering_mode) [[ unlikely ]]
	{
		util::write_gcm_label<true, true>(ctx, sema_addr, arg);
	}
	else
	{
		util::write_gcm_label<true, false>(ctx, sema_addr, arg);
	}
}
|
||||||
|
|
||||||
|
// Handler for the NV4097 back-end write semaphore release.
// Writes a byte-swizzled copy of `arg` to the label addressed by the 4097
// semaphore offset/context registers. An offset that is not 16-byte aligned
// triggers FIFO recovery and nothing is written.
void back_end_write_semaphore_release(context* ctx, u32 /*reg*/, u32 arg)
{
	// Full pipeline barrier. GPU must flush pipeline before writing the label

	const u32 sema_offset = REGS(ctx)->semaphore_offset_4097();

	if ((sema_offset % 16) != 0)
	{
		rsx_log.error("NV4097 semaphore using unaligned offset, recovering. (offset=0x%x)", sema_offset);
		RSX(ctx)->recover_fifo();
		return;
	}

	const u32 sema_addr = get_address(sema_offset, REGS(ctx)->semaphore_context_dma_4097());

	// Only a diagnostic: the write below still happens when the top nibbles differ
	const bool same_region = (RSX(ctx)->label_addr >> 28) == (sema_addr >> 28);

	if (!same_region)
	{
		rsx_log.error("NV4097 semaphore unexpected address. Please report to the developers. (offset=0x%x, addr=0x%x)", sema_offset, sema_addr);
	}

	// Exchange byte 0 and byte 2 of the argument; bytes 1 and 3 stay in place
	const u32 kept_bytes = arg & 0xff00ff00;
	const u32 byte0_moved_up = (arg & 0xff) << 16;
	const u32 byte2_moved_down = (arg >> 16) & 0xff;

	util::write_gcm_label<true, true>(ctx, sema_addr, kept_bytes | byte0_moved_up | byte2_moved_down);
}
|
||||||
|
|
||||||
|
// Handler for the NV4097 sync method.
// The register index and written value are unused; the call is delegated
// to the RSX thread's own sync().
void sync(context* ctx, u32 /*reg*/, u32 /*arg*/)
{
	RSX(ctx)->sync();
}
|
||||||
|
}
|
||||||
|
}
|
238
rpcs3/Emu/RSX/NV47/nv4097.h
Normal file
238
rpcs3/Emu/RSX/NV47/nv4097.h
Normal file
|
@ -0,0 +1,238 @@
|
||||||
|
// NV47 3D Engine
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
namespace rsx
|
||||||
|
{
|
||||||
|
enum command_barrier_type;
|
||||||
|
enum vertex_base_type;
|
||||||
|
|
||||||
|
namespace nv4097
|
||||||
|
{
|
||||||
|
void clear(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void clear_zcull(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_face_property(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_notify(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void texture_read_semaphore_release(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void back_end_write_semaphore_release(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_array_element16(context* ctx, u32, u32 arg);
|
||||||
|
|
||||||
|
void set_array_element32(context* ctx, u32, u32 arg);
|
||||||
|
|
||||||
|
void draw_arrays(context* /*rsx*/, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void draw_index_array(context* /*rsx*/, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void draw_inline_array(context* /*rsx*/, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_transform_program_start(context* ctx, u32 reg, u32);
|
||||||
|
|
||||||
|
void set_vertex_attribute_output_mask(context* ctx, u32 reg, u32);
|
||||||
|
|
||||||
|
void set_begin_end(context* ctxthr, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void get_report(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void clear_report_value(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_render_mode(context* ctx, u32, u32 arg);
|
||||||
|
|
||||||
|
void set_zcull_render_enable(context* ctx, u32, u32);
|
||||||
|
|
||||||
|
void set_zcull_stats_enable(context* ctx, u32, u32);
|
||||||
|
|
||||||
|
void set_zcull_pixel_count_enable(context* ctx, u32, u32);
|
||||||
|
|
||||||
|
void sync(context* ctx, u32, u32);
|
||||||
|
|
||||||
|
void set_shader_program_dirty(context* ctx, u32, u32);
|
||||||
|
|
||||||
|
void set_surface_dirty_bit(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_surface_format(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_surface_options_dirty_bit(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_color_mask(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_stencil_op(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_vertex_base_offset(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_index_base_offset(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void check_index_array_dma(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_blend_equation(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
void set_blend_factor(context* ctx, u32 reg, u32 arg);
|
||||||
|
|
||||||
|
#define RSX(ctx) ctx->rsxthr
|
||||||
|
#define REGS(ctx) (&rsx::method_registers)
|
||||||
|
|
||||||
|
/**
|
||||||
|
* id = base method register
|
||||||
|
* index = register index in method
|
||||||
|
* count = element count per attribute
|
||||||
|
* register_count = number of registers consumed per attribute. E.g 3-element methods have padding
|
||||||
|
*/
|
||||||
|
template<u32 id, u32 index, int count, int register_count, typename type>
|
||||||
|
void set_vertex_data_impl(context* ctx, u32 arg)
|
||||||
|
{
|
||||||
|
static constexpr usz increment_per_array_index = (register_count * sizeof(type)) / sizeof(u32);
|
||||||
|
|
||||||
|
static constexpr usz attribute_index = index / increment_per_array_index;
|
||||||
|
static constexpr usz vertex_subreg = index % increment_per_array_index;
|
||||||
|
|
||||||
|
constexpr auto vtype = vertex_data_type_from_element_type<type>::type;
|
||||||
|
static_assert(vtype != rsx::vertex_base_type::cmp);
|
||||||
|
static_assert(vtype != rsx::vertex_base_type::ub256);
|
||||||
|
|
||||||
|
// Convert LE data to BE layout
|
||||||
|
if constexpr (sizeof(type) == 4)
|
||||||
|
{
|
||||||
|
arg = std::bit_cast<u32, be_t<u32>>(arg);
|
||||||
|
}
|
||||||
|
else if constexpr (sizeof(type) == 2)
|
||||||
|
{
|
||||||
|
// 2 16-bit values packed in 1 32-bit word
|
||||||
|
const auto be_data = std::bit_cast<u32, be_t<u32>>(arg);
|
||||||
|
|
||||||
|
// After u32 swap, the components are in the wrong position
|
||||||
|
arg = (be_data << 16) | (be_data >> 16);
|
||||||
|
}
|
||||||
|
|
||||||
|
util::push_vertex_data(attribute_index, vertex_subreg, count, vtype);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
struct set_vertex_data4ub_m
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
set_vertex_data_impl<NV4097_SET_VERTEX_DATA4UB_M, index, 4, 4, u8>(ctx, arg);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
struct set_vertex_data1f_m
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
set_vertex_data_impl<NV4097_SET_VERTEX_DATA1F_M, index, 1, 1, f32>(ctx, arg);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
struct set_vertex_data2f_m
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
set_vertex_data_impl<NV4097_SET_VERTEX_DATA2F_M, index, 2, 2, f32>(ctx, arg);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
struct set_vertex_data3f_m
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
//Register alignment is only 1, 2, or 4 (Rachet & Clank 2)
|
||||||
|
set_vertex_data_impl<NV4097_SET_VERTEX_DATA3F_M, index, 3, 4, f32>(ctx, arg);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
struct set_vertex_data4f_m
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
set_vertex_data_impl<NV4097_SET_VERTEX_DATA4F_M, index, 4, 4, f32>(ctx, arg);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
struct set_vertex_data2s_m
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
set_vertex_data_impl<NV4097_SET_VERTEX_DATA2S_M, index, 2, 2, u16>(ctx, arg);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
struct set_vertex_data4s_m
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
set_vertex_data_impl<NV4097_SET_VERTEX_DATA4S_M, index, 4, 4, u16>(ctx, arg);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
struct set_vertex_data_scaled4s_m
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
set_vertex_data_impl<NV4097_SET_VERTEX_DATA_SCALED4S_M, index, 4, 4, s16>(ctx, arg);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct set_transform_constant
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg);
|
||||||
|
};
|
||||||
|
|
||||||
|
struct set_transform_program
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg);
|
||||||
|
};
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
struct set_vertex_array_offset
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
util::push_draw_parameter_change(ctx, vertex_array_offset_modifier_barrier, reg, arg);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
struct set_texture_dirty_bit
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
RSX(ctx)->m_textures_dirty[index] = true;
|
||||||
|
|
||||||
|
if (RSX(ctx)->current_fp_metadata.referenced_textures_mask & (1 << index))
|
||||||
|
{
|
||||||
|
RSX(ctx)->m_graphics_state |= rsx::pipeline_state::fragment_program_state_dirty;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
struct set_vertex_texture_dirty_bit
|
||||||
|
{
|
||||||
|
static void impl(context* ctx, u32 reg, u32 arg)
|
||||||
|
{
|
||||||
|
RSX(ctx)->m_vertex_textures_dirty[index] = true;
|
||||||
|
|
||||||
|
if (RSX(ctx)->current_vp_metadata.referenced_textures_mask & (1 << index))
|
||||||
|
{
|
||||||
|
RSX(ctx)->m_graphics_state |= rsx::pipeline_state::vertex_program_state_dirty;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#undef RSX
|
||||||
|
#undef REGS
|
||||||
|
}
|
||||||
|
}
|
7
rpcs3/Emu/RSX/NV47/nv47.h
Normal file
7
rpcs3/Emu/RSX/NV47/nv47.h
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
// 3D Engine definitions
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "nv3089.h"
|
||||||
|
#include "nv308a.h"
|
||||||
|
#include "nv406e.h"
|
||||||
|
#include "nv4097.h"
|
|
@ -8,7 +8,7 @@
|
||||||
|
|
||||||
namespace rsx
|
namespace rsx
|
||||||
{
|
{
|
||||||
void invalid_method(thread*, u32, u32);
|
void invalid_method(context*, u32, u32);
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 RSXDisAsm::disasm(u32 pc)
|
u32 RSXDisAsm::disasm(u32 pc)
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
#include "Core/RSXReservationLock.hpp"
|
#include "Core/RSXReservationLock.hpp"
|
||||||
#include "Emu/Memory/vm_reservation.h"
|
#include "Emu/Memory/vm_reservation.h"
|
||||||
#include "Emu/Cell/lv2/sys_rsx.h"
|
#include "Emu/Cell/lv2/sys_rsx.h"
|
||||||
|
#include "NV47/context.h"
|
||||||
|
|
||||||
#include "util/asm.hpp"
|
#include "util/asm.hpp"
|
||||||
|
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
|
@ -808,6 +810,9 @@ namespace rsx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: This should be properly managed
|
||||||
|
rsx::context ctx{ .rsxthr = this, .register_state = &method_registers };
|
||||||
|
|
||||||
if (m_flattener.is_enabled()) [[unlikely]]
|
if (m_flattener.is_enabled()) [[unlikely]]
|
||||||
{
|
{
|
||||||
switch(m_flattener.test(command))
|
switch(m_flattener.test(command))
|
||||||
|
@ -819,15 +824,15 @@ namespace rsx
|
||||||
case FIFO::EMIT_END:
|
case FIFO::EMIT_END:
|
||||||
{
|
{
|
||||||
// Emit end command to close existing scope
|
// Emit end command to close existing scope
|
||||||
//ensure(in_begin_end);
|
AUDIT(in_begin_end);
|
||||||
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0);
|
methods[NV4097_SET_BEGIN_END](&ctx, NV4097_SET_BEGIN_END, 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case FIFO::EMIT_BARRIER:
|
case FIFO::EMIT_BARRIER:
|
||||||
{
|
{
|
||||||
//ensure(in_begin_end);
|
AUDIT(in_begin_end);
|
||||||
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, 0);
|
methods[NV4097_SET_BEGIN_END](&ctx, NV4097_SET_BEGIN_END, 0);
|
||||||
methods[NV4097_SET_BEGIN_END](this, NV4097_SET_BEGIN_END, m_flattener.get_primitive());
|
methods[NV4097_SET_BEGIN_END](&ctx, NV4097_SET_BEGIN_END, m_flattener.get_primitive());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
@ -846,19 +851,19 @@ namespace rsx
|
||||||
const u32 reg = (command.reg & 0xffff) >> 2;
|
const u32 reg = (command.reg & 0xffff) >> 2;
|
||||||
const u32 value = command.value;
|
const u32 value = command.value;
|
||||||
|
|
||||||
method_registers.decode(reg, value);
|
ctx.register_state->decode(reg, value);
|
||||||
|
|
||||||
if (auto method = methods[reg])
|
if (auto method = methods[reg])
|
||||||
{
|
{
|
||||||
method(this, reg, value);
|
method(&ctx, reg, value);
|
||||||
|
|
||||||
if (state & cpu_flag::again)
|
if (state & cpu_flag::again)
|
||||||
{
|
{
|
||||||
method_registers.decode(reg, method_registers.register_previous_value);
|
ctx.register_state->decode(reg, ctx.register_state->latch);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (method_registers.register_previous_value != value)
|
else if (ctx.register_state->latch != value)
|
||||||
{
|
{
|
||||||
// Something changed, set signal flags if any specified
|
// Something changed, set signal flags if any specified
|
||||||
m_graphics_state |= state_signals[reg];
|
m_graphics_state |= state_signals[reg];
|
||||||
|
|
|
@ -3383,7 +3383,7 @@ namespace rsx
|
||||||
return fifo_ctrl->last_cmd();
|
return fifo_ctrl->last_cmd();
|
||||||
}
|
}
|
||||||
|
|
||||||
void invalid_method(thread*, u32, u32);
|
void invalid_method(context*, u32, u32);
|
||||||
|
|
||||||
void thread::dump_regs(std::string& result, std::any& /*custom_data*/) const
|
void thread::dump_regs(std::string& result, std::any& /*custom_data*/) const
|
||||||
{
|
{
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -375,7 +375,7 @@ namespace rsx
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
using rsx_method_t = void(*)(class thread*, u32 reg, u32 arg);
|
using rsx_method_t = void(*)(class context*, u32 reg, u32 arg);
|
||||||
|
|
||||||
//TODO
|
//TODO
|
||||||
union alignas(4) method_registers_t
|
union alignas(4) method_registers_t
|
||||||
|
@ -442,7 +442,7 @@ namespace rsx
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
std::array<u32, 0x10000 / 4> registers{};
|
std::array<u32, 0x10000 / 4> registers{};
|
||||||
u32 register_previous_value{};
|
u32 latch{};
|
||||||
|
|
||||||
template<u32 opcode>
|
template<u32 opcode>
|
||||||
using decoded_type = typename registers_decoder<opcode>::decoded_type;
|
using decoded_type = typename registers_decoder<opcode>::decoded_type;
|
||||||
|
|
|
@ -95,6 +95,11 @@
|
||||||
<ClCompile Include="Emu\perf_monitor.cpp" />
|
<ClCompile Include="Emu\perf_monitor.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\Common\texture_cache.cpp" />
|
<ClCompile Include="Emu\RSX\Common\texture_cache.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\Core\RSXContext.cpp" />
|
<ClCompile Include="Emu\RSX\Core\RSXContext.cpp" />
|
||||||
|
<ClCompile Include="Emu\RSX\NV47\common.cpp" />
|
||||||
|
<ClCompile Include="Emu\RSX\NV47\nv3089.cpp" />
|
||||||
|
<ClCompile Include="Emu\RSX\NV47\nv308a.cpp" />
|
||||||
|
<ClCompile Include="Emu\RSX\NV47\nv406e.cpp" />
|
||||||
|
<ClCompile Include="Emu\RSX\NV47\nv4097.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\Overlays\HomeMenu\overlay_home_menu.cpp" />
|
<ClCompile Include="Emu\RSX\Overlays\HomeMenu\overlay_home_menu.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\Overlays\HomeMenu\overlay_home_menu_components.cpp" />
|
<ClCompile Include="Emu\RSX\Overlays\HomeMenu\overlay_home_menu_components.cpp" />
|
||||||
<ClCompile Include="Emu\RSX\Overlays\HomeMenu\overlay_home_menu_message_box.cpp" />
|
<ClCompile Include="Emu\RSX\Overlays\HomeMenu\overlay_home_menu_message_box.cpp" />
|
||||||
|
@ -584,6 +589,15 @@
|
||||||
<ClInclude Include="Emu\RSX\Core\RSXDisplay.h" />
|
<ClInclude Include="Emu\RSX\Core\RSXDisplay.h" />
|
||||||
<ClInclude Include="Emu\RSX\Core\RSXReservationLock.hpp" />
|
<ClInclude Include="Emu\RSX\Core\RSXReservationLock.hpp" />
|
||||||
<ClInclude Include="Emu\RSX\Core\RSXVertexTypes.h" />
|
<ClInclude Include="Emu\RSX\Core\RSXVertexTypes.h" />
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\context.h" />
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\context_accessors.define.h" />
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\context_accessors.undef.h" />
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\nv3089.h" />
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\nv308a.h" />
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\nv406e.h" />
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\nv4097.h" />
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\nv47.h" />
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\common.h" />
|
||||||
<ClInclude Include="Emu\RSX\Overlays\HomeMenu\overlay_home_menu.h" />
|
<ClInclude Include="Emu\RSX\Overlays\HomeMenu\overlay_home_menu.h" />
|
||||||
<ClInclude Include="Emu\RSX\Overlays\HomeMenu\overlay_home_menu_components.h" />
|
<ClInclude Include="Emu\RSX\Overlays\HomeMenu\overlay_home_menu_components.h" />
|
||||||
<ClInclude Include="Emu\RSX\Overlays\HomeMenu\overlay_home_menu_message_box.h" />
|
<ClInclude Include="Emu\RSX\Overlays\HomeMenu\overlay_home_menu_message_box.h" />
|
||||||
|
|
|
@ -97,6 +97,9 @@
|
||||||
<Filter Include="Emu\GPU\RSX\Program\Upscalers\FSR1">
|
<Filter Include="Emu\GPU\RSX\Program\Upscalers\FSR1">
|
||||||
<UniqueIdentifier>{cab197c1-581c-49db-9d8b-670335b44cb2}</UniqueIdentifier>
|
<UniqueIdentifier>{cab197c1-581c-49db-9d8b-670335b44cb2}</UniqueIdentifier>
|
||||||
</Filter>
|
</Filter>
|
||||||
|
<Filter Include="Emu\GPU\RSX\NV47">
|
||||||
|
<UniqueIdentifier>{213387bd-09c5-4247-8fb0-b3cae06ba34b}</UniqueIdentifier>
|
||||||
|
</Filter>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="Crypto\aes.cpp">
|
<ClCompile Include="Crypto\aes.cpp">
|
||||||
|
@ -1210,6 +1213,21 @@
|
||||||
<ClCompile Include="Emu\RSX\Program\SPIRVCommon.cpp">
|
<ClCompile Include="Emu\RSX\Program\SPIRVCommon.cpp">
|
||||||
<Filter>Emu\GPU\RSX\Program</Filter>
|
<Filter>Emu\GPU\RSX\Program</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="Emu\RSX\NV47\nv4097.cpp">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="Emu\RSX\NV47\common.cpp">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="Emu\RSX\NV47\nv406e.cpp">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="Emu\RSX\NV47\nv308a.cpp">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClCompile>
|
||||||
|
<ClCompile Include="Emu\RSX\NV47\nv3089.cpp">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="Crypto\aes.h">
|
<ClInclude Include="Crypto\aes.h">
|
||||||
|
@ -2449,6 +2467,33 @@
|
||||||
<ClInclude Include="Emu\RSX\Program\SPIRVCommon.h">
|
<ClInclude Include="Emu\RSX\Program\SPIRVCommon.h">
|
||||||
<Filter>Emu\GPU\RSX\Program</Filter>
|
<Filter>Emu\GPU\RSX\Program</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\nv47.h">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\nv4097.h">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\nv406e.h">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\nv3089.h">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\nv308a.h">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\context.h">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\common.h">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\context_accessors.define.h">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClInclude>
|
||||||
|
<ClInclude Include="Emu\RSX\NV47\context_accessors.undef.h">
|
||||||
|
<Filter>Emu\GPU\RSX\NV47</Filter>
|
||||||
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">
|
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue