// Mirror of https://github.com/RPCS3/rpcs3.git
// Synced 2025-07-14 02:38:37 +12:00
// 510 lines, 12 KiB, C++
#pragma once
|
|
|
|
#include "GCM.h"
#include "RSXTexture.h"
#include "RSXVertexProgram.h"
#include "RSXFragmentProgram.h"

#include <stack>
#include <stdexcept>
#include "Utilities/Semaphore.h"
#include "Utilities/Thread.h"
#include "Utilities/Timer.h"
#include "Utilities/convert.h"
|
|
|
|
// Declaration only; the definition lives in another translation unit.
// Returns a u64 time value (the unit is not visible from this header).
extern u64 get_system_time();
|
|
|
|
struct frame_capture_data
|
|
{
|
|
struct buffer
|
|
{
|
|
std::vector<u8> data;
|
|
size_t width = 0, height = 0;
|
|
};
|
|
|
|
struct draw_state
|
|
{
|
|
std::string name;
|
|
std::pair<std::string, std::string> programs;
|
|
buffer color_buffer[4];
|
|
buffer depth;
|
|
buffer stencil;
|
|
};
|
|
std::vector<std::pair<u32, u32> > command_queue;
|
|
std::vector<draw_state> draw_calls;
|
|
|
|
void reset()
|
|
{
|
|
command_queue.clear();
|
|
draw_calls.clear();
|
|
}
|
|
};
|
|
|
|
// Global flag, defined elsewhere.
// NOTE(review): presumably set when the user requests a frame capture - confirm at the definition.
extern bool user_asked_for_frame_capture;
|
|
// Global frame_capture_data instance, defined elsewhere.
extern frame_capture_data frame_debug;
|
|
|
|
namespace rsx
{
	/** Shading languages selectable by name ("glsl" / "hlsl"). */
	enum class shader_language
	{
		glsl, // first enumerator, value 0
		hlsl  // value 1
	};
}
|
|
|
|
namespace convert
|
|
{
|
|
template<>
|
|
struct to_impl_t<rsx::shader_language, std::string>
|
|
{
|
|
static rsx::shader_language func(const std::string &from)
|
|
{
|
|
if (from == "glsl")
|
|
return rsx::shader_language::glsl;
|
|
|
|
if (from == "hlsl")
|
|
return rsx::shader_language::hlsl;
|
|
|
|
throw;
|
|
}
|
|
};
|
|
|
|
template<>
|
|
struct to_impl_t<std::string, rsx::shader_language>
|
|
{
|
|
static std::string func(rsx::shader_language from)
|
|
{
|
|
switch (from)
|
|
{
|
|
case rsx::shader_language::glsl:
|
|
return "glsl";
|
|
case rsx::shader_language::hlsl:
|
|
return "hlsl";
|
|
}
|
|
|
|
throw;
|
|
}
|
|
};
|
|
}
|
|
namespace rsx
|
|
{
|
|
/** Compile-time element counts used as array bounds. */
namespace limits
{
	enum
	{
		textures_count        = 16,
		vertex_textures_count = 4,
		vertex_count          = 16,
		fragment_count        = 32,
		tiles_count           = 15,
		zculls_count          = 8,
		color_buffers_count   = 4
	};
}
|
|
|
|
/** Result of decompiling a shader: just the generated source text. */
struct decompiled_shader
{
	std::string code;
};
|
|
|
|
struct finalized_shader
|
|
{
|
|
u64 ucode_hash;
|
|
std::string code;
|
|
};
|
|
|
|
/**
 * Minimal keyed cache on top of std::unordered_map.
 *
 * Type     - stored value type.
 * KeyType  - lookup key (u64 by default).
 * Hasher   - hash functor for KeyType.
 */
template<typename Type, typename KeyType = u64, typename Hasher = std::hash<KeyType>>
struct cache
{
private:
	std::unordered_map<KeyType, Type, Hasher> m_entries;

public:
	/**
	 * Looks up `key`.
	 * Returns a pointer to the stored value, or nullptr when the key is absent.
	 * The key is taken as KeyType (not a hard-coded u64) so that non-default
	 * key types work and match insert().
	 */
	const Type* find(const KeyType &key) const
	{
		const auto found = m_entries.find(key);

		if (found == m_entries.end())
			return nullptr;

		return &found->second;
	}

	/**
	 * Stores a copy of `shader` under `key`.
	 * An already existing entry for `key` is left untouched
	 * (std::unordered_map::insert does not overwrite).
	 */
	void insert(KeyType key, const Type &shader)
	{
		m_entries.insert({ key, shader });
	}
};
|
|
|
|
struct shaders_cache
|
|
{
|
|
cache<decompiled_shader> decompiled_fragment_shaders;
|
|
cache<decompiled_shader> decompiled_vertex_shaders;
|
|
cache<finalized_shader> finailized_fragment_shaders;
|
|
cache<finalized_shader> finailized_vertex_shaders;
|
|
|
|
void load(const std::string &path, shader_language lang);
|
|
void load(shader_language lang);
|
|
|
|
static std::string path_to_root();
|
|
};
|
|
|
|
//TODO
|
|
union alignas(4) method_registers_t
|
|
{
|
|
u8 _u8[0x10000];
|
|
u32 _u32[0x10000 >> 2];
|
|
/*
|
|
struct alignas(4)
|
|
{
|
|
u8 pad[NV4097_SET_TEXTURE_OFFSET - 4];
|
|
|
|
struct alignas(4) texture_t
|
|
{
|
|
u32 offset;
|
|
|
|
union format_t
|
|
{
|
|
u32 _u32;
|
|
|
|
struct
|
|
{
|
|
u32: 1;
|
|
u32 location : 1;
|
|
u32 cubemap : 1;
|
|
u32 border_type : 1;
|
|
u32 dimension : 4;
|
|
u32 format : 8;
|
|
u32 mipmap : 16;
|
|
};
|
|
} format;
|
|
|
|
union address_t
|
|
{
|
|
u32 _u32;
|
|
|
|
struct
|
|
{
|
|
u32 wrap_s : 4;
|
|
u32 aniso_bias : 4;
|
|
u32 wrap_t : 4;
|
|
u32 unsigned_remap : 4;
|
|
u32 wrap_r : 4;
|
|
u32 gamma : 4;
|
|
u32 signed_remap : 4;
|
|
u32 zfunc : 4;
|
|
};
|
|
} address;
|
|
|
|
u32 control0;
|
|
u32 control1;
|
|
u32 filter;
|
|
u32 image_rect;
|
|
u32 border_color;
|
|
} textures[limits::textures_count];
|
|
};
|
|
*/
|
|
u32& operator[](int index)
|
|
{
|
|
return _u32[index >> 2];
|
|
}
|
|
};
|
|
|
|
// Global array of 0x10000 >> 2 (16384) 32-bit entries, defined elsewhere.
// NOTE(review): same element count as method_registers_t::_u32 - presumably the live register file; confirm.
extern u32 method_registers[0x10000 >> 2];
|
|
|
|
// Declaration only.
// NOTE(review): presumably returns the size of one component of the given vertex type - confirm at the definition.
u32 get_vertex_type_size(u32 type);
|
|
|
|
// Declaration only.
// NOTE(review): presumably translates an (offset, location) pair into an address - confirm at the definition.
u32 get_address(u32 offset, u32 location);
|
|
|
|
template<typename T>
|
|
void pad_texture(void* inputPixels, void* outputPixels, u16 inputWidth, u16 inputHeight, u16 outputWidth, u16 outputHeight)
|
|
{
|
|
T *src, *dst;
|
|
src = static_cast<T*>(inputPixels);
|
|
dst = static_cast<T*>(outputPixels);
|
|
|
|
for (u16 h = 0; h < inputHeight; ++h)
|
|
{
|
|
const u32 padded_pos = h * outputWidth;
|
|
const u32 pos = h * inputWidth;
|
|
for (u16 w = 0; w < inputWidth; ++w)
|
|
{
|
|
dst[padded_pos + w] = src[pos + w];
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Note: What the ps3 calls swizzling in this case is actually z-ordering / morton ordering of pixels
|
|
* - Input can be swizzled or linear, bool flag handles conversion to and from
|
|
* - It will handle any width and height that are a power of 2, square or non square
|
|
* Restriction: It has mixed results if the height or width is not a power of 2
|
|
*/
|
|
template<typename T>
|
|
void convert_linear_swizzle(void* inputPixels, void* outputPixels, u16 width, u16 height, bool inputIsSwizzled)
|
|
{
|
|
u32 log2width, log2height;
|
|
|
|
log2width = log2(width);
|
|
log2height = log2(height);
|
|
|
|
// Max mask possible for square texture
|
|
u32 x_mask = 0x55555555;
|
|
u32 y_mask = 0xAAAAAAAA;
|
|
|
|
// We have to limit the masks to the lower of the two dimensions to allow for non-square textures
|
|
u32 limit_mask = (log2width < log2height) ? log2width : log2height;
|
|
// double the limit mask to account for bits in both x and y
|
|
limit_mask = 1 << (limit_mask << 1);
|
|
|
|
//x_mask, bits above limit are 1's for x-carry
|
|
x_mask = (x_mask | ~(limit_mask - 1));
|
|
//y_mask. bits above limit are 0'd, as we use a different method for y-carry over
|
|
y_mask = (y_mask & (limit_mask - 1));
|
|
|
|
u32 offs_y = 0;
|
|
u32 offs_x = 0;
|
|
u32 offs_x0 = 0; //total y-carry offset for x
|
|
u32 y_incr = limit_mask;
|
|
|
|
T *src, *dst;
|
|
|
|
if (!inputIsSwizzled)
|
|
{
|
|
for (int y = 0; y < height; ++y)
|
|
{
|
|
src = static_cast<T*>(inputPixels) + y*width;
|
|
dst = static_cast<T*>(outputPixels) + offs_y;
|
|
offs_x = offs_x0;
|
|
for (int x = 0; x < width; ++x)
|
|
{
|
|
dst[offs_x] = src[x];
|
|
offs_x = (offs_x - x_mask) & x_mask;
|
|
}
|
|
offs_y = (offs_y - y_mask) & y_mask;
|
|
if (offs_y == 0) offs_x0 += y_incr;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int y = 0; y < height; ++y)
|
|
{
|
|
src = static_cast<T*>(inputPixels) + offs_y;
|
|
dst = static_cast<T*>(outputPixels) + y*width;
|
|
offs_x = offs_x0;
|
|
for (int x = 0; x < width; ++x)
|
|
{
|
|
dst[x] = src[offs_x];
|
|
offs_x = (offs_x - x_mask) & x_mask;
|
|
}
|
|
offs_y = (offs_y - y_mask) & y_mask;
|
|
if (offs_y == 0) offs_x0 += y_incr;
|
|
}
|
|
}
|
|
}
|
|
|
|
struct surface_info
|
|
{
|
|
u8 log2height;
|
|
u8 log2width;
|
|
u8 antialias;
|
|
u8 depth_format;
|
|
u8 color_format;
|
|
|
|
u32 width;
|
|
u32 height;
|
|
u32 format;
|
|
|
|
void unpack(u32 surface_format)
|
|
{
|
|
format = surface_format;
|
|
|
|
log2height = surface_format >> 24;
|
|
log2width = (surface_format >> 16) & 0xff;
|
|
antialias = (surface_format >> 12) & 0xf;
|
|
depth_format = (surface_format >> 5) & 0x7;
|
|
color_format = surface_format & 0x1f;
|
|
|
|
width = 1 << (u32(log2width) + 1);
|
|
height = 1 << (u32(log2width) + 1);
|
|
}
|
|
};
|
|
|
|
struct data_array_format_info
|
|
{
|
|
u16 frequency = 0;
|
|
u8 stride = 0;
|
|
u8 size = 0;
|
|
u8 type = CELL_GCM_VERTEX_F;
|
|
|
|
void unpack_array(u32 data_array_format)
|
|
{
|
|
frequency = data_array_format >> 16;
|
|
stride = (data_array_format >> 8) & 0xff;
|
|
size = (data_array_format >> 4) & 0xf;
|
|
type = data_array_format & 0xf;
|
|
}
|
|
};
|
|
|
|
class thread : public named_thread_t
|
|
{
|
|
protected:
|
|
std::stack<u32> m_call_stack;
|
|
|
|
public:
|
|
struct shaders_cache shaders_cache;
|
|
|
|
CellGcmControl* ctrl = nullptr;
|
|
|
|
Timer timer_sync;
|
|
|
|
GcmTileInfo tiles[limits::tiles_count];
|
|
GcmZcullInfo zculls[limits::zculls_count];
|
|
|
|
rsx::texture textures[limits::textures_count];
|
|
rsx::vertex_texture vertex_textures[limits::vertex_textures_count];
|
|
|
|
|
|
/**
|
|
* RSX can sources vertex attributes from 2 places:
|
|
* - Immediate values passed by NV4097_SET_VERTEX_DATA*_M + ARRAY_ID write.
|
|
* For a given ARRAY_ID the last command of this type defines the actual type of the immediate value.
|
|
* Since there can be only a single value per ARRAY_ID passed this way, all vertex in the draw call
|
|
* shares it.
|
|
* - Vertex array values passed by offset/stride/size/format description.
|
|
*
|
|
* A given ARRAY_ID can have both an immediate value and a vertex array enabled at the same time
|
|
* (See After Burner Climax intro cutscene). In such case the vertex array has precedence over the
|
|
* immediate value. As soon as the vertex array is disabled (size set to 0) the immediate value
|
|
* must be used if the vertex attrib mask request it.
|
|
*
|
|
* Note that behavior when both vertex array and immediate value system are disabled but vertex attrib mask
|
|
* request inputs is unknow.
|
|
*/
|
|
data_array_format_info register_vertex_info[limits::vertex_count];
|
|
std::vector<u8> register_vertex_data[limits::vertex_count];
|
|
data_array_format_info vertex_arrays_info[limits::vertex_count];
|
|
std::vector<u8> vertex_arrays[limits::vertex_count];
|
|
std::vector<u8> vertex_index_array;
|
|
u32 vertex_draw_count = 0;
|
|
|
|
std::unordered_map<u32, color4_base<f32>> transform_constants;
|
|
|
|
// Constant stored for whole frame
|
|
std::unordered_map<u32, color4f> local_transform_constants;
|
|
|
|
u32 transform_program[512 * 4] = {};
|
|
|
|
virtual void load_vertex_data(u32 first, u32 count);
|
|
virtual void load_vertex_index_data(u32 first, u32 count);
|
|
|
|
bool capture_current_frame = false;
|
|
void capture_frame(const std::string &name);
|
|
public:
|
|
u32 ioAddress, ioSize;
|
|
int flip_status;
|
|
int flip_mode;
|
|
int debug_level;
|
|
int frequency_mode;
|
|
|
|
u32 tiles_addr;
|
|
u32 zculls_addr;
|
|
vm::ps3::ptr<CellGcmDisplayInfo> gcm_buffers;
|
|
u32 gcm_buffers_count;
|
|
u32 gcm_current_buffer;
|
|
u32 ctxt_addr;
|
|
u32 report_main_addr;
|
|
u32 label_addr;
|
|
enum class Draw_command
|
|
{
|
|
draw_command_array,
|
|
draw_command_inlined_array,
|
|
draw_command_indexed,
|
|
} draw_command;
|
|
u32 draw_mode;
|
|
|
|
u32 local_mem_addr, main_mem_addr;
|
|
bool strict_ordering[0x1000];
|
|
|
|
|
|
bool draw_inline_vertex_array;
|
|
std::vector<u32> inline_vertex_array;
|
|
|
|
public:
|
|
u32 draw_array_count;
|
|
u32 draw_array_first;
|
|
double fps_limit = 59.94;
|
|
|
|
public:
|
|
semaphore_t sem_flip;
|
|
u64 last_flip_time;
|
|
vm::ps3::ptr<void(u32)> flip_handler = vm::null;
|
|
vm::ps3::ptr<void(u32)> user_handler = vm::null;
|
|
vm::ps3::ptr<void(u32)> vblank_handler = vm::null;
|
|
u64 vblank_count;
|
|
|
|
public:
|
|
std::set<u32> m_used_gcm_commands;
|
|
|
|
protected:
|
|
virtual ~thread() {}
|
|
|
|
virtual void on_task() override;
|
|
|
|
public:
|
|
virtual std::string get_name() const override;
|
|
|
|
virtual void begin();
|
|
virtual void end();
|
|
|
|
virtual void on_init() = 0;
|
|
virtual void on_init_thread() = 0;
|
|
virtual bool do_method(u32 cmd, u32 value) { return false; }
|
|
virtual void flip(int buffer) = 0;
|
|
virtual u64 timestamp() const;
|
|
|
|
/**
|
|
* Fill buffer with 4x4 scale offset matrix.
|
|
* Vertex shader's position is to be multiplied by this matrix.
|
|
* if is_d3d is set, the matrix is modified to use d3d convention.
|
|
*/
|
|
void fill_scale_offset_data(void *buffer, bool is_d3d = true) const;
|
|
|
|
/**
|
|
* Fill buffer with vertex program constants.
|
|
* Buffer must be at least 512 float4 wide.
|
|
*/
|
|
void fill_vertex_program_constants_data(void *buffer);
|
|
|
|
/**
|
|
* Write inlined array data to buffer.
|
|
* The storage of inlined data looks different from memory stored arrays.
|
|
* There is no swapping required except for 4 u8 (according to Bleach Soul Resurection)
|
|
*/
|
|
void write_inline_array_to_buffer(void *dst_buffer);
|
|
|
|
/**
|
|
* Copy rtt values to buffer.
|
|
* TODO: It's more efficient to combine multiple call of this function into one.
|
|
*/
|
|
virtual void copy_render_targets_to_memory(void *buffer, u8 rtt) {};
|
|
|
|
/**
|
|
* Copy depth content to buffer.
|
|
* TODO: It's more efficient to combine multiple call of this function into one.
|
|
*/
|
|
virtual void copy_depth_buffer_to_memory(void *buffer) {};
|
|
|
|
/**
|
|
* Copy stencil content to buffer.
|
|
* TODO: It's more efficient to combine multiple call of this function into one.
|
|
*/
|
|
virtual void copy_stencil_buffer_to_memory(void *buffer) {};
|
|
|
|
virtual std::pair<std::string, std::string> get_programs() const { return std::make_pair("", ""); };
|
|
public:
|
|
void reset();
|
|
void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress);
|
|
|
|
u32 ReadIO32(u32 addr);
|
|
void WriteIO32(u32 addr, u32 value);
|
|
};
|
|
}
|