rpcs3/rpcs3/Emu/RSX/RSXThread.h

510 lines
12 KiB
C++

#pragma once
#include "GCM.h"
#include "RSXTexture.h"
#include "RSXVertexProgram.h"
#include "RSXFragmentProgram.h"
#include <stack>
#include "Utilities/Semaphore.h"
#include "Utilities/Thread.h"
#include "Utilities/Timer.h"
#include "Utilities/convert.h"
extern u64 get_system_time();
struct frame_capture_data
{
struct buffer
{
std::vector<u8> data;
size_t width = 0, height = 0;
};
struct draw_state
{
std::string name;
std::pair<std::string, std::string> programs;
buffer color_buffer[4];
buffer depth;
buffer stencil;
};
std::vector<std::pair<u32, u32> > command_queue;
std::vector<draw_state> draw_calls;
void reset()
{
command_queue.clear();
draw_calls.clear();
}
};
extern bool user_asked_for_frame_capture;
extern frame_capture_data frame_debug;
namespace rsx
{
enum class shader_language
{
glsl,
hlsl
};
}
namespace convert
{
template<>
struct to_impl_t<rsx::shader_language, std::string>
{
static rsx::shader_language func(const std::string &from)
{
if (from == "glsl")
return rsx::shader_language::glsl;
if (from == "hlsl")
return rsx::shader_language::hlsl;
throw;
}
};
template<>
struct to_impl_t<std::string, rsx::shader_language>
{
static std::string func(rsx::shader_language from)
{
switch (from)
{
case rsx::shader_language::glsl:
return "glsl";
case rsx::shader_language::hlsl:
return "hlsl";
}
throw;
}
};
}
namespace rsx
{
namespace limits
{
enum
{
textures_count = 16,
vertex_textures_count = 4,
vertex_count = 16,
fragment_count = 32,
tiles_count = 15,
zculls_count = 8,
color_buffers_count = 4
};
}
struct decompiled_shader
{
std::string code;
};
struct finalized_shader
{
u64 ucode_hash;
std::string code;
};
template<typename Type, typename KeyType = u64, typename Hasher = std::hash<KeyType>>
struct cache
{
private:
std::unordered_map<KeyType, Type, Hasher> m_entries;
public:
const Type* find(u64 key) const
{
auto found = m_entries.find(key);
if (found == m_entries.end())
return nullptr;
return &found->second;
}
void insert(KeyType key, const Type &shader)
{
m_entries.insert({ key, shader });
}
};
struct shaders_cache
{
cache<decompiled_shader> decompiled_fragment_shaders;
cache<decompiled_shader> decompiled_vertex_shaders;
cache<finalized_shader> finailized_fragment_shaders;
cache<finalized_shader> finailized_vertex_shaders;
void load(const std::string &path, shader_language lang);
void load(shader_language lang);
static std::string path_to_root();
};
//TODO
union alignas(4) method_registers_t
{
u8 _u8[0x10000];
u32 _u32[0x10000 >> 2];
/*
struct alignas(4)
{
u8 pad[NV4097_SET_TEXTURE_OFFSET - 4];
struct alignas(4) texture_t
{
u32 offset;
union format_t
{
u32 _u32;
struct
{
u32: 1;
u32 location : 1;
u32 cubemap : 1;
u32 border_type : 1;
u32 dimension : 4;
u32 format : 8;
u32 mipmap : 16;
};
} format;
union address_t
{
u32 _u32;
struct
{
u32 wrap_s : 4;
u32 aniso_bias : 4;
u32 wrap_t : 4;
u32 unsigned_remap : 4;
u32 wrap_r : 4;
u32 gamma : 4;
u32 signed_remap : 4;
u32 zfunc : 4;
};
} address;
u32 control0;
u32 control1;
u32 filter;
u32 image_rect;
u32 border_color;
} textures[limits::textures_count];
};
*/
u32& operator[](int index)
{
return _u32[index >> 2];
}
};
extern u32 method_registers[0x10000 >> 2];
u32 get_vertex_type_size(u32 type);
u32 get_address(u32 offset, u32 location);
template<typename T>
void pad_texture(void* inputPixels, void* outputPixels, u16 inputWidth, u16 inputHeight, u16 outputWidth, u16 outputHeight)
{
T *src, *dst;
src = static_cast<T*>(inputPixels);
dst = static_cast<T*>(outputPixels);
for (u16 h = 0; h < inputHeight; ++h)
{
const u32 padded_pos = h * outputWidth;
const u32 pos = h * inputWidth;
for (u16 w = 0; w < inputWidth; ++w)
{
dst[padded_pos + w] = src[pos + w];
}
}
}
/* Note: What the ps3 calls swizzling in this case is actually z-ordering / morton ordering of pixels
* - Input can be swizzled or linear, bool flag handles conversion to and from
* - It will handle any width and height that are a power of 2, square or non square
* Restriction: It has mixed results if the height or width is not a power of 2
*/
template<typename T>
void convert_linear_swizzle(void* inputPixels, void* outputPixels, u16 width, u16 height, bool inputIsSwizzled)
{
u32 log2width, log2height;
log2width = log2(width);
log2height = log2(height);
// Max mask possible for square texture
u32 x_mask = 0x55555555;
u32 y_mask = 0xAAAAAAAA;
// We have to limit the masks to the lower of the two dimensions to allow for non-square textures
u32 limit_mask = (log2width < log2height) ? log2width : log2height;
// double the limit mask to account for bits in both x and y
limit_mask = 1 << (limit_mask << 1);
//x_mask, bits above limit are 1's for x-carry
x_mask = (x_mask | ~(limit_mask - 1));
//y_mask. bits above limit are 0'd, as we use a different method for y-carry over
y_mask = (y_mask & (limit_mask - 1));
u32 offs_y = 0;
u32 offs_x = 0;
u32 offs_x0 = 0; //total y-carry offset for x
u32 y_incr = limit_mask;
T *src, *dst;
if (!inputIsSwizzled)
{
for (int y = 0; y < height; ++y)
{
src = static_cast<T*>(inputPixels) + y*width;
dst = static_cast<T*>(outputPixels) + offs_y;
offs_x = offs_x0;
for (int x = 0; x < width; ++x)
{
dst[offs_x] = src[x];
offs_x = (offs_x - x_mask) & x_mask;
}
offs_y = (offs_y - y_mask) & y_mask;
if (offs_y == 0) offs_x0 += y_incr;
}
}
else
{
for (int y = 0; y < height; ++y)
{
src = static_cast<T*>(inputPixels) + offs_y;
dst = static_cast<T*>(outputPixels) + y*width;
offs_x = offs_x0;
for (int x = 0; x < width; ++x)
{
dst[x] = src[offs_x];
offs_x = (offs_x - x_mask) & x_mask;
}
offs_y = (offs_y - y_mask) & y_mask;
if (offs_y == 0) offs_x0 += y_incr;
}
}
}
struct surface_info
{
u8 log2height;
u8 log2width;
u8 antialias;
u8 depth_format;
u8 color_format;
u32 width;
u32 height;
u32 format;
void unpack(u32 surface_format)
{
format = surface_format;
log2height = surface_format >> 24;
log2width = (surface_format >> 16) & 0xff;
antialias = (surface_format >> 12) & 0xf;
depth_format = (surface_format >> 5) & 0x7;
color_format = surface_format & 0x1f;
width = 1 << (u32(log2width) + 1);
height = 1 << (u32(log2width) + 1);
}
};
struct data_array_format_info
{
u16 frequency = 0;
u8 stride = 0;
u8 size = 0;
u8 type = CELL_GCM_VERTEX_F;
void unpack_array(u32 data_array_format)
{
frequency = data_array_format >> 16;
stride = (data_array_format >> 8) & 0xff;
size = (data_array_format >> 4) & 0xf;
type = data_array_format & 0xf;
}
};
class thread : public named_thread_t
{
protected:
std::stack<u32> m_call_stack;
public:
struct shaders_cache shaders_cache;
CellGcmControl* ctrl = nullptr;
Timer timer_sync;
GcmTileInfo tiles[limits::tiles_count];
GcmZcullInfo zculls[limits::zculls_count];
rsx::texture textures[limits::textures_count];
rsx::vertex_texture vertex_textures[limits::vertex_textures_count];
/**
* RSX can sources vertex attributes from 2 places:
* - Immediate values passed by NV4097_SET_VERTEX_DATA*_M + ARRAY_ID write.
* For a given ARRAY_ID the last command of this type defines the actual type of the immediate value.
* Since there can be only a single value per ARRAY_ID passed this way, all vertex in the draw call
* shares it.
* - Vertex array values passed by offset/stride/size/format description.
*
* A given ARRAY_ID can have both an immediate value and a vertex array enabled at the same time
* (See After Burner Climax intro cutscene). In such case the vertex array has precedence over the
* immediate value. As soon as the vertex array is disabled (size set to 0) the immediate value
* must be used if the vertex attrib mask request it.
*
* Note that behavior when both vertex array and immediate value system are disabled but vertex attrib mask
* request inputs is unknow.
*/
data_array_format_info register_vertex_info[limits::vertex_count];
std::vector<u8> register_vertex_data[limits::vertex_count];
data_array_format_info vertex_arrays_info[limits::vertex_count];
std::vector<u8> vertex_arrays[limits::vertex_count];
std::vector<u8> vertex_index_array;
u32 vertex_draw_count = 0;
std::unordered_map<u32, color4_base<f32>> transform_constants;
// Constant stored for whole frame
std::unordered_map<u32, color4f> local_transform_constants;
u32 transform_program[512 * 4] = {};
virtual void load_vertex_data(u32 first, u32 count);
virtual void load_vertex_index_data(u32 first, u32 count);
bool capture_current_frame = false;
void capture_frame(const std::string &name);
public:
u32 ioAddress, ioSize;
int flip_status;
int flip_mode;
int debug_level;
int frequency_mode;
u32 tiles_addr;
u32 zculls_addr;
vm::ps3::ptr<CellGcmDisplayInfo> gcm_buffers;
u32 gcm_buffers_count;
u32 gcm_current_buffer;
u32 ctxt_addr;
u32 report_main_addr;
u32 label_addr;
enum class Draw_command
{
draw_command_array,
draw_command_inlined_array,
draw_command_indexed,
} draw_command;
u32 draw_mode;
u32 local_mem_addr, main_mem_addr;
bool strict_ordering[0x1000];
bool draw_inline_vertex_array;
std::vector<u32> inline_vertex_array;
public:
u32 draw_array_count;
u32 draw_array_first;
double fps_limit = 59.94;
public:
semaphore_t sem_flip;
u64 last_flip_time;
vm::ps3::ptr<void(u32)> flip_handler = vm::null;
vm::ps3::ptr<void(u32)> user_handler = vm::null;
vm::ps3::ptr<void(u32)> vblank_handler = vm::null;
u64 vblank_count;
public:
std::set<u32> m_used_gcm_commands;
protected:
virtual ~thread() {}
virtual void on_task() override;
public:
virtual std::string get_name() const override;
virtual void begin();
virtual void end();
virtual void on_init() = 0;
virtual void on_init_thread() = 0;
virtual bool do_method(u32 cmd, u32 value) { return false; }
virtual void flip(int buffer) = 0;
virtual u64 timestamp() const;
/**
* Fill buffer with 4x4 scale offset matrix.
* Vertex shader's position is to be multiplied by this matrix.
* if is_d3d is set, the matrix is modified to use d3d convention.
*/
void fill_scale_offset_data(void *buffer, bool is_d3d = true) const;
/**
* Fill buffer with vertex program constants.
* Buffer must be at least 512 float4 wide.
*/
void fill_vertex_program_constants_data(void *buffer);
/**
* Write inlined array data to buffer.
* The storage of inlined data looks different from memory stored arrays.
* There is no swapping required except for 4 u8 (according to Bleach Soul Resurection)
*/
void write_inline_array_to_buffer(void *dst_buffer);
/**
* Copy rtt values to buffer.
* TODO: It's more efficient to combine multiple call of this function into one.
*/
virtual void copy_render_targets_to_memory(void *buffer, u8 rtt) {};
/**
* Copy depth content to buffer.
* TODO: It's more efficient to combine multiple call of this function into one.
*/
virtual void copy_depth_buffer_to_memory(void *buffer) {};
/**
* Copy stencil content to buffer.
* TODO: It's more efficient to combine multiple call of this function into one.
*/
virtual void copy_stencil_buffer_to_memory(void *buffer) {};
virtual std::pair<std::string, std::string> get_programs() const { return std::make_pair("", ""); };
public:
void reset();
void init(const u32 ioAddress, const u32 ioSize, const u32 ctrlAddress, const u32 localAddress);
u32 ReadIO32(u32 addr);
void WriteIO32(u32 addr, u32 value);
};
}