mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-07-15 19:28:43 +12:00
rsx: research native texel byte order on cpu readback (WCB) [WIP]
This commit is contained in:
parent
59be9dc36e
commit
be6b5922dd
5 changed files with 143 additions and 28 deletions
|
@ -45,6 +45,8 @@ namespace rsx
|
||||||
u16 real_pitch;
|
u16 real_pitch;
|
||||||
u16 rsx_pitch;
|
u16 rsx_pitch;
|
||||||
|
|
||||||
|
u32 gcm_format = 0;
|
||||||
|
|
||||||
u64 cache_tag = 0;
|
u64 cache_tag = 0;
|
||||||
|
|
||||||
rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order;
|
rsx::texture_create_flags view_flags = rsx::texture_create_flags::default_component_order;
|
||||||
|
@ -96,6 +98,11 @@ namespace rsx
|
||||||
image_type = type;
|
image_type = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Records the supplied value in this object's gcm_format member.
void set_gcm_format(u32 format)
{
	this->gcm_format = format;
}
|
||||||
|
|
||||||
u16 get_width() const
|
u16 get_width() const
|
||||||
{
|
{
|
||||||
return width;
|
return width;
|
||||||
|
@ -120,6 +127,11 @@ namespace rsx
|
||||||
{
|
{
|
||||||
return image_type;
|
return image_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the value currently held in this object's gcm_format member.
u32 get_gcm_format() const
{
	return this->gcm_format;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename commandbuffer_type, typename section_storage_type, typename image_resource_type, typename image_view_type, typename image_storage_type, typename texture_format>
|
template <typename commandbuffer_type, typename section_storage_type, typename image_resource_type, typename image_view_type, typename image_storage_type, typename texture_format>
|
||||||
|
|
|
@ -425,6 +425,16 @@ namespace gl
|
||||||
rsx::scale_image_nearest(dst, const_cast<const void*>(data), width, height, rsx_pitch, real_pitch, pixel_size, samples);
|
rsx::scale_image_nearest(dst, const_cast<const void*>(data), width, height, rsx_pitch, real_pitch, pixel_size, samples);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* switch (gcm_format)
|
||||||
|
{
|
||||||
|
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
|
||||||
|
rsx::shuffle_texel_data_wzyx<u16>(dst, rsx_pitch, width, height);
|
||||||
|
break;
|
||||||
|
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
|
||||||
|
rsx::shuffle_texel_data_wzyx<u32>(dst, rsx_pitch, width, height);
|
||||||
|
break;
|
||||||
|
}*/
|
||||||
|
|
||||||
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
|
||||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||||
|
|
||||||
|
|
|
@ -98,6 +98,51 @@ namespace vk
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Translates a render-surface color format into a { CELL_GCM_TEXTURE_* format, flag } pair.
 * Formats that are not listed fall back to { CELL_GCM_TEXTURE_A8R8G8B8, false }.
 * NOTE(review): the boolean looks like a "swap bytes on readback" hint — confirm against the consumer of this pair.
 */
std::pair<u32, bool> get_compatible_gcm_format(rsx::surface_color_format color_format)
{
	// Start from the fallback mapping; the switch overrides it where needed
	u32 gcm_format = CELL_GCM_TEXTURE_A8R8G8B8;
	bool flag = false;

	switch (color_format)
	{
	case rsx::surface_color_format::r5g6b5:
		gcm_format = CELL_GCM_TEXTURE_R5G6B5;
		break;

	case rsx::surface_color_format::a8r8g8b8:
		flag = true; //verified
		break;

	case rsx::surface_color_format::a8b8g8r8:
		// Same as the fallback: A8R8G8B8 with the flag cleared
		break;

	case rsx::surface_color_format::x8b8g8r8_o8b8g8r8:
	case rsx::surface_color_format::x8b8g8r8_z8b8g8r8:
		flag = true;
		break;

	case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:
	case rsx::surface_color_format::x8r8g8b8_o8r8g8b8:
		// Same as the fallback: A8R8G8B8 with the flag cleared
		break;

	case rsx::surface_color_format::w16z16y16x16:
		gcm_format = CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT;
		flag = true;
		break;

	case rsx::surface_color_format::w32z32y32x32:
		gcm_format = CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT;
		flag = true;
		break;

	case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
	case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
		gcm_format = CELL_GCM_TEXTURE_A1R5G5B5;
		break;

	case rsx::surface_color_format::b8:
		gcm_format = CELL_GCM_TEXTURE_B8;
		break;

	case rsx::surface_color_format::g8b8:
		gcm_format = CELL_GCM_TEXTURE_G8B8;
		flag = true;
		break;

	case rsx::surface_color_format::x32:
		gcm_format = CELL_GCM_TEXTURE_X32_FLOAT;
		flag = true; //verified
		break;

	default:
		break;
	}

	return{ gcm_format, flag };
}
|
||||||
|
|
||||||
/** Maps color_format, depth_stencil_format and color count to an int as below :
|
/** Maps color_format, depth_stencil_format and color count to an int as below :
|
||||||
* idx = color_count + 5 * depth_stencil_idx + 15 * color_format_idx
|
* idx = color_count + 5 * depth_stencil_idx + 15 * color_format_idx
|
||||||
* This should perform a 1:1 mapping
|
* This should perform a 1:1 mapping
|
||||||
|
@ -2354,8 +2399,11 @@ void VKGSRender::prepare_rtts()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto color_fmt = rsx::method_registers.surface_color();
|
||||||
|
const auto depth_fmt = rsx::method_registers.surface_depth_fmt();
|
||||||
|
|
||||||
m_rtts.prepare_render_target(&*m_current_command_buffer,
|
m_rtts.prepare_render_target(&*m_current_command_buffer,
|
||||||
rsx::method_registers.surface_color(), rsx::method_registers.surface_depth_fmt(),
|
color_fmt, depth_fmt,
|
||||||
clip_width, clip_height,
|
clip_width, clip_height,
|
||||||
rsx::method_registers.surface_color_target(),
|
rsx::method_registers.surface_color_target(),
|
||||||
surface_addresses, zeta_address,
|
surface_addresses, zeta_address,
|
||||||
|
@ -2378,13 +2426,13 @@ void VKGSRender::prepare_rtts()
|
||||||
m_surface_info[i].address = m_surface_info[i].pitch = 0;
|
m_surface_info[i].address = m_surface_info[i].pitch = 0;
|
||||||
m_surface_info[i].width = clip_width;
|
m_surface_info[i].width = clip_width;
|
||||||
m_surface_info[i].height = clip_height;
|
m_surface_info[i].height = clip_height;
|
||||||
m_surface_info[i].color_format = rsx::method_registers.surface_color();
|
m_surface_info[i].color_format = color_fmt;
|
||||||
}
|
}
|
||||||
|
|
||||||
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
|
m_depth_surface_info.address = m_depth_surface_info.pitch = 0;
|
||||||
m_depth_surface_info.width = clip_width;
|
m_depth_surface_info.width = clip_width;
|
||||||
m_depth_surface_info.height = clip_height;
|
m_depth_surface_info.height = clip_height;
|
||||||
m_depth_surface_info.depth_format = rsx::method_registers.surface_depth_fmt();
|
m_depth_surface_info.depth_format = depth_fmt;
|
||||||
|
|
||||||
//Bind created rtts as current fbo...
|
//Bind created rtts as current fbo...
|
||||||
std::vector<u8> draw_buffers = vk::get_draw_buffers(rsx::method_registers.surface_color_target());
|
std::vector<u8> draw_buffers = vk::get_draw_buffers(rsx::method_registers.surface_color_target());
|
||||||
|
@ -2395,7 +2443,7 @@ void VKGSRender::prepare_rtts()
|
||||||
std::vector<vk::image*> bound_images;
|
std::vector<vk::image*> bound_images;
|
||||||
bound_images.reserve(5);
|
bound_images.reserve(5);
|
||||||
|
|
||||||
const auto bpp = get_format_block_size_in_bytes(rsx::method_registers.surface_color());
|
const auto bpp = get_format_block_size_in_bytes(color_fmt);
|
||||||
|
|
||||||
for (u8 index : draw_buffers)
|
for (u8 index : draw_buffers)
|
||||||
{
|
{
|
||||||
|
@ -2445,13 +2493,14 @@ void VKGSRender::prepare_rtts()
|
||||||
|
|
||||||
if (g_cfg.video.write_color_buffers)
|
if (g_cfg.video.write_color_buffers)
|
||||||
{
|
{
|
||||||
|
const auto color_fmt_info = vk::get_compatible_gcm_format(color_fmt);
|
||||||
for (u8 index : draw_buffers)
|
for (u8 index : draw_buffers)
|
||||||
{
|
{
|
||||||
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
|
if (!m_surface_info[index].address || !m_surface_info[index].pitch) continue;
|
||||||
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height;
|
const u32 range = m_surface_info[index].pitch * m_surface_info[index].height;
|
||||||
|
|
||||||
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
|
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_render_targets[index]), m_surface_info[index].address, range,
|
||||||
m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch);
|
m_surface_info[index].width, m_surface_info[index].height, m_surface_info[index].pitch, color_fmt_info.first, color_fmt_info.second);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2460,11 +2509,17 @@ void VKGSRender::prepare_rtts()
|
||||||
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
|
if (m_depth_surface_info.address && m_depth_surface_info.pitch)
|
||||||
{
|
{
|
||||||
u32 pitch = m_depth_surface_info.width * 2;
|
u32 pitch = m_depth_surface_info.width * 2;
|
||||||
if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16) pitch *= 2;
|
u32 gcm_format = CELL_GCM_TEXTURE_DEPTH16;
|
||||||
|
|
||||||
|
if (m_depth_surface_info.depth_format != rsx::surface_depth_format::z16)
|
||||||
|
{
|
||||||
|
gcm_format = CELL_GCM_TEXTURE_DEPTH24_D8;
|
||||||
|
pitch *= 2;
|
||||||
|
}
|
||||||
|
|
||||||
const u32 range = pitch * m_depth_surface_info.height;
|
const u32 range = pitch * m_depth_surface_info.height;
|
||||||
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range,
|
m_texture_cache.lock_memory_region(std::get<1>(m_rtts.m_bound_depth_stencil), m_depth_surface_info.address, range,
|
||||||
m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch);
|
m_depth_surface_info.width, m_depth_surface_info.height, m_depth_surface_info.pitch, gcm_format, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2512,7 +2567,7 @@ void VKGSRender::prepare_rtts()
|
||||||
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
|
fbo_images.push_back(std::make_unique<vk::image_view>(*m_device, raw->value, VK_IMAGE_VIEW_TYPE_2D, raw->info.format, vk::default_component_map(), subres));
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(rsx::method_registers.surface_color()).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size());
|
size_t idx = vk::get_render_pass_location(vk::get_compatible_surface_format(color_fmt).first, vk::get_compatible_depth_surface_format(m_optimal_tiling_supported_formats, rsx::method_registers.surface_depth_fmt()), (u8)draw_buffers.size());
|
||||||
VkRenderPass current_render_pass = m_render_passes[idx];
|
VkRenderPass current_render_pass = m_render_passes[idx];
|
||||||
|
|
||||||
if (m_draw_fbo)
|
if (m_draw_fbo)
|
||||||
|
|
|
@ -21,6 +21,7 @@ namespace vk
|
||||||
VkFence dma_fence = VK_NULL_HANDLE;
|
VkFence dma_fence = VK_NULL_HANDLE;
|
||||||
bool synchronized = false;
|
bool synchronized = false;
|
||||||
bool flushed = false;
|
bool flushed = false;
|
||||||
|
bool pack_unpack_swap_bytes = false;
|
||||||
u64 sync_timestamp = 0;
|
u64 sync_timestamp = 0;
|
||||||
u64 last_use_timestamp = 0;
|
u64 last_use_timestamp = 0;
|
||||||
vk::render_device* m_device = nullptr;
|
vk::render_device* m_device = nullptr;
|
||||||
|
@ -40,13 +41,16 @@ namespace vk
|
||||||
rsx::buffered_section::reset(base, length, policy);
|
rsx::buffered_section::reset(base, length, policy);
|
||||||
}
|
}
|
||||||
|
|
||||||
void create(const u16 w, const u16 h, const u16 depth, const u16 mipmaps, vk::image_view *view, vk::image *image, const u32 rsx_pitch=0, bool managed=true)
|
void create(const u16 w, const u16 h, const u16 depth, const u16 mipmaps, vk::image_view *view, vk::image *image, const u32 rsx_pitch, bool managed, const u32 gcm_format, bool pack_swap_bytes = false)
|
||||||
{
|
{
|
||||||
width = w;
|
width = w;
|
||||||
height = h;
|
height = h;
|
||||||
this->depth = depth;
|
this->depth = depth;
|
||||||
this->mipmaps = mipmaps;
|
this->mipmaps = mipmaps;
|
||||||
|
|
||||||
|
this->gcm_format = gcm_format;
|
||||||
|
this->pack_unpack_swap_bytes = pack_swap_bytes;
|
||||||
|
|
||||||
if (managed)
|
if (managed)
|
||||||
{
|
{
|
||||||
managed_texture.reset(image);
|
managed_texture.reset(image);
|
||||||
|
@ -265,20 +269,6 @@ namespace vk
|
||||||
const u8 bpp = real_pitch / width;
|
const u8 bpp = real_pitch / width;
|
||||||
|
|
||||||
//We have to do our own byte swapping since the driver doesnt do it for us
|
//We have to do our own byte swapping since the driver doesnt do it for us
|
||||||
bool swap_bytes = false;
|
|
||||||
switch (vram_texture->info.format)
|
|
||||||
{
|
|
||||||
case VK_FORMAT_D32_SFLOAT_S8_UINT:
|
|
||||||
case VK_FORMAT_D24_UNORM_S8_UINT:
|
|
||||||
//TODO: Hardware tests to determine correct memory layout
|
|
||||||
case VK_FORMAT_D16_UNORM:
|
|
||||||
case VK_FORMAT_R16G16B16A16_SFLOAT:
|
|
||||||
case VK_FORMAT_R32G32B32A32_SFLOAT:
|
|
||||||
case VK_FORMAT_R32_SFLOAT:
|
|
||||||
swap_bytes = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (real_pitch == rsx_pitch)
|
if (real_pitch == rsx_pitch)
|
||||||
{
|
{
|
||||||
switch (bpp)
|
switch (bpp)
|
||||||
|
@ -289,23 +279,29 @@ namespace vk
|
||||||
do_memory_transfer<u8, false>(pixels_dst, pixels_src);
|
do_memory_transfer<u8, false>(pixels_dst, pixels_src);
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
if (swap_bytes)
|
if (pack_unpack_swap_bytes)
|
||||||
do_memory_transfer<u16, true>(pixels_dst, pixels_src);
|
do_memory_transfer<u16, true>(pixels_dst, pixels_src);
|
||||||
else
|
else
|
||||||
do_memory_transfer<u16, false>(pixels_dst, pixels_src);
|
do_memory_transfer<u16, false>(pixels_dst, pixels_src);
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
if (swap_bytes)
|
if (pack_unpack_swap_bytes)
|
||||||
do_memory_transfer<u32, true>(pixels_dst, pixels_src);
|
do_memory_transfer<u32, true>(pixels_dst, pixels_src);
|
||||||
else
|
else
|
||||||
do_memory_transfer<u32, false>(pixels_dst, pixels_src);
|
do_memory_transfer<u32, false>(pixels_dst, pixels_src);
|
||||||
break;
|
break;
|
||||||
case 8:
|
case 8:
|
||||||
if (swap_bytes)
|
if (pack_unpack_swap_bytes)
|
||||||
do_memory_transfer<u64, true>(pixels_dst, pixels_src);
|
do_memory_transfer<u64, true>(pixels_dst, pixels_src);
|
||||||
else
|
else
|
||||||
do_memory_transfer<u64, false>(pixels_dst, pixels_src);
|
do_memory_transfer<u64, false>(pixels_dst, pixels_src);
|
||||||
break;
|
break;
|
||||||
|
case 16:
|
||||||
|
if (pack_unpack_swap_bytes)
|
||||||
|
do_memory_transfer<u128, true>(pixels_dst, pixels_src);
|
||||||
|
else
|
||||||
|
do_memory_transfer<u128, false>(pixels_dst, pixels_src);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -314,12 +310,22 @@ namespace vk
|
||||||
//usually we can just get away with nearest filtering
|
//usually we can just get away with nearest filtering
|
||||||
const u8 samples = rsx_pitch / real_pitch;
|
const u8 samples = rsx_pitch / real_pitch;
|
||||||
|
|
||||||
rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples, swap_bytes);
|
rsx::scale_image_nearest(pixels_dst, pixels_src, width, height, rsx_pitch, real_pitch, bpp, samples, pack_unpack_swap_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
dma_buffer->unmap();
|
dma_buffer->unmap();
|
||||||
//Its highly likely that this surface will be reused, so we just leave resources in place
|
//Its highly likely that this surface will be reused, so we just leave resources in place
|
||||||
|
|
||||||
|
switch (gcm_format)
|
||||||
|
{
|
||||||
|
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT:
|
||||||
|
rsx::shuffle_texel_data_wzyx<u16>(pixels_dst, rsx_pitch, width, height);
|
||||||
|
break;
|
||||||
|
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT:
|
||||||
|
rsx::shuffle_texel_data_wzyx<u32>(pixels_dst, rsx_pitch, width, height);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -692,7 +698,7 @@ namespace vk
|
||||||
|
|
||||||
cached_texture_section& region = find_cached_texture(rsx_address, rsx_size, true, width, height, section_depth);
|
cached_texture_section& region = find_cached_texture(rsx_address, rsx_size, true, width, height, section_depth);
|
||||||
region.reset(rsx_address, rsx_size);
|
region.reset(rsx_address, rsx_size);
|
||||||
region.create(width, height, section_depth, mipmaps, view, image);
|
region.create(width, height, section_depth, mipmaps, view, image, 0, true, gcm_format);
|
||||||
region.set_dirty(false);
|
region.set_dirty(false);
|
||||||
region.set_context(context);
|
region.set_context(context);
|
||||||
region.set_image_type(type);
|
region.set_image_type(type);
|
||||||
|
|
|
@ -162,6 +162,38 @@ namespace rsx
|
||||||
|
|
||||||
std::array<float, 4> get_constant_blend_colors();
|
std::array<float, 4> get_constant_blend_colors();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shuffle texel layout from xyzw to wzyx
|
||||||
|
* TODO: Variable src/dst and optional se conversion
|
||||||
|
*/
|
||||||
|
template <typename T>
|
||||||
|
void shuffle_texel_data_wzyx(void *data, u16 row_pitch_in_bytes, u16 row_length_in_texels, u16 num_rows)
|
||||||
|
{
|
||||||
|
char *raw_src = (char*)data;
|
||||||
|
T tmp[4];
|
||||||
|
|
||||||
|
for (u16 n = 0; n < num_rows; ++n)
|
||||||
|
{
|
||||||
|
T* src = (T*)raw_src;
|
||||||
|
raw_src += row_pitch_in_bytes;
|
||||||
|
|
||||||
|
for (u16 m = 0; m < row_length_in_texels; ++m)
|
||||||
|
{
|
||||||
|
tmp[0] = src[3];
|
||||||
|
tmp[1] = src[2];
|
||||||
|
tmp[2] = src[1];
|
||||||
|
tmp[3] = src[0];
|
||||||
|
|
||||||
|
src[0] = tmp[0];
|
||||||
|
src[1] = tmp[1];
|
||||||
|
src[2] = tmp[2];
|
||||||
|
src[3] = tmp[3];
|
||||||
|
|
||||||
|
src += 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Clips a rect so that it never falls outside the parent region
|
* Clips a rect so that it never falls outside the parent region
|
||||||
* attempt_fit: allows resizing of the requested region. If false, failure to fit will result in the child rect being pinned to (0, 0)
|
* attempt_fit: allows resizing of the requested region. If false, failure to fit will result in the child rect being pinned to (0, 0)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue