Use aligned stores in write_index_array_data_to_buffer

Ensure that target buffer is cache line aligned.
Improve stx::make_single to support alignment.
This commit is contained in:
Nekotekina 2021-12-15 17:47:02 +03:00
parent 76ccaf5e6f
commit 262ff01619
3 changed files with 19 additions and 25 deletions

View file

@ -831,7 +831,7 @@ namespace
const __m128i value = _mm_shuffle_epi8(raw, s_bswap_u16_mask); const __m128i value = _mm_shuffle_epi8(raw, s_bswap_u16_mask);
max = _mm_max_epu16(max, value); max = _mm_max_epu16(max, value);
min = _mm_min_epu16(min, value); min = _mm_min_epu16(min, value);
_mm_storeu_si128(dst_stream++, value); _mm_store_si128(dst_stream++, value);
} }
const u16 min_index = sse41_hmin_epu16(min); const u16 min_index = sse41_hmin_epu16(min);
@ -857,7 +857,7 @@ namespace
const __m128i value = _mm_shuffle_epi8(raw, s_bswap_u32_mask); const __m128i value = _mm_shuffle_epi8(raw, s_bswap_u32_mask);
max = _mm_max_epu32(max, value); max = _mm_max_epu32(max, value);
min = _mm_min_epu32(min, value); min = _mm_min_epu32(min, value);
_mm_storeu_si128(dst_stream++, value); _mm_store_si128(dst_stream++, value);
} }
__m128i tmp = _mm_srli_si128(min, 8); __m128i tmp = _mm_srli_si128(min, 8);
@ -944,7 +944,7 @@ namespace
const __m256i value_with_max_restart = _mm256_or_si256(mask, value); const __m256i value_with_max_restart = _mm256_or_si256(mask, value);
max = _mm256_max_epu16(max, value_with_min_restart); max = _mm256_max_epu16(max, value_with_min_restart);
min = _mm256_min_epu16(min, value_with_max_restart); min = _mm256_min_epu16(min, value_with_max_restart);
_mm256_storeu_si256(dst_stream++, value_with_max_restart); _mm256_store_si256(dst_stream++, value_with_max_restart);
} }
__m128i tmp = _mm256_extracti128_si256(min, 1); __m128i tmp = _mm256_extracti128_si256(min, 1);
@ -981,7 +981,7 @@ namespace
const __m128i value_with_max_restart = _mm_or_si128(mask, value); const __m128i value_with_max_restart = _mm_or_si128(mask, value);
max = _mm_max_epu16(max, value_with_min_restart); max = _mm_max_epu16(max, value_with_min_restart);
min = _mm_min_epu16(min, value_with_max_restart); min = _mm_min_epu16(min, value_with_max_restart);
_mm_storeu_si128(dst_stream++, value_with_max_restart); _mm_store_si128(dst_stream++, value_with_max_restart);
} }
const u16 min_index = sse41_hmin_epu16(min); const u16 min_index = sse41_hmin_epu16(min);
@ -1010,7 +1010,7 @@ namespace
const __m128i value_with_max_restart = _mm_or_si128(mask, value); const __m128i value_with_max_restart = _mm_or_si128(mask, value);
max = _mm_max_epu32(max, value_with_min_restart); max = _mm_max_epu32(max, value_with_min_restart);
min = _mm_min_epu32(min, value_with_max_restart); min = _mm_min_epu32(min, value_with_max_restart);
_mm_storeu_si128(dst_stream++, value_with_max_restart); _mm_store_si128(dst_stream++, value_with_max_restart);
} }
__m128i tmp = _mm_srli_si128(min, 8); __m128i tmp = _mm_srli_si128(min, 8);

View file

@ -144,15 +144,15 @@ namespace
if (emulate_restart) upload_size *= 2; if (emulate_restart) upload_size *= 2;
VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<4>(upload_size); VkDeviceSize offset_in_index_buffer = m_index_buffer_ring_info.alloc<64>(upload_size);
void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size); void* buf = m_index_buffer_ring_info.map(offset_in_index_buffer, upload_size);
std::span<std::byte> dst; std::span<std::byte> dst;
std::vector<std::byte> tmp; stx::single_ptr<std::byte[]> tmp;
if (emulate_restart) if (emulate_restart)
{ {
tmp.resize(upload_size); tmp = stx::make_single<std::byte[], false, 64>(upload_size);
dst = tmp; dst = std::span<std::byte>(tmp.get(), upload_size);
} }
else else
{ {
@ -182,11 +182,11 @@ namespace
{ {
if (index_type == rsx::index_array_type::u16) if (index_type == rsx::index_array_type::u16)
{ {
index_count = rsx::remove_restart_index(static_cast<u16*>(buf), reinterpret_cast<u16*>(tmp.data()), index_count, u16{umax}); index_count = rsx::remove_restart_index(static_cast<u16*>(buf), reinterpret_cast<u16*>(tmp.get()), index_count, u16{umax});
} }
else else
{ {
index_count = rsx::remove_restart_index(static_cast<u32*>(buf), reinterpret_cast<u32*>(tmp.data()), index_count, u32{umax}); index_count = rsx::remove_restart_index(static_cast<u32*>(buf), reinterpret_cast<u32*>(tmp.get()), index_count, u32{umax});
} }
} }

View file

@ -49,7 +49,7 @@ namespace stx
// Control block with data and reference counter // Control block with data and reference counter
template <typename T> template <typename T>
class alignas(T) shared_data final : align_filler<sizeof(shared_counter), alignof(T)> class shared_data final : align_filler<sizeof(shared_counter), alignof(T)>
{ {
public: public:
shared_counter m_ctr{}; shared_counter m_ctr{};
@ -64,7 +64,7 @@ namespace stx
}; };
template <typename T> template <typename T>
class alignas(T) shared_data<T[]> final : align_filler<sizeof(shared_counter) + sizeof(usz), alignof(T)> class shared_data<T[]> final : align_filler<sizeof(shared_counter) + sizeof(usz), alignof(T)>
{ {
public: public:
usz m_count{}; usz m_count{};
@ -98,8 +98,6 @@ namespace stx
friend class atomic_ptr; friend class atomic_ptr;
public: public:
using pointer = T*;
using element_type = std::remove_extent_t<T>; using element_type = std::remove_extent_t<T>;
constexpr single_ptr() noexcept = default; constexpr single_ptr() noexcept = default;
@ -109,7 +107,7 @@ namespace stx
// Default constructor or null_ptr should be used instead // Default constructor or null_ptr should be used instead
[[deprecated("Use null_ptr")]] single_ptr(std::nullptr_t) = delete; [[deprecated("Use null_ptr")]] single_ptr(std::nullptr_t) = delete;
explicit single_ptr(shared_data<T>&, pointer ptr) noexcept explicit single_ptr(shared_data<T>&, element_type* ptr) noexcept
: m_ptr(ptr) : m_ptr(ptr)
{ {
} }
@ -258,7 +256,7 @@ namespace stx
return single_ptr<T>(*ptr, &ptr->m_data); return single_ptr<T>(*ptr, &ptr->m_data);
} }
template <typename T, bool Init = true> template <typename T, bool Init = true, usz Align = alignof(std::remove_extent_t<T>)>
static std::enable_if_t<std::is_unbounded_array_v<T>, single_ptr<T>> make_single(usz count) noexcept static std::enable_if_t<std::is_unbounded_array_v<T>, single_ptr<T>> make_single(usz count) noexcept
{ {
static_assert(sizeof(shared_data<T>) - offsetof(shared_data<T>, m_ctr) == sizeof(shared_counter)); static_assert(sizeof(shared_data<T>) - offsetof(shared_data<T>, m_ctr) == sizeof(shared_counter));
@ -269,9 +267,9 @@ namespace stx
std::byte* bytes = nullptr; std::byte* bytes = nullptr;
if constexpr (alignof(etype) > (__STDCPP_DEFAULT_NEW_ALIGNMENT__)) if constexpr (Align > (__STDCPP_DEFAULT_NEW_ALIGNMENT__))
{ {
bytes = static_cast<std::byte*>(::operator new(size, std::align_val_t{alignof(etype)})); bytes = static_cast<std::byte*>(::operator new(size, std::align_val_t{Align}));
} }
else else
{ {
@ -305,9 +303,9 @@ namespace stx
ptr->~shared_data<T>(); ptr->~shared_data<T>();
if constexpr (alignof(etype) > (__STDCPP_DEFAULT_NEW_ALIGNMENT__)) if constexpr (Align > (__STDCPP_DEFAULT_NEW_ALIGNMENT__))
{ {
::operator delete[](bytes, std::align_val_t{alignof(etype)}); ::operator delete[](bytes, std::align_val_t{Align});
} }
else else
{ {
@ -347,8 +345,6 @@ namespace stx
friend class atomic_ptr; friend class atomic_ptr;
public: public:
using pointer = T*;
using element_type = std::remove_extent_t<T>; using element_type = std::remove_extent_t<T>;
constexpr shared_ptr() noexcept = default; constexpr shared_ptr() noexcept = default;
@ -594,8 +590,6 @@ namespace stx
friend class atomic_ptr; friend class atomic_ptr;
public: public:
using pointer = T*;
using element_type = std::remove_extent_t<T>; using element_type = std::remove_extent_t<T>;
using shared_type = shared_ptr<T>; using shared_type = shared_ptr<T>;