Use __attribute__((target)) on clang and GCC.

This commit is contained in:
Tom Lally 2022-09-02 15:55:23 +01:00
parent 68fa5b32a1
commit c7e5fc4dbb
2 changed files with 10 additions and 29 deletions

View file

@ -1,9 +1,5 @@
project(CemuCafe)
if(CMAKE_C_COMPILER_ID STREQUAL "GNU")
add_compile_options(-mssse3 -mavx2)
endif()
file(GLOB_RECURSE CPP_FILES *.cpp)
file(GLOB_RECURSE H_FILES *.h)

View file

@ -3,10 +3,16 @@
#include "Cafe/HW/Latte/ISA/RegDefines.h"
#if BOOST_OS_LINUX
#if __GNUC__
#include <immintrin.h>
#endif
// ATTRIBUTE_AVX2 is a function annotation written on the line before a function definition.
// When __GNUC__ is defined it expands to __attribute__((target("avx2"))), a per-function
// target attribute; on any other compiler it expands to nothing.
// NOTE(review): presumably this lets the annotated functions use AVX2 intrinsics without
// enabling AVX2 for the whole translation unit - confirm against the build flags.
#ifdef __GNUC__
#define ATTRIBUTE_AVX2 __attribute__((target("avx2")))
#else
#define ATTRIBUTE_AVX2
#endif
struct
{
const void* lastPtr;
@ -284,10 +290,7 @@ void LatteIndices_generateAutoLineLoopIndices(void* indexDataOutput, uint32 coun
indexMax = std::max(count, 1u) - 1;
}
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#endif
ATTRIBUTE_AVX2
void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{
// using AVX + AVX2 we can process 16 indices at a time
@ -352,14 +355,7 @@ void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDat
indexMin = std::min(indexMin, _minIndex);
}
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute pop
#endif
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#endif
ATTRIBUTE_AVX2
void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{
// SSSE3 & SSE4.1 optimized decoding
@ -423,14 +419,7 @@ void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDa
indexMin = std::min(indexMin, _minIndex);
}
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute pop
#endif
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#endif
ATTRIBUTE_AVX2
void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax)
{
// using AVX + AVX2 we can process 8 indices at a time
@ -497,10 +486,6 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat
indexMin = std::min(indexMin, _minIndex);
}
#if BOOST_OS_LINUX || BOOST_OS_MACOS
#pragma clang attribute pop
#endif
template<typename T>
void _LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, uint32 count, uint32 primitiveRestartIndex, uint32& indexMin, uint32& indexMax)
{