From c7e5fc4dbbe9fa793e02aa28b3e5437c85eff2d1 Mon Sep 17 00:00:00 2001 From: Tom Lally Date: Fri, 2 Sep 2022 15:55:23 +0100 Subject: [PATCH] Use attribute((target)) on clang and GCC. --- src/Cafe/CMakeLists.txt | 4 --- src/Cafe/HW/Latte/Core/LatteIndices.cpp | 35 +++++++------------------ 2 files changed, 10 insertions(+), 29 deletions(-) diff --git a/src/Cafe/CMakeLists.txt b/src/Cafe/CMakeLists.txt index c27708f3..776aef59 100644 --- a/src/Cafe/CMakeLists.txt +++ b/src/Cafe/CMakeLists.txt @@ -1,9 +1,5 @@ project(CemuCafe) -if(CMAKE_C_COMPILER_ID STREQUAL "GNU") - add_compile_options(-mssse3 -mavx2) -endif() - file(GLOB_RECURSE CPP_FILES *.cpp) file(GLOB_RECURSE H_FILES *.h) diff --git a/src/Cafe/HW/Latte/Core/LatteIndices.cpp b/src/Cafe/HW/Latte/Core/LatteIndices.cpp index 7edaad00..64f215a6 100644 --- a/src/Cafe/HW/Latte/Core/LatteIndices.cpp +++ b/src/Cafe/HW/Latte/Core/LatteIndices.cpp @@ -3,10 +3,16 @@ #include "Cafe/HW/Latte/ISA/RegDefines.h" -#if BOOST_OS_LINUX +#if __GNUC__ #include <immintrin.h> #endif +#ifdef __GNUC__ +#define ATTRIBUTE_AVX2 __attribute__((target("avx2"))) +#else +#define ATTRIBUTE_AVX2 +#endif + struct { const void* lastPtr; @@ -284,10 +290,7 @@ void LatteIndices_generateAutoLineLoopIndices(void* indexDataOutput, uint32 coun indexMax = std::max(count, 1u) - 1; } -#if BOOST_OS_LINUX || BOOST_OS_MACOS -#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) -#endif - +ATTRIBUTE_AVX2 void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) { // using AVX + AVX2 we can process 16 indices at a time @@ -352,14 +355,7 @@ void LatteIndices_fastConvertU16_AVX2(const void* indexDataInput, void* indexDat indexMin = std::min(indexMin, _minIndex); } -#if BOOST_OS_LINUX || BOOST_OS_MACOS -#pragma clang attribute pop -#endif - -#if BOOST_OS_LINUX || BOOST_OS_MACOS -#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) -#endif - 
+ATTRIBUTE_AVX2 void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) { // SSSE3 & SSE4.1 optimized decoding @@ -423,14 +419,7 @@ void LatteIndices_fastConvertU16_SSE41(const void* indexDataInput, void* indexDa indexMin = std::min(indexMin, _minIndex); } -#if BOOST_OS_LINUX || BOOST_OS_MACOS -#pragma clang attribute pop -#endif - -#if BOOST_OS_LINUX || BOOST_OS_MACOS -#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) -#endif - +ATTRIBUTE_AVX2 void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDataOutput, uint32 count, uint32& indexMin, uint32& indexMax) { // using AVX + AVX2 we can process 8 indices at a time @@ -497,10 +486,6 @@ void LatteIndices_fastConvertU32_AVX2(const void* indexDataInput, void* indexDat indexMin = std::min(indexMin, _minIndex); } -#if BOOST_OS_LINUX || BOOST_OS_MACOS -#pragma clang attribute pop -#endif - template<typename T> void _LatteIndices_alternativeCalculateIndexMinMax(const void* indexData, uint32 count, uint32 primitiveRestartIndex, uint32& indexMin, uint32& indexMax) {