cmake: Detect AVX + allow build system to disable Intel intrinsics

This commit is contained in:
Anonymous Maarten
2023-02-25 00:21:15 +01:00
committed by Anonymous Maarten
parent 683411e96f
commit 4681240241
16 changed files with 175 additions and 116 deletions

View File

@@ -185,6 +185,46 @@
#include <SDL3/SDL.h>
#include <SDL3/SDL_intrin.h>
/*
 * Compile-time SIMD feature switches.
 *
 * Each HAVE_<ISA>_INTRINSICS macro is defined to 1 when the compiler
 * advertises the instruction set through its predefined macro. The x86
 * ones (MMX, SSE, SSE2, SSE3, AVX) are additionally suppressed when the
 * matching SDL_DISABLE_<ISA> macro is defined, which lets the build
 * system turn an instruction set off. Code elsewhere tests these with
 * `#if HAVE_<ISA>_INTRINSICS`.
 */
/* NEON has no SDL_DISABLE_* opt-out in this block. */
#ifdef __ARM_NEON
#define HAVE_NEON_INTRINSICS 1
#endif
#if defined(__MMX__) && !defined(SDL_DISABLE_MMX)
#define HAVE_MMX_INTRINSICS 1
#endif
#if defined(__SSE__) && !defined(SDL_DISABLE_SSE)
#define HAVE_SSE_INTRINSICS 1
#endif
#if defined(__SSE2__) && !defined(SDL_DISABLE_SSE2)
#define HAVE_SSE2_INTRINSICS 1
#endif
#if defined(__SSE3__) && !defined(SDL_DISABLE_SSE3)
#define HAVE_SSE3_INTRINSICS 1
#endif
#if defined(__AVX__) && !defined(SDL_DISABLE_AVX)
#define HAVE_AVX_INTRINSICS 1
#endif
/*
 * Compiler-specific vetoes: withdraw HAVE_AVX_INTRINSICS again on
 * toolchains that fail the checks below. #undef of a macro that was
 * never defined is harmless, so these run unconditionally.
 */
#if defined __clang__
/* clang without support for the `target` attribute: no AVX paths. */
#if (!__has_attribute(target))
#undef HAVE_AVX_INTRINSICS
#endif
/*
 * clang with _MSC_VER or __SCE__ defined, and __AVX__ not defined: no AVX paths.
 * NOTE(review): HAVE_AVX_INTRINSICS is only defined above when __AVX__ is
 * defined, so this looks redundant unless the macro can also come from
 * elsewhere (e.g. the command line or SDL_intrin.h) - confirm.
 */
#if (defined(_MSC_VER) || defined(__SCE__)) && !defined(__AVX__)
#undef HAVE_AVX_INTRINSICS
#endif
#elif defined __GNUC__
/*
 * GCC older than 4.9: no AVX paths.
 * NOTE(review): presumably older GCC lacks something the AVX code relies
 * on; the reason is not visible here - confirm.
 */
#if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)
#undef HAVE_AVX_INTRINSICS
#endif
#endif
#define SDL_MAIN_NOIMPL /* don't drag in header-only implementation of SDL_main */
#include <SDL3/SDL_main.h>

View File

@@ -29,35 +29,6 @@
#define DEBUG_AUDIOSTREAM 0
#ifdef __ARM_NEON
#define HAVE_NEON_INTRINSICS 1
#endif
#ifdef __SSE__
#define HAVE_SSE_INTRINSICS 1
#endif
#ifdef __SSE3__
#define HAVE_SSE3_INTRINSICS 1
#endif
#if defined(HAVE_IMMINTRIN_H) && !defined(SDL_DISABLE_IMMINTRIN_H)
#define HAVE_AVX_INTRINSICS 1
#endif
#if defined __clang__
#if (!__has_attribute(target))
#undef HAVE_AVX_INTRINSICS
#endif
#if (defined(_MSC_VER) || defined(__SCE__)) && !defined(__AVX__)
#undef HAVE_AVX_INTRINSICS
#endif
#elif defined __GNUC__
#if (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 9)
#undef HAVE_AVX_INTRINSICS
#endif
#endif
/**
* Initialize an SDL_AudioCVT structure for conversion.
*

View File

@@ -27,10 +27,6 @@
#define HAVE_NEON_INTRINSICS 1
#endif
#ifdef __SSE2__
#define HAVE_SSE2_INTRINSICS 1
#endif
#if defined(__x86_64__) && HAVE_SSE2_INTRINSICS
#define NEED_SCALAR_CONVERTER_FALLBACKS 0 /* x86_64 guarantees SSE2. */
#elif __MACOS__ && HAVE_SSE2_INTRINSICS

View File

@@ -166,7 +166,7 @@ static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info)
}
}
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
static void BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo *info)
@@ -409,7 +409,7 @@ static void BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo *info)
_mm_empty();
}
#endif /* __MMX__ */
#endif /* HAVE_MMX_INTRINSICS */
#if SDL_ARM_SIMD_BLITTERS
void BlitARGBto565PixelAlphaARMSIMDAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
@@ -750,7 +750,7 @@ static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
}
}
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
/* fast RGB565->RGB565 blending with surface alpha */
static void Blit565to565SurfaceAlphaMMX(SDL_BlitInfo *info)
@@ -1025,7 +1025,7 @@ static void Blit555to555SurfaceAlphaMMX(SDL_BlitInfo *info)
}
}
#endif /* __MMX__ */
#endif /* HAVE_MMX_INTRINSICS */
/* fast RGB565->RGB565 blending with surface alpha */
static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
@@ -1357,15 +1357,13 @@ SDL_CalculateBlitA(SDL_Surface *surface)
case 4:
if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
#if defined(__MMX__)
#if HAVE_MMX_INTRINSICS
if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
#ifdef __MMX__
if (SDL_HasMMX()) {
return BlitRGBtoRGBPixelAlphaMMX;
}
#endif
}
#endif /* __MMX__ */
#endif /* HAVE_MMX_INTRINSICS */
if (sf->Amask == 0xff000000) {
#if SDL_ARM_NEON_BLITTERS
if (SDL_HasNEON()) {
@@ -1407,7 +1405,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
case 2:
if (surface->map->identity) {
if (df->Gmask == 0x7e0) {
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
if (SDL_HasMMX()) {
return Blit565to565SurfaceAlphaMMX;
} else
@@ -1416,7 +1414,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
return Blit565to565SurfaceAlpha;
}
} else if (df->Gmask == 0x3e0) {
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
if (SDL_HasMMX()) {
return Blit555to555SurfaceAlphaMMX;
} else
@@ -1430,7 +1428,7 @@ SDL_CalculateBlitA(SDL_Surface *surface)
case 4:
if (sf->Rmask == df->Rmask && sf->Gmask == df->Gmask && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
if (sf->Rshift % 8 == 0 && sf->Gshift % 8 == 0 && sf->Bshift % 8 == 0 && SDL_HasMMX()) {
return BlitRGBtoRGBSurfaceAlphaMMX;
}

View File

@@ -23,7 +23,7 @@
#include "SDL_blit.h"
#include "SDL_blit_copy.h"
#ifdef __SSE__
#if HAVE_SSE_INTRINSICS
/* This assumes 16-byte aligned src and dst */
static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
{
@@ -48,9 +48,9 @@ static SDL_INLINE void SDL_memcpySSE(Uint8 *dst, const Uint8 *src, int len)
SDL_memcpy(dst, src, len & 63);
}
}
#endif /* __SSE__ */
#endif /* HAVE_SSE_INTRINSICS */
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
#ifdef _MSC_VER
#pragma warning(disable : 4799)
#endif
@@ -81,7 +81,7 @@ static SDL_INLINE void SDL_memcpyMMX(Uint8 *dst, const Uint8 *src, int len)
SDL_memcpy(dst + skip, src + skip, remain);
}
}
#endif /* __MMX__ */
#endif /* HAVE_MMX_INTRINSICS */
void SDL_BlitCopy(SDL_BlitInfo *info)
{
@@ -122,7 +122,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info)
return;
}
#ifdef __SSE__
#if HAVE_SSE_INTRINSICS
if (SDL_HasSSE() &&
!((uintptr_t)src & 15) && !(srcskip & 15) &&
!((uintptr_t)dst & 15) && !(dstskip & 15)) {
@@ -135,7 +135,7 @@ void SDL_BlitCopy(SDL_BlitInfo *info)
}
#endif
#ifdef __MMX__
#if HAVE_MMX_INTRINSICS
if (SDL_HasMMX() && !(srcskip & 7) && !(dstskip & 7)) {
while (h--) {
SDL_memcpyMMX(dst, src, w);

View File

@@ -22,7 +22,7 @@
#include "SDL_blit.h"
#ifdef __SSE__
#if HAVE_SSE_INTRINSICS
/* *INDENT-OFF* */ /* clang-format off */
#if defined(_MSC_VER) && !defined(__clang__)
@@ -376,7 +376,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
{
color |= (color << 8);
color |= (color << 16);
#ifdef __SSE__
#if HAVE_SSE_INTRINSICS
if (SDL_HasSSE()) {
fill_function = SDL_FillSurfaceRect1SSE;
break;
@@ -389,7 +389,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
case 2:
{
color |= (color << 16);
#ifdef __SSE__
#if HAVE_SSE_INTRINSICS
if (SDL_HasSSE()) {
fill_function = SDL_FillSurfaceRect2SSE;
break;
@@ -408,7 +408,7 @@ int SDL_FillSurfaceRects(SDL_Surface *dst, const SDL_Rect *rects, int count,
case 4:
{
#ifdef __SSE__
#if HAVE_SSE_INTRINSICS
if (SDL_HasSSE()) {
fill_function = SDL_FillSurfaceRect4SSE;
break;

View File

@@ -332,10 +332,6 @@ static int scale_mat(const Uint32 *src, int src_w, int src_h, int src_pitch,
return 0;
}
#if defined(__SSE2__)
#define HAVE_SSE2_INTRINSICS 1
#endif
#if defined(__ARM_NEON)
#define HAVE_NEON_INTRINSICS 1
#define CAST_uint8x8_t (uint8x8_t)

View File

@@ -310,7 +310,7 @@ static SDL_bool yuv_rgb_sse(
Uint8 *rgb, Uint32 rgb_stride,
YCbCrType yuv_type)
{
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (!SDL_HasSSE2()) {
return SDL_FALSE;
}
@@ -1114,7 +1114,7 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
const Uint8 *src1, *src2;
Uint8 *dstUV;
Uint8 *tmp = NULL;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
@@ -1144,7 +1144,7 @@ static int SDL_ConvertPixels_PackUVPlanes_to_NV(int width, int height, const voi
y = UVheight;
while (y--) {
x = UVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
while (x >= 16) {
__m128i u = _mm_loadu_si128((__m128i *)src1);
@@ -1187,7 +1187,7 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
const Uint8 *srcUV;
Uint8 *dst1, *dst2;
Uint8 *tmp = NULL;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
@@ -1217,7 +1217,7 @@ static int SDL_ConvertPixels_SplitNV_to_UVPlanes(int width, int height, const vo
y = UVheight;
while (y--) {
x = UVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
__m128i mask = _mm_set1_epi16(0x00FF);
while (x >= 16) {
@@ -1264,7 +1264,7 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
const int dstUVPitchLeft = (dstUVPitch - UVwidth * 2) / sizeof(Uint16);
const Uint16 *srcUV;
Uint16 *dstUV;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
@@ -1277,7 +1277,7 @@ static int SDL_ConvertPixels_SwapNV(int width, int height, const void *src, int
y = UVheight;
while (y--) {
x = UVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
while (x >= 8) {
__m128i uv = _mm_loadu_si128((__m128i *)srcUV);
@@ -1372,7 +1372,7 @@ static int SDL_ConvertPixels_Planar2x2_to_Planar2x2(int width, int height,
SDL_GetPixelFormatName(dst_format));
}
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
#define PACKED4_TO_PACKED4_ROW_SSE2(shuffle) \
while (x >= 4) { \
__m128i yuv = _mm_loadu_si128((__m128i *)srcYUV); \
@@ -1399,14 +1399,14 @@ static int SDL_ConvertPixels_YUY2_to_UYVY(int width, int height, const void *src
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
const Uint8 *srcYUV = (const Uint8 *)src;
Uint8 *dstYUV = (Uint8 *)dst;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
y = height;
while (y--) {
x = YUVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
}
@@ -1440,14 +1440,14 @@ static int SDL_ConvertPixels_YUY2_to_YVYU(int width, int height, const void *src
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
const Uint8 *srcYUV = (const Uint8 *)src;
Uint8 *dstYUV = (Uint8 *)dst;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
y = height;
while (y--) {
x = YUVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
}
@@ -1481,14 +1481,14 @@ static int SDL_ConvertPixels_UYVY_to_YUY2(int width, int height, const void *src
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
const Uint8 *srcYUV = (const Uint8 *)src;
Uint8 *dstYUV = (Uint8 *)dst;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
y = height;
while (y--) {
x = YUVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 3, 0, 1));
}
@@ -1522,14 +1522,14 @@ static int SDL_ConvertPixels_UYVY_to_YVYU(int width, int height, const void *src
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
const Uint8 *srcYUV = (const Uint8 *)src;
Uint8 *dstYUV = (Uint8 *)dst;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
y = height;
while (y--) {
x = YUVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(0, 3, 2, 1));
}
@@ -1563,14 +1563,14 @@ static int SDL_ConvertPixels_YVYU_to_YUY2(int width, int height, const void *src
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
const Uint8 *srcYUV = (const Uint8 *)src;
Uint8 *dstYUV = (Uint8 *)dst;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
y = height;
while (y--) {
x = YUVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(1, 2, 3, 0));
}
@@ -1604,14 +1604,14 @@ static int SDL_ConvertPixels_YVYU_to_UYVY(int width, int height, const void *src
const int dstYUVPitchLeft = (dst_pitch - YUVwidth * 4);
const Uint8 *srcYUV = (const Uint8 *)src;
Uint8 *dstYUV = (Uint8 *)dst;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
const SDL_bool use_SSE2 = SDL_HasSSE2();
#endif
y = height;
while (y--) {
x = YUVwidth;
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
if (use_SSE2) {
PACKED4_TO_PACKED4_ROW_SSE2(_MM_SHUFFLE(2, 1, 0, 3));
}

View File

@@ -6,7 +6,6 @@
#include "yuv_rgb.h"
#define PRECISION 6
#define PRECISION_FACTOR (1<<PRECISION)
@@ -240,7 +239,7 @@ void rgb24_yuv420_std(
}
}
#ifdef __SSE2__
#if HAVE_SSE2_INTRINSICS
#define SSE_FUNCTION_NAME yuv420_rgb565_sse
#define STD_FUNCTION_NAME yuv420_rgb565_std
@@ -683,7 +682,7 @@ void rgb24_yuv420_sseu(uint32_t width, uint32_t height,
}
#endif //__SSE2__
#endif //HAVE_SSE2_INTRINSICS
#ifdef __loongarch_sx