Added SSE accelerated RGB565 to 32-bit pixel conversions
This commit is contained in:
@@ -38,7 +38,7 @@
|
|||||||
// Functions to blit from N-bit surfaces to other surfaces
|
// Functions to blit from N-bit surfaces to other surfaces
|
||||||
|
|
||||||
#define BLIT_FEATURE_NONE 0x00
|
#define BLIT_FEATURE_NONE 0x00
|
||||||
#define BLIT_FEATURE_HAS_MMX 0x01
|
#define BLIT_FEATURE_HAS_SSE41 0x01
|
||||||
#define BLIT_FEATURE_HAS_ALTIVEC 0x02
|
#define BLIT_FEATURE_HAS_ALTIVEC 0x02
|
||||||
#define BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH 0x04
|
#define BLIT_FEATURE_ALTIVEC_DONT_USE_PREFETCH 0x04
|
||||||
|
|
||||||
@@ -875,8 +875,8 @@ static Uint32 GetBlitFeatures(void)
|
|||||||
static Uint32 features = ~0u;
|
static Uint32 features = ~0u;
|
||||||
if (features == ~0u) {
|
if (features == ~0u) {
|
||||||
features = (0
|
features = (0
|
||||||
// Feature 1 is has-MMX
|
// Feature 1 is has-SSE41
|
||||||
| ((SDL_HasMMX()) ? BLIT_FEATURE_HAS_MMX : 0)
|
| ((SDL_HasSSE41()) ? BLIT_FEATURE_HAS_SSE41 : 0)
|
||||||
// Feature 2 is has-AltiVec
|
// Feature 2 is has-AltiVec
|
||||||
| ((SDL_HasAltiVec()) ? BLIT_FEATURE_HAS_ALTIVEC : 0)
|
| ((SDL_HasAltiVec()) ? BLIT_FEATURE_HAS_ALTIVEC : 0)
|
||||||
// Feature 4 is dont-use-prefetch
|
// Feature 4 is dont-use-prefetch
|
||||||
@@ -890,8 +890,8 @@ static Uint32 GetBlitFeatures(void)
|
|||||||
#pragma altivec_model off
|
#pragma altivec_model off
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
// Feature 1 is has-MMX
|
// Feature 1 is has-SSE41
|
||||||
#define GetBlitFeatures() ((SDL_HasMMX() ? BLIT_FEATURE_HAS_MMX : 0))
|
#define GetBlitFeatures() ((SDL_HasSSE41() ? BLIT_FEATURE_HAS_SSE41 : 0))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// This is now endian dependent
|
// This is now endian dependent
|
||||||
@@ -1163,6 +1163,109 @@ static void Blit_XRGB8888_RGB565(SDL_BlitInfo *info)
|
|||||||
#endif // USE_DUFFS_LOOP
|
#endif // USE_DUFFS_LOOP
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef SDL_SSE4_1_INTRINSICS
|
||||||
|
|
||||||
|
static void SDL_TARGETING("sse4.1") Blit_RGB565_32_SSE41(SDL_BlitInfo *info, int Rshift, int Gshift, int Bshift, int Amask)
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
int width, height;
|
||||||
|
const Uint16 *src;
|
||||||
|
Uint32 *dst;
|
||||||
|
int srcskip, dstskip;
|
||||||
|
Uint8 r, g, b;
|
||||||
|
|
||||||
|
// Set up some basic variables
|
||||||
|
width = info->dst_w;
|
||||||
|
height = info->dst_h;
|
||||||
|
src = (const Uint16 *)info->src;
|
||||||
|
srcskip = info->src_skip / 2;
|
||||||
|
dst = (Uint32 *)info->dst;
|
||||||
|
dstskip = info->dst_skip / 4;
|
||||||
|
|
||||||
|
while (height--) {
|
||||||
|
// Copy in 4 pixel chunks
|
||||||
|
for (c = width / 4; c; --c) {
|
||||||
|
// Load 4 16-bit RGB565 pixels into an SSE register
|
||||||
|
__m128i pixels_rgb565 = _mm_loadu_si128((__m128i*)src);
|
||||||
|
|
||||||
|
// Extract Red components (5 bits)
|
||||||
|
__m128i red_5bit = _mm_and_si128(pixels_rgb565, _mm_set1_epi16(0xF800)); // Mask for Red
|
||||||
|
red_5bit = _mm_srli_epi16(red_5bit, 11); // Shift to get 5-bit value
|
||||||
|
__m128i red_8bit = _mm_cvtepu16_epi32(red_5bit); // Convert to 32-bit and zero-extend
|
||||||
|
red_8bit = _mm_slli_epi32(red_8bit, 3); // Scale to 8 bits (multiply by 8)
|
||||||
|
red_8bit = _mm_or_si128(red_8bit, _mm_srli_epi32(red_8bit, 5)); // Replicate top 3 bits for better scaling
|
||||||
|
|
||||||
|
// Extract Green components (6 bits)
|
||||||
|
__m128i green_6bit = _mm_and_si128(pixels_rgb565, _mm_set1_epi16(0x07E0)); // Mask for Green
|
||||||
|
green_6bit = _mm_srli_epi16(green_6bit, 5); // Shift to get 6-bit value
|
||||||
|
__m128i green_8bit = _mm_cvtepu16_epi32(green_6bit); // Convert to 32-bit and zero-extend
|
||||||
|
green_8bit = _mm_slli_epi32(green_8bit, 2); // Scale to 8 bits (multiply by 4)
|
||||||
|
green_8bit = _mm_or_si128(green_8bit, _mm_srli_epi32(green_8bit, 6)); // Replicate top 2 bits
|
||||||
|
|
||||||
|
// Extract Blue components (5 bits)
|
||||||
|
__m128i blue_5bit = _mm_and_si128(pixels_rgb565, _mm_set1_epi16(0x001F)); // Mask for Blue
|
||||||
|
__m128i blue_8bit = _mm_cvtepu16_epi32(blue_5bit); // Convert to 32-bit and zero-extend
|
||||||
|
blue_8bit = _mm_slli_epi32(blue_8bit, 3); // Scale to 8 bits (multiply by 8)
|
||||||
|
blue_8bit = _mm_or_si128(blue_8bit, _mm_srli_epi32(blue_8bit, 5)); // Replicate top 3 bits
|
||||||
|
|
||||||
|
// Set Alpha to opaque (0xFF)
|
||||||
|
__m128i alpha_8bit = _mm_set1_epi32(Amask);
|
||||||
|
|
||||||
|
// Combine into 32-bit values
|
||||||
|
__m128i argb_pixels_low = _mm_or_si128(alpha_8bit, _mm_slli_epi32(red_8bit, Rshift));
|
||||||
|
argb_pixels_low = _mm_or_si128(argb_pixels_low, _mm_slli_epi32(green_8bit, Gshift));
|
||||||
|
argb_pixels_low = _mm_or_si128(argb_pixels_low, _mm_slli_epi32(blue_8bit, Bshift));
|
||||||
|
|
||||||
|
// Store the results
|
||||||
|
_mm_storeu_si128((__m128i*)dst, argb_pixels_low);
|
||||||
|
src += 4;
|
||||||
|
dst += 4;
|
||||||
|
}
|
||||||
|
// Get any leftovers
|
||||||
|
switch (width & 3) {
|
||||||
|
case 3:
|
||||||
|
RGB_FROM_RGB565(*src, r, g, b);
|
||||||
|
*dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask;
|
||||||
|
++src;
|
||||||
|
SDL_FALLTHROUGH;
|
||||||
|
case 2:
|
||||||
|
RGB_FROM_RGB565(*src, r, g, b);
|
||||||
|
*dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask;
|
||||||
|
++src;
|
||||||
|
SDL_FALLTHROUGH;
|
||||||
|
case 1:
|
||||||
|
RGB_FROM_RGB565(*src, r, g, b);
|
||||||
|
*dst++ = (r << Rshift) | (g << Gshift) | (b << Bshift) | Amask;
|
||||||
|
++src;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
src += srcskip;
|
||||||
|
dst += dstskip;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void Blit_RGB565_ARGB8888_SSE41(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
Blit_RGB565_32_SSE41(info, 16, 8, 0, 0xFF000000);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void Blit_RGB565_ABGR8888_SSE41(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
Blit_RGB565_32_SSE41(info, 0, 8, 16, 0xFF000000);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void Blit_RGB565_RGBA8888_SSE41(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
Blit_RGB565_32_SSE41(info, 24, 16, 8, 0x000000FF);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void Blit_RGB565_BGRA8888_SSE41(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
Blit_RGB565_32_SSE41(info, 8, 16, 24, 0x000000FF);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // SDL_SSE4_1_INTRINSICS
|
||||||
|
|
||||||
#ifdef SDL_HAVE_BLIT_N_RGB565
|
#ifdef SDL_HAVE_BLIT_N_RGB565
|
||||||
|
|
||||||
// Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces
|
// Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces
|
||||||
@@ -2700,6 +2803,16 @@ static const struct blit_table normal_blit_2[] = {
|
|||||||
{ 0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
|
{ 0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000, 0x00000000,
|
||||||
BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
|
BLIT_FEATURE_HAS_ALTIVEC, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef SDL_SSE4_1_INTRINSICS
|
||||||
|
{ 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
|
||||||
|
BLIT_FEATURE_HAS_SSE41, Blit_RGB565_ARGB8888_SSE41, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
|
||||||
|
{ 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
|
||||||
|
BLIT_FEATURE_HAS_SSE41, Blit_RGB565_ABGR8888_SSE41, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
|
||||||
|
{ 0x0000F800, 0x000007E0, 0x0000001F, 4, 0xFF000000, 0x00FF0000, 0x0000FF00,
|
||||||
|
BLIT_FEATURE_HAS_SSE41, Blit_RGB565_RGBA8888_SSE41, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
|
||||||
|
{ 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x0000FF00, 0x00FF0000, 0xFF000000,
|
||||||
|
BLIT_FEATURE_HAS_SSE41, Blit_RGB565_BGRA8888_SSE41, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
|
||||||
|
#endif
|
||||||
#ifdef SDL_HAVE_BLIT_N_RGB565
|
#ifdef SDL_HAVE_BLIT_N_RGB565
|
||||||
{ 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
|
{ 0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
|
||||||
0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
|
0, Blit_RGB565_ARGB8888, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
|
||||||
|
|||||||
Reference in New Issue
Block a user