#include #include int main(void) { __m128i x = _mm_set_epi32( 0x5a5aa5a5, 0xfec80124, 0x12345678, 0x5533ccff); union { __m128i v; unsigned int s[4]; } y; #ifdef __XOP__ __m128i sel = _mm_set_epi32( 0x40414243, 0x44454647, 0x48494a4b, 0x4c4d4e4f); y.v = _mm_perm_epi8(x, x, sel); #elif __SSSE3__ __m128i sel = _mm_set_epi32( 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f); __m128i c0f = _mm_set1_epi8(0x0f); __m128i c33 = _mm_set1_epi8(0x33); __m128i c55 = _mm_set1_epi8(0x55); __m128i t1, t2; t1 = _mm_shuffle_epi8(x, sel); t1 = _mm_or_si128(_mm_slli_epi32(_mm_and_si128(t1, c0f), 4), _mm_and_si128(_mm_srli_epi32(t1, 4), c0f)); t1 = _mm_or_si128(_mm_slli_epi32(_mm_and_si128(t1, c33), 2), _mm_and_si128(_mm_srli_epi32(t1, 2), c33)); t2 = _mm_and_si128(t1, c55); y.v = _mm_or_si128(_mm_add_epi32(t2, t2), _mm_and_si128(_mm_srli_epi32(t1, 1), c55)); #else #error At least SSSE3 is required #endif printf("%08x %08x %08x %08x\n", y.s[0], y.s[1], y.s[2], y.s[3]); return 0; }