正确使用加载/存储

如何正确使用加载/存储指令,对已正确对齐的 int16_t 数据进行字节交换?

 /*
  * byte_swapping: swap the two bytes of every 16-bit word read from src and
  * write the result to dest, handling 8 words (one 128-bit vector) per
  * loop iteration.
  *
  * Preconditions imposed by this implementation:
  *  - src and dest must be 16-byte aligned, because _mm_load_si128 and
  *    _mm_store_si128 are the aligned load/store intrinsics;
  *  - count must be a multiple of 8, because the loop advances dest by 8
  *    and stops only when dest == end exactly.
  *
  * NOTE(review): `end(dest + count)` is C++ direct-initialization syntax,
  * so this snippet builds as C++, not as C.
  */
 void byte_swapping(uint16_t* dest, const uint16_t* src, size_t count) { __m128i _s, _d; for (uint16_t const * end(dest + count); dest != end; dest += 8, src += 8) { _s = _mm_load_si128((__m128i*)src); _d = _mm_or_si128(_mm_slli_epi16(_s, 8), _mm_srli_epi16(_s, 8)); _mm_store_si128((__m128i*) dest, _d); } } 

当 count 不是 8 的倍数,或者 src 或 dest 没有按 16 字节对齐时,你的代码就会失败。

下面是你的代码修正后(并经过测试)的版本:

 /*
  * Swap the two bytes of each of the `count` 16-bit words in `src` and store
  * the results in `dest`.
  *
  * dest  - output buffer with room for `count` words; no alignment required.
  * src   - input buffer of `count` words; no alignment required.
  * count - number of 16-bit words to process; any value, including 0.
  *
  * Exact in-place use (dest == src) works because every chunk is fully read
  * before it is written. Partially overlapping buffers are not handled.
  */
 void byte_swapping(uint16_t *dest, const uint16_t *src, size_t count)
 {
     size_t i = 0;

     /* Vector path: 8 words (128 bits) per iteration. The unaligned
      * load/store intrinsics are used so callers need not align buffers. */
     for (; i + 8 <= count; i += 8) {
         /* Keep the const qualifier: _mm_loadu_si128 takes a pointer to
          * const, so there is no reason to cast it away. */
         const __m128i s = _mm_loadu_si128((const __m128i *)&src[i]);
         /* Within each 16-bit lane: (s << 8) | (s >> 8) exchanges the bytes. */
         const __m128i d = _mm_or_si128(_mm_slli_epi16(s, 8),
                                        _mm_srli_epi16(s, 8));
         _mm_storeu_si128((__m128i *)&dest[i], d);
     }

     /* Scalar tail: the remaining (count % 8) words. */
     for (; i < count; ++i) {
         const uint16_t w = src[i];
         /* The shifts happen in int after promotion; the cast truncates the
          * result back to 16 bits. */
         dest[i] = (uint16_t)((w >> 8) | (w << 8));
     }
 }