#include <emmintrin.h>  // SSE2 integer intrinsics; also provides the SSE __m128 float API
#include <iostream>
using namespace std;

// Eight signed 16-bit samples in fixed-point form with 11 fractional bits
// (i.e. the represented value is input[i] / 2048). Covers zero, the smallest
// negative step, +/-1.0, both extremes of the short range, and two
// truncated approximations (2048*3.1415 and 2048*2.7182).
short input[8] = {0, -1, 2048, -2048, 0x7fff, -0x8000, short(2048*3.1415), short(2048*2.7182)};

// Receives input[i] / 2048.f for each of the eight samples.
float output[8];

// Converts eight signed 16-bit values at `src` into floats at `dst`, each
// scaled by 1/2048, without using an int->float conversion instruction.
//
// The trick: a float whose upper 16 bits are 0x4580 has the value
// 4096 + m/2048, where m is the integer held in its lower 16 bits
// (0x45800000 encodes 4096.0f == float(1<<23)/2048, and one mantissa step at
// that exponent is exactly 1/2048). So placing an unsigned 16-bit integer in
// the low half of such a float and subtracting a constant yields the scaled
// value directly.
//
// Neither pointer needs to be 16-byte aligned (unaligned load/store is used).
static void fixed11_to_float_x8(const short* src, float* dst)
{
    // -0x8000 has the same 16-bit pattern as 0x8000 but fits in a short, so
    // no out-of-range narrowing conversion is needed at the call.
    const __m128i sign_bias = _mm_set1_epi16(static_cast<short>(-0x8000));
    // Upper 16 bits of the IEEE-754 encoding of float(1<<23)/2048.f.
    const __m128i magic_hi = _mm_set1_epi16(0x4580);
    // Removes both the 4096 from the magic exponent and the 0x8000 offset
    // added below: float((1<<23) + (1<<15)) / 2048.f.
    const __m128 magic_sub = _mm_set_ps1(float((1 << 23) + (1 << 15)) / 2048.f);

    // Load all eight samples.
    __m128i val = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));

    // Add 0x8000 (mod 2^16) to map the signed range onto the unsigned range.
    val = _mm_add_epi16(val, sign_bias);

    // Interleave each sample (low 16 bits) with the magic constant (high 16
    // bits) to form four 32-bit float bit patterns per register.
    const __m128i lo = _mm_unpacklo_epi16(val, magic_hi);
    const __m128i hi = _mm_unpackhi_epi16(val, magic_hi);

    // Reinterpret as float and subtract the combined bias.
    const __m128 lo_f = _mm_sub_ps(_mm_castsi128_ps(lo), magic_sub);
    const __m128 hi_f = _mm_sub_ps(_mm_castsi128_ps(hi), magic_sub);

    // Store the eight results: samples 0-3 first, then 4-7.
    _mm_storeu_ps(dst, lo_f);
    _mm_storeu_ps(dst + 4, hi_f);
}

// Converts the sample table and prints the eight results on one line,
// each followed by a single space.
int main()
{
    fixed11_to_float_x8(input, output);

    for (int i = 0; i < 8; ++i)
        std::cout << output[i] << " ";
    std::cout << "\n";
}