使用SSE/AVX 進行高效Double/int64 轉換
SSE2 提供用於在單精度浮點和32 位元整數之間轉換向量的內在函數,但缺少雙精確度浮點和64 位元整數的直接對應項。 AVX 也不提供這些轉換。
模擬內在函數的方法
1.對於有限值:
如果可以容忍某些限制(不處理無窮大與 NaN,且數值範圍受限:有符號轉換為 [-2^51, 2^51],無符號轉換為 [0, 2^52)),只需兩條指令即可執行 double 和 int64 之間的轉換:
double -> uint64_t:
// Converts two packed doubles to two packed unsigned 64-bit integers.
// Adding 2^52 leaves the integer value in the low mantissa bits of the sum;
// XOR-ing with the bit pattern of 2^52 then clears the exponent field.
// Only meaningful for finite inputs in the range [0, 2^52).
__m128i double_to_uint64(__m128d x)
{
    const __m128d magic = _mm_set1_pd(0x0010000000000000); // 2^52
    const __m128d biased = _mm_add_pd(x, magic);

    return _mm_xor_si128(_mm_castpd_si128(biased), _mm_castpd_si128(magic));
}
// Converts two packed doubles to two packed signed 64-bit integers.
// Adding 2^52 + 2^51 shifts the value so that the raw bits of the sum equal
// the bits of the constant plus the integer; a 64-bit integer subtraction of
// the constant's bit pattern then yields the signed result.
// Only meaningful for finite inputs in the range [-2^51, 2^51].
__m128i double_to_int64(__m128d x)
{
    const __m128d magic = _mm_set1_pd(0x0018000000000000); // 2^52 + 2^51
    const __m128i magic_bits = _mm_castpd_si128(magic);
    const __m128i sum_bits = _mm_castpd_si128(_mm_add_pd(x, magic));

    return _mm_sub_epi64(sum_bits, magic_bits);
}
// Converts two packed unsigned 64-bit integers to two packed doubles.
// OR-ing the integer into the mantissa of 2^52 produces the double
// 2^52 + x; subtracting 2^52 leaves x.
// Only meaningful for inputs in the range [0, 2^52).
__m128d uint64_to_double(__m128i x)
{
    const __m128d magic = _mm_set1_pd(0x0010000000000000); // 2^52
    const __m128i tagged = _mm_or_si128(x, _mm_castpd_si128(magic));

    return _mm_sub_pd(_mm_castsi128_pd(tagged), magic);
}
int64_t -> double:
// Converts two packed signed 64-bit integers to two packed doubles.
// A 64-bit integer add of the bit pattern of 2^52 + 2^51 produces the
// double (2^52 + 2^51) + x; subtracting the constant leaves x.
// Only meaningful for inputs in the range [-2^51, 2^51].
__m128d int64_to_double(__m128i x)
{
    const __m128d magic = _mm_set1_pd(0x0018000000000000); // 2^52 + 2^51
    const __m128i shifted = _mm_add_epi64(x, _mm_castpd_si128(magic));

    return _mm_sub_pd(_mm_castsi128_pd(shifted), magic);
}
2. 全範圍 int64 -> double:
要將全範圍 int64 轉換為 double,uint64_t 需要 5 條指令,int64_t 需要 6 條指令:
uint64_t -> double:
// Converts two packed unsigned 64-bit integers to two packed doubles over
// the full uint64_t range. Each lane is split into a high and a low 32-bit
// half, each half is embedded in the mantissa of a power-of-two constant,
// and the two halves are recombined with floating-point arithmetic.
// Uses _mm_blend_epi16, which is an SSE4.1 intrinsic.
__m128d uint64_to_double_full(__m128i x)
{
    // High 32 bits of each lane, OR-ed into the mantissa of 2^84:
    // reads back as the double 2^84 + hi * 2^32.
    __m128i xH = _mm_srli_epi64(x, 32);
    xH = _mm_or_si128(xH, _mm_castpd_si128(_mm_set1_pd(19342813113834066795298816.)));        // 2^84

    // Low 32 bits of each lane kept, upper 32 bits taken from 2^52:
    // reads back as the double 2^52 + lo.
    __m128i xL = _mm_blend_epi16(x, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)), 0xcc); // 2^52

    // Remove both biases from the high part, then add the low part.
    __m128d f = _mm_sub_pd(_mm_castsi128_pd(xH), _mm_set1_pd(19342813118337666422669312.));   // 2^84 + 2^52
    return _mm_add_pd(f, _mm_castsi128_pd(xL));
}
int64_t -> double:
// Converts two packed signed 64-bit integers to two packed doubles over
// the full int64_t range. Each lane is split into its sign-extended top
// 16 bits and its low 48 bits, each part is embedded in the mantissa of a
// constant, and the parts are recombined with floating-point arithmetic.
// Uses _mm_blend_epi16, which is an SSE4.1 intrinsic.
__m128d int64_to_double_full(__m128i x)
{
    // Arithmetic shift of each 32-bit element by 16; the blend then zeroes
    // the low 32 bits of every 64-bit lane, leaving the sign-extended top
    // 16 bits of x in the upper 32 bits.
    __m128i xH = _mm_srai_epi32(x, 16);
    xH = _mm_blend_epi16(xH, _mm_setzero_si128(), 0x33);
    // Integer add onto the bit pattern of 3*2^67:
    // reads back as the double 3*2^67 + hi16 * 2^48.
    xH = _mm_add_epi64(xH, _mm_castpd_si128(_mm_set1_pd(442721857769029238784.)));            // 3*2^67

    // Low 48 bits of each lane kept, top 16 bits taken from 2^52:
    // reads back as the double 2^52 + lo48.
    __m128i xL = _mm_blend_epi16(x, _mm_castpd_si128(_mm_set1_pd(0x0010000000000000)), 0x88); // 2^52

    // Remove both biases from the high part, then add the low part.
    __m128d f = _mm_sub_pd(_mm_castsi128_pd(xH), _mm_set1_pd(442726361368656609280.));        // 3*2^67 + 2^52
    return _mm_add_pd(f, _mm_castsi128_pd(xL));
}
AVX512
AVX512 確實提供雙精確度浮點與 64 位元整數(有符號和無符號)之間的直接轉換。這些轉換是使用 _mm512_cvtpd_epi64 和 _mm256_cvtpd_epi64 等內在函數完成的(需要 AVX512DQ 擴充;256 位元版本另需 AVX512VL)。 請注意,這些解決方案不以完整程式碼形式提供。讀者應完成它們並根據需要進行最佳化,以使它們適應特定的上下文。以上是「如何使用 SSE/AVX 指令在雙精確度浮點型和 64 位元整數之間高效率轉換?」的詳細內容。更多資訊請關注PHP中文網其他相關文章!