从字符串获取 IPv4 地址的最快方法
有问题的原始代码:
/*
 * Convert a dotted-decimal IPv4 string to a 32-bit integer with the first
 * octet in the most significant byte, e.g. "1.2.3.4" -> 0x01020304.
 *
 * p: NUL-terminated string. A null pointer yields 0.
 *
 * Returns the packed address. No validation is performed: every byte other
 * than '.' is accumulated as if it were a decimal digit, and octet values are
 * not range-checked, so the result for malformed text is unspecified garbage.
 *
 * Every byte is compared with the terminator before it is consumed, so a
 * trailing or doubled '.' can never push the scan past the end of the string.
 */
UINT32 GetIP(const char *p)
{
    UINT32 ip = 0;
    UINT32 octet = 0;

    if (!p) {
        return 0;
    }

    for (; *p != '\0'; p++) {
        if (*p == '.') {
            /* Octet finished: shift it into the result and start the next. */
            ip = (ip << 8) | octet;
            octet = 0;
        } else {
            octet = octet * 10 + (UINT32)(*p - '0');
        }
    }

    /* The last octet is ended by the terminator rather than by a '.'. */
    return (ip << 8) | octet;
}
更快的向量化解决方案:
利用 x86 的 SSE 系列 SIMD 指令(代码中用到了 SSSE3 和 SSE4.1 的内建函数),更高效的解决方案如下:
UINT32 MyGetIP(const char *str) { // Load and convert input __m128i input = _mm_lddqu_si128((const __m128i*)str); input = _mm_sub_epi8(input, _mm_set1_epi8('0')); // Generate shuffled array __m128i cmp = input; UINT32 mask = _mm_movemask_epi8(cmp); __m128i shuf = shuffleTable[mask]; __m128i arr = _mm_shuffle_epi8(input, shuf); // Calculate coefficients __m128i coeffs = _mm_set_epi8(0, 100, 10, 1, 0, 100, 10, 1, 0, 100, 10, 1, 0, 100, 10, 1); // Multiply and accumulate __m128i prod = _mm_maddubs_epi16(coeffs, arr); prod = _mm_hadd_epi16(prod, prod); // Reorder result __m128i imm = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, 4, 2, 0); prod = _mm_shuffle_epi8(prod, imm); // Extract result return _mm_extract_epi32(prod, 0); }
shuffleTable 的预计算:
/*
 * Fill shuffleTable with the byte-shuffle control for every supported
 * separator bitmask.
 *
 * Each of the four octets may have 1 to 3 digits, giving 81 layouts. For one
 * layout, bit k of the mask is set when byte k of the text follows an octet's
 * last digit (three separators plus the byte after the final octet). The bits
 * above the text length are unconstrained, so the same control is stored for
 * every combination of those high bits.
 *
 * The control places octet i in 4-byte lane (3 - i) with its last digit at
 * the lane's lowest byte; unused control bytes stay -1.
 */
void MyInit()
{
    /* Same order as four nested 1..3 loops with the first octet outermost. */
    for (int combo = 0; combo < 81; combo++) {
        const int digits[4] = {
            combo / 27 + 1,
            combo / 9 % 3 + 1,
            combo / 3 % 3 + 1,
            combo % 3 + 1
        };
        const int text_len = digits[0] + digits[1] + digits[2] + digits[3] + 4;
        const int spare = 16 - text_len;

        char control[16];
        for (int k = 0; k < 16; k++) {
            control[k] = -1;
        }

        int separators = 0;
        int cursor = 0;
        for (int octet = 0; octet < 4; octet++) {
            char *lane = control + (3 - octet) * 4;

            /* Digits are read most significant first, so the first one goes
               to the highest slot of the lane. */
            for (int slot = digits[octet] - 1; slot >= 0; slot--) {
                lane[slot] = (char)cursor;
                cursor++;
            }

            /* The byte right after the digits is a separator. */
            separators |= 1 << cursor;
            cursor++;
        }

        /* The control depends only on the layout; replicate it for every
           pattern of the bits beyond the text. */
        const __m128i entry = _mm_loadu_si128((__m128i*)control);
        for (int high = 0; high < (1 << spare); high++) {
            _mm_store_si128(&shuffleTable[separators | (high << text_len)], entry);
        }
    }
}
评估:
得益于向量化技术,该解决方案的速度明显加快,性能比原始代码高出 7.8 倍。它在 3.4 GHz 处理器的单核上每秒可处理约 3.36 亿个 IP 地址。
以上就是“如何使用向量化技术来加速 IPv4 地址从字符串到整数的转换?”的详细内容。更多信息请关注PHP中文网其他相关文章!