search

Home  >  Q&A  >  body text

c++ - 平方根倒数算法的效率比较的一个问题

问题源于这样一道作业题:

作业中提到的的那个invSqrt的代码如下:

float Q_rsqrt( float number )
{
    long i;
    float x2, y;
    const float threehalfs = 1.5F;

    x2 = number * 0.5F;
    y  = number;
    i  = * ( long * ) &y;                       // evil floating point bit level hacking
    i  = 0x5f3759df - ( i >> 1 );               // what the fuck?
    y  = * ( float * ) &i;
    y  = y * ( threehalfs - ( x2 * y * y ) );   // 1st iteration 
//      y  = y * ( threehalfs - ( x2 * y * y ) );   // 2nd iteration, this can be removed

    return y;
}

然后就自己写了个比较性能和精度的程序:

#include<stdio.h>
#include<math.h>
#include<time.h>
float Q_rsqrt(float number)        //从wiki上获取的源代码
{
    long i;
    float x2, y;
    const float threehalfs = 1.5F;
    x2 = number * 0.5F;
    y = number;
    i = *(long *)&y;                       // evil floating point bit level hacking
    i = 0x5f3759df - (i >> 1);               // what the fuck?
    y = *(float *)&i;
    y = y * (threehalfs - (x2 * y * y));   // 1st iteration
    y = y * (threehalfs - (x2 * y * y));   // 2nd iteration, this can be removed
    return y;
}

int main()
{
    int LoopCount = 1000000;        //循环计算次数
    float dur = 0;        //时间间隔,初始化为0
    float x;        //待计算数
    printf("Please input a number:");        //输入提示
    scanf_s("%f", &x, sizeof(float));

    float y;
    clock_t t1 = clock();
    for (int i = 0; i < LoopCount; i++) {
        y =(float)( 1 / sqrt(x));
    }
    t1 = clock() - t1;
    printf("1/sqrt(%f)=%f\n", x, y);
    printf("%ld\n", t1);
    printf("Use Time_1:%.10f s\n", ((float)t1 / CLOCKS_PER_SEC));

    float z;
    clock_t t2 = clock();
    for (int j = 0; j < LoopCount; j++) {
        z = Q_rsqrt(x);
    }
    t2 = clock() - t2;
    printf("Q_rsqrt(%f)=%f\n", x, z);
    printf("%ld\n", t2);
    printf("Use Time_2:%.10f s\n", ((float)t2 / CLOCKS_PER_SEC));
    
    getchar();
    getchar();
    return 0;
}

之后,出现了一个很奇怪的问题:

是的。。。C中的库函数效率基本上是下面那个算法的4倍!!!
但是,如果用C++中的库函数:

#include<stdio.h>
#include<cmath>
#include<time.h>
float Q_rsqrt(float number)        //从wiki上获取的源代码
{
    long i;
    float x2, y;
    const float threehalfs = 1.5F;
    x2 = number * 0.5F;
    y = number;
    i = *(long *)&y;                       // evil floating point bit level hacking
    i = 0x5f3759df - (i >> 1);               // what the fuck?
    y = *(float *)&i;
    y = y * (threehalfs - (x2 * y * y));   // 1st iteration
    y = y * (threehalfs - (x2 * y * y));   // 2nd iteration, this can be removed
    return y;
}

int main()
{
    int LoopCount = 1000000;        //循环计算次数
    float dur = 0;        //时间间隔,初始化为0
    float x;        //待计算数
    printf("Please input a number:");        //输入提示
    scanf_s("%f", &x, sizeof(float));

    float y;
    clock_t t1 = clock();
    for (int i = 0; i < LoopCount; i++) {
        y =( 1 / sqrt(x));
    }
    t1 = clock() - t1;
    printf("1/sqrt(%f)=%f\n", x, y);
    printf("%ld\n", t1);
    printf("Use Time_1:%.10f s\n", ((float)t1 / CLOCKS_PER_SEC));

    float z;
    clock_t t2 = clock();
    for (int j = 0; j < LoopCount; j++) {
        z = Q_rsqrt(x);
    }
    t2 = clock() - t2;
    printf("Q_rsqrt(%f)=%f\n", x, z);
    printf("%ld\n", t2);
    printf("Use Time_2:%.10f s\n", ((float)t2 / CLOCKS_PER_SEC));
    
    getchar();
    getchar();
    return 0;
}

然后跑出来的结果是这样的:

因此。。。似乎C的效率比C++高了很多很多?现在在VS2015中的实现就连那个神奇的快速算法也比不过了?(还是。。。我这样的比较是不对的。。。)

疑惑中。。。

阿神阿神2804 days ago841

reply all(2)I'll reply

  • 伊谢尔伦

    伊谢尔伦2017-04-17 13:35:02

    Any issue involving efficiency comparison, the first few prerequisites to be determined are:

    • Whether the same compiler is used

    • Whether the same compilation switch is used (for example, if optimization is turned on)

    • Whether it is running under the same environment configuration

    • Whether the running results are accurate and quantitative and repeatable

    After the above conditions are determined, the reliability of the results can be confirmed, and then the reasons for the results can be considered.

    I don’t know if the above prerequisites have been considered in the question.

    The question only tested the results of two algorithms in the same case. Maybe this case is the best case for one algorithm and the worst case for the other algorithm, so the results cannot explain the problem. For example, for an algorithm that generally has a complexity of O(n log n), the worst case may be O(n^2) and the optimal case may be O(n). The merits of an algorithm cannot be determined by just one case.

    reply
    0
  • PHPz

    PHPz2017-04-17 13:35:02

    Additional to @jaege’s answer. You only calculated two values, which makes it difficult to explain the situation. If you want to compare, at least run some more values, such as tens of thousands of values, and then compare.

    reply
    0
  • Cancelreply