搜尋

首頁  >  問答  >  主體

c++ - 为什么Boost库的搜索函数明显比std的search慢

在学习boost的时候测试了一下boost的algorithm中的三个search函数,分别是Boyer-Moore Search,Boyer-Moore-Horspool Search,Knuth-Morris-Pratt Search,同时也测试了std的search函数,测试的结果有点意外,std的search花的时间比前面三个快了两个数量级,问题出在哪儿?测试的代码如下:

#define _CRT_SECURE_NO_WARNINGS

#include <boost/algorithm/searching/boyer_moore.hpp>
#include <boost/algorithm/searching/boyer_moore_horspool.hpp>
#include <boost\algorithm\searching\knuth_morris_pratt.hpp>
#include <string>
#include <vector>
#include <algorithm>
#include <iostream>
#include <Windows.h>
#include <time.h>
using namespace std; 
using namespace boost::algorithm;



void printMessage(char* msg, int startX, int startY)
{
    HANDLE hd;
    COORD pos;
    pos.X = startX;
    pos.Y = startY;
    hd = GetStdHandle(STD_OUTPUT_HANDLE);
    SetConsoleCursorPosition(hd, pos);
    printf("%s", msg);
}

int main(int argc, char ** argv)
{   
    printf("Generating test set\n");
    vector<int> vec;
    srand(unsigned int(time(NULL)));
    for (int i = 0; i < 10000; i++)
    {
        vec.push_back(rand() % 15000);
        char msg[50];
        sprintf(msg,"%.2f%%\n", i*1.0/99.99);

        printMessage(msg, 0, 1);
    }


    LARGE_INTEGER t1, t2, tc;
    QueryPerformanceFrequency(&tc);
    QueryPerformanceCounter(&t1);


    printf("Testing boyer moore search\n");

    int count = 0;
    for (int i = 0; i < 1000; i++)
    {
        vector<int> key;
        key.push_back(rand() % 15000);
        if (boyer_moore_search(vec.begin(), vec.end(), key.begin(), key.end()) != vec.end())
        {
            count++;
        }
        char msg[50];
        sprintf(msg,"%.2f%%\n", i / 9.99);
        printMessage(msg, 0, 3);
    }
    cout << "Hit " << count << "times\n";
    QueryPerformanceCounter(&t2);
    printf("Costing time: %.2f s\n", (t2.QuadPart - t1.QuadPart)*1.0 / tc.QuadPart);
    system("pause");

    QueryPerformanceCounter(&t1);
    printf("Testing boyer_moore_horspool search\n");
    count = 0;
    for (int i = 0; i < 1000; i++)
    {
        vector<int> key;
        key.push_back(rand() % 15000);
        if (boyer_moore_horspool_search(vec.begin(), vec.end(), key.begin(), key.end()) != vec.end())
        {
            count++;
        }
        char msg[50];
        sprintf(msg, "%.2f%%\n", i / 9.99);
        printMessage(msg, 0, 8);
    }
    cout << "Hit " << count << "times\n";
    QueryPerformanceCounter(&t2);
    printf("Costing time: %.2f s\n", (t2.QuadPart - t1.QuadPart)*1.0 / tc.QuadPart);
    system("pause");

    QueryPerformanceCounter(&t1);
    printf("Testing knuth_morris_pratt search\n");
    count = 0;
    for (int i = 0; i < 1000; i++)
    {
        vector<int> key;
        key.push_back(rand() % 15000);
        if (knuth_morris_pratt_search(vec.begin(), vec.end(), key.begin(), key.end()) != vec.end())
        {
            count++;
        }
        char msg[50];
        sprintf(msg, "%.2f%%\n", i/ 9.99);
        printMessage(msg, 0, 13);
    }
    cout << "Hit " << count << "times\n";
    QueryPerformanceCounter(&t2);
    printf("Costing time: %.2f s\n", (t2.QuadPart - t1.QuadPart)*1.0 / tc.QuadPart);
    system("pause");


    QueryPerformanceCounter(&t1);
    printf("Tesing std search\n");
    count = 0;
    for (int i = 0; i < 1000; i++)
    {
        vector<int> key;
        key.push_back(rand() % 15000);
        if (search(vec.begin(), vec.end(), key.begin(), key.end()) != vec.end())
        {
            count++;
        }
        char msg[50];
        sprintf(msg, "%.2f%%\n", i/ 9.99);
        printMessage(msg, 0, 18);
    }
    cout << "Hit " << count << "times\n";
    QueryPerformanceCounter(&t2);
    printf("Costing time: %.2f s\n", (t2.QuadPart - t1.QuadPart)*1.0 / tc.QuadPart);
    system("pause");

    return 0;
}

测试环境是vs2013,测试结果如下图:

是什么导致了这么大的差距?

PHP中文网PHP中文网2806 天前657

全部回覆(1)我來回復

  • 天蓬老师

    天蓬老师2017-04-17 11:59:50

    問題在於幾次測試所使用的來源串相同,但目標串不同;應該要保證幾次的測試的目標串也相同,才能比較演算法本身的差異(「控制變數法」)。

    例如暴力法串配對:

    pythondef search(source, target):
        for si in range(len(source)):
            for ti in range(len(target)):
                if si + ti >= len(source) or source[si+ti] != target[ti]:
                    break
                if ti == len(target)-1:
                    return si
        return -1
    

    最壞情況下的時間複雜度是O(mn), 其中m, n分別是source, target字串長度,例如有兩個字串:
    source: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab
    target: aaaaab
    這種情況就會是O(mn)的複雜度,但如果target是:baaaaa
    則是O(m)。

    可以把程式的key填充放到vec填滿的循環裡去,都是隨機數沒什麼關係,當然也可以單獨用一個循環。

    個人認為,單純用隨機數做source和target串,幾個演算法的表現都會接近O(m);若想凸顯幾個演算法的複雜度差異,可以針對具體演算法實現構造特殊case(如上面的source, target特例就是根據暴力法的具體實現構造的)

    回覆
    0
  • 取消回覆