提高字符串(string)分配的性能

我将一个 Java GC 测试程序移植到了 C++(请参阅下面的代码)和 Python。Java 和 Python 的性能比 C++ 好得多,我认为这是因为每次创建这些字符串都必须调用 new。我已经尝试过使用 Boost 的 fast_pool_allocator,但性能实际上反而变差了,从 700 毫秒恶化到 1200 毫秒。是我使用分配器的方式不对,还是有其他我应该做的事情?

编辑:使用 g++ -O3 -march=native --std=c++11 garbage.cpp -lboost_system 编译。g++ 的版本是 4.8.1。一次迭代在 Python 中大约需要 300ms,在 Java 中大约需要 50ms。std::allocator 大约需要 700ms,boost::fast_pool_allocator 大约需要 1200ms。

 #include <string> #include <vector> #include <chrono> #include <list> #include <iostream> #include <boost/pool/pool_alloc.hpp> #include <memory> //#include <gc/gc_allocator.h> using namespace std; #include <sstream> typedef boost::fast_pool_allocator<char> c_allocator; //typedef std::allocator<char> c_allocator; typedef basic_string<char, char_traits<char>, c_allocator> pool_string; namespace patch { template <typename T> pool_string to_string(const T& in) { std::basic_stringstream<char, char_traits<char>, c_allocator> stm; stm << in; return stm.str(); } } #include "mytime.hpp" class Garbage { public: vector<pool_string> outer; vector<pool_string> old; const int nThreads = 1; //static auto time = chrono::high_resolution_clock(); void go() { // outer.resize(1000000); //old.reserve(1000000); auto tt = mytime::msecs(); for (int i = 0; i < 10; ++i) { if (i % 100 == 0) { cout << "DOING AN OLD" << endl; doOld(); tt = mytime::msecs(); } for (int j = 0; j < 1000000/nThreads; ++j) outer.push_back(patch::to_string(j)); outer.clear(); auto t = mytime::msecs(); cout << (t - tt) << endl; tt = t; } } void doOld() { old.clear(); for (int i = 0; i < 1000000/nThreads; ++i) old.push_back(patch::to_string(i)); } }; int main() { Garbage().go(); } 

    问题在于:你每次转换一个整数时,都新建了一个字符串流。

    修复方法:

     namespace patch {

     /// Converts `in` to its pool_string text form by delegating to
     /// boost::lexical_cast.
     ///
     /// @param in  value to convert
     /// @return    the result of boost::lexical_cast<pool_string>(in)
     template <typename T>
     auto to_string(const T& in) -> pool_string
     {
         return boost::lexical_cast<pool_string>(in);
     }

     } // namespace patch

    现在的耗时为:

     DOING AN OLD 0.175462 0.0670085 0.0669926 0.0687969 0.0692518 0.0669318 0.0669196 0.0669187 0.0668962 0.0669185 real 0m0.801s user 0m0.784s sys 0m0.016s 

    在 Coliru 上查看在线运行结果(See it Live on Coliru)

    完整的代码供参考:

     // Allocation benchmark: each round converts one million integers to
     // strings, stores them in a vector, clears the vector, and prints the
     // elapsed time of the round in seconds.
     #include <boost/pool/pool_alloc.hpp>
     #include <chrono>
     #include <iostream>
     #include <list>
     #include <memory>
     #include <sstream>
     #include <string>
     #include <vector>
     #include <boost/lexical_cast.hpp>
     //#include <gc/gc_allocator.h>

     using string = std::string;

     namespace patch {

     /// Converts `in` to a string by delegating to boost::lexical_cast.
     template <typename T>
     string to_string(const T& in)
     {
         return boost::lexical_cast<string>(in);
     }

     } // namespace patch

     /// Stopwatch over std::chrono::high_resolution_clock.
     class Timer {
         typedef std::chrono::high_resolution_clock clock;
         clock::time_point _start;

     public:
         /// Starts measuring at construction.
         Timer() { reset(); }

         /// Restarts the measurement from the current instant.
         void reset() { _start = now(); }

         /// @return seconds elapsed since construction or the last reset().
         double elapsed() const
         {
             using namespace std::chrono;
             const auto e = now() - _start;
             return duration_cast<nanoseconds>(e).count() * 1.0e-9;
         }

         /// @return the current reading of the underlying clock.
         clock::time_point now() const { return clock::now(); }
     };

     class Garbage {
     public:
         std::vector<string> outer;   // refilled and cleared on every round
         std::vector<string> old;     // filled by doOld() and kept across rounds
         const int nThreads = 1;      // divisor applied to the per-round element count

         /// Runs ten rounds and prints the duration of each one.
         void go()
         {
             // reserve(), not resize(): resize() would put a million empty
             // strings in front of the pushed ones and force the first round
             // to reallocate, while reserve() only pre-allocates capacity.
             outer.reserve(1000000 / nThreads);
             //old.reserve(1000000);
             Timer timer;

             for (int i = 0; i < 10; ++i) {
                 if (i % 100 == 0) {
                     std::cout << "DOING AN OLD" << std::endl;
                     doOld();
                     // Restart the stopwatch so that filling `old` is not
                     // charged to this round's measurement.
                     timer.reset();
                 }

                 for (int j = 0; j < 1000000 / nThreads; ++j)
                     outer.push_back(patch::to_string(j));
                 outer.clear();   // keeps the capacity for the next round

                 std::cout << timer.elapsed() << std::endl;
                 timer.reset();
             }
         }

         /// Discards the contents of `old` and refills it with the text form
         /// of 0 .. 1000000/nThreads - 1.
         void doOld()
         {
             old.clear();
             for (int i = 0; i < 1000000 / nThreads; ++i)
                 old.push_back(patch::to_string(i));
         }
     };

     int main()
     {
         Garbage().go();
     }

    由于我的机器上没有使用 Boost,所以我简化了代码,改用标准 C++11 的 to_string(因此无意中“修复”了上面发现的问题),并得到了以下结果:

     // Allocation benchmark using only the standard library: each round
     // converts one million integers with std::to_string, stores them in a
     // vector, clears the vector, and prints the round's duration in seconds.
     #include <string>
     #include <vector>
     #include <chrono>
     #include <list>
     #include <iostream>
     #include <memory>
     //#include <gc/gc_allocator.h>
     #include <sstream>

     /// Stopwatch over std::chrono::high_resolution_clock.
     class Timer {
         typedef std::chrono::high_resolution_clock clock;
         clock::time_point _start;

     public:
         /// Starts measuring at construction.
         Timer() { reset(); }

         /// Restarts the measurement from the current instant.
         void reset() { _start = now(); }

         /// @return seconds elapsed since construction or the last reset().
         double elapsed() const
         {
             using namespace std::chrono;
             const auto e = now() - _start;
             return duration_cast<nanoseconds>(e).count() * 1.0e-9;
         }

         /// @return the current reading of the underlying clock.
         clock::time_point now() const { return clock::now(); }
     };

     class Garbage {
     public:
         std::vector<std::string> outer;   // refilled and cleared on every round
         std::vector<std::string> old;     // filled by doOld() and kept across rounds
         const int nThreads = 1;           // divisor applied to the per-round element count
         Timer timer;                      // starts when the Garbage object is constructed

         /// Runs ten rounds and prints the duration of each one.
         void go()
         {
             // outer.resize(1000000);
             //old.reserve(1000000);
             for (int i = 0; i < 10; ++i) {
                 if (i % 100 == 0) {
                     std::cout << "DOING AN OLD" << std::endl;
                     doOld();
                     // Restart the stopwatch so that filling `old` is not
                     // charged to this round's measurement.
                     timer.reset();
                 }

                 for (int j = 0; j < 1000000 / nThreads; ++j)
                     outer.push_back(std::to_string(j));
                 outer.clear();   // keeps the capacity for the next round

                 std::cout << timer.elapsed() << std::endl;
                 timer.reset();
             }
         }

         /// Discards the contents of `old` and refills it with the text form
         /// of 0 .. 1000000/nThreads - 1.
         void doOld()
         {
             old.clear();
             for (int i = 0; i < 1000000 / nThreads; ++i)
                 old.push_back(std::to_string(i));
         }
     };

     int main()
     {
         Garbage().go();
     }

    编译:

     $ g++ -O3 -std=c++11 gc.cpp $ ./a.out DOING AN OLD 0.414637 0.189082 0.189143 0.186336 0.184449 0.18504 0.186302 0.186055 0.183123 0.186835 

    从2014年4月18日星期五开始,使用相同的编译器选项可以实现类似的结果。

    我的处理器是AMD Phenom(tm)II X4 965,运行频率为3.6GHz(如果我没记错的话)。