fork download
  1. #include <iostream>
  2. #include <vector>
  3. #include <chrono>
  4. using namespace std;
  5.  
  // Accumulated time, in nanoseconds, spent building the grid container:
  // creation1_ns is added to by test1 (vector of row vectors), creation2_ns
  // by test2 (single flat vector). main() resets both to 0 for each grid size.
  6. size_t creation1_ns = 0;
  7. size_t creation2_ns = 0;
  8.  
  // Accumulated time, in nanoseconds, spent in the write pass plus the read
  // (sum) pass: iteration1_ns is added to by test1, iteration2_ns by test2.
  // main() resets both to 0 for each grid size.
  9. size_t iteration1_ns = 0;
  10. size_t iteration2_ns = 0;
  11.  
  12. double test1(size_t w, size_t h){
  13. auto start = chrono::steady_clock::now();
  14. vector<vector<double>> data;
  15. data.reserve(h);
  16. for(size_t row = 0; row < h; ++row){
  17. data.emplace_back(vector<double>(w));
  18. }
  19. creation1_ns += chrono::duration_cast<chrono::nanoseconds>(chrono::steady_clock::now() - start).count();
  20.  
  21. start = chrono::steady_clock::now();
  22. for(size_t row = 0; row < h; ++row){
  23. for(size_t col = 0; col < w; ++col){
  24. data[row][col] += rand(); // Write something
  25. }
  26. }
  27.  
  28. double sum = 0.0;
  29. for(size_t row = 0; row < h; ++row){
  30. for(size_t col = 0; col < w; ++col){
  31. sum += data[row][col]; // Read something
  32. }
  33. }
  34. iteration1_ns += chrono::duration_cast<chrono::nanoseconds>(chrono::steady_clock::now() - start).count();
  35.  
  36. return sum;
  37. }
  38.  
  39.  
  40. double test2(size_t w, size_t h){
  41. auto start = chrono::steady_clock::now();
  42. vector<double> data(w*h);
  43. creation2_ns += chrono::duration_cast<chrono::nanoseconds>(chrono::steady_clock::now() - start).count();
  44.  
  45. start = chrono::steady_clock::now();
  46. for(size_t row = 0; row < h; ++row){
  47. size_t offs = row*w;
  48. for(size_t col = 0; col < w; ++col){
  49. data[offs + col] += rand(); // Write something
  50. }
  51. }
  52.  
  53. double sum = 0.0;
  54. for(size_t row = 0; row < h; ++row){
  55. size_t offs = row*w;
  56. for(size_t col = 0; col < w; ++col){
  57. sum += data[offs + col]; // Read something
  58. }
  59. }
  60. iteration2_ns += chrono::duration_cast<chrono::nanoseconds>(chrono::steady_clock::now() - start).count();
  61.  
  62. return sum;
  63. }
  64.  
  65. int main() {
  66. srand(2);
  67. const auto thoroughness = 50000.0;
  68. auto s = 0;
  69.  
  70. for(int h = 16; h < 10000; h *= 8){
  71. for(int w = 16; w < 10000; w *= 8){
  72. creation1_ns = 0;
  73. creation2_ns = 0;
  74. iteration1_ns = 0;
  75. iteration2_ns = 0;
  76.  
  77. auto trials = thoroughness/(w*h);
  78.  
  79. for(int r = 0; r < trials; ++r){
  80. s+= test1(w, h);
  81. s+= test2(w, h);
  82. }
  83.  
  84. cout<<w<<"x"<<h
  85. <<" alloc speed up: "<< (double(creation1_ns) / creation2_ns)<<"x"
  86. <<" iter speed up: "<< (double(iteration1_ns) / iteration2_ns)<<"x" <<endl;
  87. }
  88. }
  89.  
  90.  
  91. cout<<"print sum to avoid the code from being removed: "<<s<<endl;
  92. return 0;
  93. }
  94.  
Success #stdin #stdout 2.72s 3472KB
stdin
Standard input is empty
stdout
16x16 alloc speed up: 1.97028x iter speed up: 1.01526x
128x16 alloc speed up: 1.62749x iter speed up: 1.08437x
1024x16 alloc speed up: 1.96126x iter speed up: 0.818857x
8192x16 alloc speed up: 1.08617x iter speed up: 1.07597x
16x128 alloc speed up: 2.60728x iter speed up: 1.05362x
128x128 alloc speed up: 1.9301x iter speed up: 1.05701x
1024x128 alloc speed up: 1.10907x iter speed up: 1.06326x
8192x128 alloc speed up: 1.07762x iter speed up: 1.04457x
16x1024 alloc speed up: 5.71674x iter speed up: 1.0753x
128x1024 alloc speed up: 1.1498x iter speed up: 1.02847x
1024x1024 alloc speed up: 1.03182x iter speed up: 1.04656x
8192x1024 alloc speed up: 1.02505x iter speed up: 1.00829x
16x8192 alloc speed up: 2.1236x iter speed up: 1.07109x
128x8192 alloc speed up: 1.16434x iter speed up: 1.04066x
1024x8192 alloc speed up: 1.16075x iter speed up: 1.03849x
8192x8192 alloc speed up: 1.0298x iter speed up: 1.04191x
print sum to avoid the code from being removed: -2147483648