#include <stdio.h>
#include <stdlib.h>
#include <time.h>

enum {
    LANES = 8,       /* unroll factor / number of independent accumulators */
    ITERATIONS = 10  /* how many times the experiment is repeated */
};

/* Convert a pair of clock() readings into elapsed CPU seconds. */
static double elapsed_sec(clock_t start, clock_t end)
{
    return (double)(end - start) / CLOCKS_PER_SEC;
}

/*
 * Sum n elements using a single accumulator, unrolled by LANES.
 * Every addition depends on the previous one, so the adds form one
 * long dependency chain.
 */
static double sum_sequential(const int *arr, size_t n)
{
    double result = 0;
    size_t i = 0;

    for (; i + LANES <= n; i += LANES) {
        result += arr[i + 0];
        result += arr[i + 1];
        result += arr[i + 2];
        result += arr[i + 3];
        result += arr[i + 4];
        result += arr[i + 5];
        result += arr[i + 6];
        result += arr[i + 7];
    }
    /* Leftover elements when n is not a multiple of LANES. */
    for (; i < n; ++i) {
        result += arr[i];
    }
    return result;
}

/*
 * Sum n elements using LANES independent accumulators that are combined
 * at the end, so consecutive additions do not depend on each other.
 * (The original labels this variant "IPL"; the output label is kept.)
 */
static double sum_ilp(const int *arr, size_t n)
{
    double v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0, v6 = 0, v7 = 0;
    size_t i = 0;

    for (; i + LANES <= n; i += LANES) {
        v0 += arr[i + 0];
        v1 += arr[i + 1];
        v2 += arr[i + 2];
        v3 += arr[i + 3];
        v4 += arr[i + 4];
        v5 += arr[i + 5];
        v6 += arr[i + 6];
        v7 += arr[i + 7];
    }
    /* Leftover elements when n is not a multiple of LANES. */
    for (; i < n; ++i) {
        v0 += arr[i];
    }
    return v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7;
}

/*
 * Benchmark entry point: fills a large array with the value 10, then
 * ITERATIONS times measures the CPU time of the single-accumulator sum
 * and of the multi-accumulator sum, printing both times, both sums and
 * their ratio, followed by the average ratio.
 *
 * Returns 0 on success, EXIT_FAILURE if the array cannot be allocated.
 */
int main(void)
{
    const size_t arr_size = 50000000;

    /* Heap allocation: the array is far too large for the stack. */
    int *arr = malloc(arr_size * sizeof *arr);
    if (arr == NULL) {
        fprintf(stderr, "failed to allocate %zu elements\n", arr_size);
        return EXIT_FAILURE;
    }
    for (size_t i = 0; i < arr_size; ++i) {
        arr[i] = 10;
    }

    double faster_sum = 0;
    int samples = 0; /* runs that produced a usable ratio */

    for (int k = 0; k < ITERATIONS; ++k) {
        clock_t start = clock();
        const double res_seq = sum_sequential(arr, arr_size);
        clock_t end = clock();
        const double time_seq = elapsed_sec(start, end);

        start = clock();
        const double res_ilp = sum_ilp(arr, arr_size);
        end = clock();
        const double time_ilp = elapsed_sec(start, end);

        /* Printing the sums also keeps the compiler from discarding the loops. */
        printf("seq: %f sec, res: %.0f, ipl: %f sec, ",
               time_seq, res_seq, time_ilp);

        /* clock() is coarse; a zero reading would make the ratio meaningless. */
        if (time_ilp > 0) {
            const double faster = time_seq / time_ilp;
            faster_sum += faster;
            ++samples;
            printf("faster %f X, ", faster);
        } else {
            printf("faster n/a, ");
        }
        printf("res: %.0f\n", res_ilp);
    }

    if (samples > 0) {
        printf("faster AVG: %f\n", faster_sum / samples);
    }

    free(arr);
    return 0;
}
Standard input is empty
seq: 0.100000 sec, res: 0, ipl: 0.090000 sec, faster 1.111111 X, res: 0
seq: 0.100000 sec, res: 0, ipl: 0.090000 sec, faster 1.111111 X, res: 0
seq: 0.100000 sec, res: 0, ipl: 0.090000 sec, faster 1.111111 X, res: 0
seq: 0.100000 sec, res: 0, ipl: 0.100000 sec, faster 1.000000 X, res: 0
seq: 0.100000 sec, res: 0, ipl: 0.090000 sec, faster 1.111111 X, res: 0
seq: 0.100000 sec, res: 0, ipl: 0.090000 sec, faster 1.111111 X, res: 0
seq: 0.100000 sec, res: 0, ipl: 0.090000 sec, faster 1.111111 X, res: 0
seq: 0.110000 sec, res: 0, ipl: 0.080000 sec, faster 1.375000 X, res: 0
seq: 0.100000 sec, res: 0, ipl: 0.090000 sec, faster 1.111111 X, res: 0
seq: 0.110000 sec, res: 0, ipl: 0.080000 sec, faster 1.375000 X, res: 0
faster AVG: 1.152778