#include <time.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Micro-benchmark: sums a large array of doubles twice per iteration,
 * once through a single accumulator and once through eight independent
 * accumulators, and reports the wall-clock ratio between the two.
 *
 * Returns 0 on success, EXIT_FAILURE if the array cannot be allocated.
 */
int main(void) {
	// Both summation loops step by 8 with no tail handling, so the
	// element count must stay a multiple of 8.
	const size_t arr_size = 50000000;
	const int c_iterations = 10;	// iterations of experiment

	double *arr = malloc(arr_size * sizeof *arr);
	if (arr == NULL) {
		fprintf(stderr, "failed to allocate %zu bytes\n", arr_size * sizeof *arr);
		return EXIT_FAILURE;
	}
	for (size_t i = 0; i < arr_size; ++i)
		arr[i] = 10;

	double faster_sum = 0;		// sum of the per-iteration speed ratios
	int faster_samples = 0;		// iterations for which a ratio could be computed

	for (int k = 0; k < c_iterations; ++k) {
		// Sequential: every addition goes through the same accumulator.
		double result = 0;
		clock_t start = clock();

		for (size_t i = 0; i < arr_size; i += 8) {
			result += arr[i + 0];
			result += arr[i + 1];
			result += arr[i + 2];
			result += arr[i + 3];
			result += arr[i + 4];
			result += arr[i + 5];
			result += arr[i + 6];
			result += arr[i + 7];
		}

		clock_t end = clock();
		const double c_time_seq = (double)(end - start) / CLOCKS_PER_SEC;
		// result is a double, so it must be printed with %f (not %u).
		printf("seq: %f sec, res: %f, ", c_time_seq, result);

		// Eight independent accumulators: the additions inside one pass of
		// the loop body do not depend on each other.
		double v0 = 0, v1 = 0, v2 = 0, v3 = 0, v4 = 0, v5 = 0, v6 = 0, v7 = 0;
		start = clock();

		for (size_t i = 0; i < arr_size; i += 8) {
			v0 += arr[i + 0];
			v1 += arr[i + 1];
			v2 += arr[i + 2];
			v3 += arr[i + 3];
			v4 += arr[i + 4];
			v5 += arr[i + 5];
			v6 += arr[i + 6];
			v7 += arr[i + 7];
		}
		result = v0 + v1 + v2 + v3 + v4 + v5 + v6 + v7;

		end = clock();
		const double c_time_ipl = (double)(end - start) / CLOCKS_PER_SEC;

		if (c_time_ipl > 0) {
			const double c_faster = c_time_seq / c_time_ipl;
			printf("ipl: %f sec, faster %f X, res: %f \n", c_time_ipl, c_faster, result);
			faster_sum += c_faster;
			++faster_samples;
		} else {
			// The measured time was zero (below the clock() resolution);
			// skip the ratio instead of dividing by zero.
			printf("ipl: %f sec, faster n/a, res: %f \n", c_time_ipl, result);
		}
	}

	if (faster_samples > 0)
		printf("faster AVG: %f \n", faster_sum / faster_samples);
	else
		printf("faster AVG: n/a \n");

	free(arr);

	// Wait for a key press before exiting.
	getchar();
	return 0;
}