#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <omp.h>
void down_sweep(int *a, int n) {
a[n - 1] = 0; // Set the identity
for (int d = (int)log2(n) - 1; d >= 0; --d) {
#pragma omp parallel for
for (int i = 0; i <= n - (1 << (d+1)); i += (1 << (d+1))) {
int t = a[i + (1 << d) - 1];
a[i + (1 << d) - 1] = a[i + (1 << (d+1)) - 1];
a[i + (1 << (d+1)) - 1] += t;
}
}
}
int main() {
int input_size = 1000000;
int next_power_of_two
= (int)pow(2, ceil(log2
(input_size
))); int *input
= malloc(next_power_of_two
* sizeof(int)); if (input == NULL) {
return EXIT_FAILURE;
}
for (int i = 0; i < next_power_of_two; i++) {
input[i] = 3;
}
struct timespec start_time, end_time;
clock_gettime(CLOCK_MONOTONIC, &start_time);
down_sweep(input, next_power_of_two);
clock_gettime(CLOCK_MONOTONIC, &end_time);
double seconds = (end_time.tv_sec - start_time.tv_sec) +
(end_time.tv_nsec - start_time.tv_nsec) / 1e9;
double operations = (double)(next_power_of_two - 1); // Number of operations
double gdlops = (operations / seconds) / 1e9;
printf("Time taken: %f seconds\n", seconds
); printf("GDLOPS: %f\n", gdlops
);
return 0;
}
CiNpbmNsdWRlIDxzdGRpby5oPgojaW5jbHVkZSA8c3RkbGliLmg+CiNpbmNsdWRlIDxtYXRoLmg+CiNpbmNsdWRlIDx0aW1lLmg+CiNpbmNsdWRlIDxvbXAuaD4KCnZvaWQgZG93bl9zd2VlcChpbnQgKmEsIGludCBuKSB7CiAgICBhW24gLSAxXSA9IDA7IC8vIFNldCB0aGUgaWRlbnRpdHkKCiAgICBmb3IgKGludCBkID0gKGludClsb2cyKG4pIC0gMTsgZCA+PSAwOyAtLWQpIHsKICAgICAgICAjcHJhZ21hIG9tcCBwYXJhbGxlbCBmb3IKICAgICAgICBmb3IgKGludCBpID0gMDsgaSA8PSBuIC0gKDEgPDwgKGQrMSkpOyBpICs9ICgxIDw8IChkKzEpKSkgewogICAgICAgICAgICBpbnQgdCA9IGFbaSArICgxIDw8IGQpIC0gMV07CiAgICAgICAgICAgIGFbaSArICgxIDw8IGQpIC0gMV0gPSBhW2kgKyAoMSA8PCAoZCsxKSkgLSAxXTsKICAgICAgICAgICAgYVtpICsgKDEgPDwgKGQrMSkpIC0gMV0gKz0gdDsKICAgICAgICB9CiAgICB9Cn0KaW50IG1haW4oKSB7CiAgICBpbnQgaW5wdXRfc2l6ZSA9IDEwMDAwMDA7CiAgICBpbnQgbmV4dF9wb3dlcl9vZl90d28gPSAoaW50KXBvdygyLCBjZWlsKGxvZzIoaW5wdXRfc2l6ZSkpKTsKICAgIGludCAqaW5wdXQgPSBtYWxsb2MobmV4dF9wb3dlcl9vZl90d28gKiBzaXplb2YoaW50KSk7CiAgICBpZiAoaW5wdXQgPT0gTlVMTCkgewogICAgICAgIHJldHVybiBFWElUX0ZBSUxVUkU7CiAgICB9CgogICAgZm9yIChpbnQgaSA9IDA7IGkgPCBuZXh0X3Bvd2VyX29mX3R3bzsgaSsrKSB7CiAgICAgICAgaW5wdXRbaV0gPSAzOwogICAgfQoKICAgIHN0cnVjdCB0aW1lc3BlYyBzdGFydF90aW1lLCBlbmRfdGltZTsKICAgIGNsb2NrX2dldHRpbWUoQ0xPQ0tfTU9OT1RPTklDLCAmc3RhcnRfdGltZSk7CgogICAgZG93bl9zd2VlcChpbnB1dCwgbmV4dF9wb3dlcl9vZl90d28pOwoKICAgIGNsb2NrX2dldHRpbWUoQ0xPQ0tfTU9OT1RPTklDLCAmZW5kX3RpbWUpOwogICAgZG91YmxlIHNlY29uZHMgPSAoZW5kX3RpbWUudHZfc2VjIC0gc3RhcnRfdGltZS50dl9zZWMpICsKICAgICAgICAgICAgICAgICAgICAgKGVuZF90aW1lLnR2X25zZWMgLSBzdGFydF90aW1lLnR2X25zZWMpIC8gMWU5OwogICAgZG91YmxlIG9wZXJhdGlvbnMgPSAoZG91YmxlKShuZXh0X3Bvd2VyX29mX3R3byAtIDEpOyAvLyBOdW1iZXIgb2Ygb3BlcmF0aW9ucwogICAgZG91YmxlIGdkbG9wcyA9IChvcGVyYXRpb25zIC8gc2Vjb25kcykgLyAxZTk7CgogICAgcHJpbnRmKCJUaW1lIHRha2VuOiAlZiBzZWNvbmRzXG4iLCBzZWNvbmRzKTsKICAgIHByaW50ZigiR0RMT1BTOiAlZlxuIiwgZ2Rsb3BzKTsKCiAgICBmcmVlKGlucHV0KTsKICAgIHJldHVybiAwOwp9Cg==