double vectors_dot_prod(const double *x, const double *y, int n)
{
double res = 0.0;
int i;
for (i = 0; i < n; i++)
{
res += x[i] * y[i];
}
return res;
}
void matrix_vector_mult(const double **mat, const double *vec,
double *result, int rows, int cols)
{ // in matrix form: result = mat * vec;
int i;
for (i = 0; i < rows; i++)
{
result[i] = vectors_dot_prod(mat[i], vec, cols);
}
}
double vectors_dot_prod2(const double *x, const double *y, int n)
{
double res = 0.0;
int i = 0;
for (; i <= n-4; i+=4)
{
res += (x[i] * y[i] +
x[i+1] * y[i+1] +
x[i+2] * y[i+2] +
x[i+3] * y[i+3]);
}
for (; i < n; i++)
{
res += x[i] * y[i];
}
return res;
}
void matrix_vector_mult2(const double **mat, const double *vec,
double *result, int rows, int cols)
{ // in matrix form: result = mat * vec;
int i;
for (i = 0; i < rows; i++)
{
result[i] = vectors_dot_prod2(mat[i], vec, cols);
}
}
#include <time.h>
#include <stdio.h>
int main(int argc, const char *argv[])
{
static double mat[300][50];
for (int i=0; i<300; i++)
for (int j=0; j<50; j++)
mat[i][j] = (i+j);
static const double *matp[300];
for (int i=0; i<300; i++)
matp[i] = &mat[i][0];
static double vector[50];
for (int i=0; i<50; i++)
vector[i] = i*i;
static double result[300];
clock_t start = clock();
for (int n=0; n<100000; n++)
{
matrix_vector_mult2(matp, vector, result, 300, 50);
}
clock_t stop = clock();
printf("Computing time = %0.3fus\n",
double(stop - start)/CLOCKS_PER_SEC/100000*1000000);
return 0;
}
ZG91YmxlIHZlY3RvcnNfZG90X3Byb2QoY29uc3QgZG91YmxlICp4LCBjb25zdCBkb3VibGUgKnksIGludCBuKQp7CiAgICBkb3VibGUgcmVzID0gMC4wOwogICAgaW50IGk7CiAgICBmb3IgKGkgPSAwOyBpIDwgbjsgaSsrKQogICAgewogICAgICAgIHJlcyArPSB4W2ldICogeVtpXTsKICAgIH0KICAgIHJldHVybiByZXM7Cn0KCnZvaWQgbWF0cml4X3ZlY3Rvcl9tdWx0KGNvbnN0IGRvdWJsZSAqKm1hdCwgY29uc3QgZG91YmxlICp2ZWMsCiAgICAgICAgICAgICAgICAgICAgICAgIGRvdWJsZSAqcmVzdWx0LCBpbnQgcm93cywgaW50IGNvbHMpCnsgLy8gaW4gbWF0cml4IGZvcm06IHJlc3VsdCA9IG1hdCAqIHZlYzsKICAgIGludCBpOwogICAgZm9yIChpID0gMDsgaSA8IHJvd3M7IGkrKykKICAgIHsKICAgICAgICByZXN1bHRbaV0gPSB2ZWN0b3JzX2RvdF9wcm9kKG1hdFtpXSwgdmVjLCBjb2xzKTsKICAgIH0KfQoKZG91YmxlIHZlY3RvcnNfZG90X3Byb2QyKGNvbnN0IGRvdWJsZSAqeCwgY29uc3QgZG91YmxlICp5LCBpbnQgbikKewogICAgZG91YmxlIHJlcyA9IDAuMDsKICAgIGludCBpID0gMDsKICAgIGZvciAoOyBpIDw9IG4tNDsgaSs9NCkKICAgIHsKICAgICAgICByZXMgKz0gKHhbaV0gKiB5W2ldICsKICAgICAgICAgICAgICAgIHhbaSsxXSAqIHlbaSsxXSArCiAgICAgICAgICAgICAgICB4W2krMl0gKiB5W2krMl0gKwogICAgICAgICAgICAgICAgeFtpKzNdICogeVtpKzNdKTsKICAgIH0KICAgIGZvciAoOyBpIDwgbjsgaSsrKQogICAgewogICAgICAgIHJlcyArPSB4W2ldICogeVtpXTsKICAgIH0KICAgIHJldHVybiByZXM7Cn0KCnZvaWQgbWF0cml4X3ZlY3Rvcl9tdWx0Mihjb25zdCBkb3VibGUgKiptYXQsIGNvbnN0IGRvdWJsZSAqdmVjLAogICAgICAgICAgICAgICAgICAgICAgICAgZG91YmxlICpyZXN1bHQsIGludCByb3dzLCBpbnQgY29scykKeyAvLyBpbiBtYXRyaXggZm9ybTogcmVzdWx0ID0gbWF0ICogdmVjOwogICAgaW50IGk7CiAgICBmb3IgKGkgPSAwOyBpIDwgcm93czsgaSsrKQogICAgewogICAgICAgIHJlc3VsdFtpXSA9IHZlY3RvcnNfZG90X3Byb2QyKG1hdFtpXSwgdmVjLCBjb2xzKTsKICAgIH0KfQoKI2luY2x1ZGUgPHRpbWUuaD4KI2luY2x1ZGUgPHN0ZGlvLmg+CgppbnQgbWFpbihpbnQgYXJnYywgY29uc3QgY2hhciAqYXJndltdKQp7CiAgICBzdGF0aWMgZG91YmxlIG1hdFszMDBdWzUwXTsKICAgIGZvciAoaW50IGk9MDsgaTwzMDA7IGkrKykKICAgICAgICBmb3IgKGludCBqPTA7IGo8NTA7IGorKykKICAgICAgICAgICAgbWF0W2ldW2pdID0gKGkraik7CiAgICBzdGF0aWMgY29uc3QgZG91YmxlICptYXRwWzMwMF07CiAgICBmb3IgKGludCBpPTA7IGk8MzAwOyBpKyspCiAgICAgICAgbWF0cFtpXSA9ICZtYXRbaV1bMF07CgogICAgc3RhdGljIGRvdWJsZSB2ZWN0b3JbNTBdOwogICAgZm9yIChpbnQgaT0wOyBpPDUwOyBpKyspCiAgICAgICAgdmVjdG9yW2ldID0gaSppOwoKICAgIHN0YXRpYyBkb3VibGUgcmVzdWx0WzMwMF07CgogICAgY2xvY2tfdCBzdGFydCA9IGNsb2NrKCk7CiAgICBmb3IgKGludCBuPTA7IG48MTAwMDAwOyBuKyspCiAgICB7CiAgICAgICAgbWF0cml4X3ZlY3Rvcl9tdWx0MihtYXRwLCB2ZWN0b3IsIHJlc3VsdCwgMzAwLCA1MCk7CiAgICB9CiAgICBjbG9ja190IHN0b3AgPSBjbG9jaygpOwogICAgcHJpbnRmKCJDb21wdXRpbmcgdGltZSA9ICUwLjNmdXNcbiIsCiAgICAgICAgICAgZG91YmxlKHN0b3AgLSBzdGFydCkvQ0xPQ0tTX1BFUl9TRUMvMTAwMDAwKjEwMDAwMDApOwogICAgcmV0dXJuIDA7Cn0K