/*
 * Dot product of two arrays of doubles.
 *
 * x, y: input arrays; elements 0..n-1 of each are read.
 * n:    number of element pairs to combine. When n <= 0 no element is
 *       read and 0.0 is returned.
 *
 * Returns the sum of x[k] * y[k], accumulated in increasing index order
 * starting from 0.0.
 */
double vectors_dot_prod(const double *x, const double *y, int n)
{
    double sum = 0.0;
    int k = 0;

    while (k < n)
    {
        sum += x[k] * y[k];
        ++k;
    }

    return sum;
}

/*
 * Matrix-vector product, in matrix form: result = mat * vec.
 *
 * mat:    array of `rows` pointers; each mat[r] is handed to
 *         vectors_dot_prod() together with vec and the length `cols`.
 * vec:    the vector operand, passed to vectors_dot_prod() with length `cols`.
 * result: output array; entries 0..rows-1 are overwritten.
 * rows:   number of rows to process (nothing is written when rows <= 0).
 * cols:   length passed to vectors_dot_prod() for every row.
 */
void matrix_vector_mult(const double **mat, const double *vec,
                        double *result, int rows, int cols)
{
    const double **row = mat;
    double *out = result;
    int remaining;

    /* Walk the row pointers and the output array side by side. */
    for (remaining = rows; remaining > 0; --remaining)
    {
        *out = vectors_dot_prod(*row, vec, cols);
        ++out;
        ++row;
    }
}

/*
 * Dot product of two arrays of doubles, with the main loop manually
 * unrolled by a factor of four.
 *
 * x, y: input arrays; elements 0..n-1 of each are read.
 * n:    number of element pairs to combine. When n <= 0 no element is
 *       read and 0.0 is returned.
 *
 * Summation order: within each group of four, the four products are
 * added together first and that partial sum is then added to the
 * accumulator; any leftover elements (fewer than four) are added one by
 * one afterwards.
 */
double vectors_dot_prod2(const double *x, const double *y, int n)
{
    double acc = 0.0;
    int pos = 0;

    /* Unrolled part: four element pairs per iteration. */
    while (pos <= n - 4)
    {
        const double *xs = x + pos;
        const double *ys = y + pos;
        double group = xs[0] * ys[0] +
                       xs[1] * ys[1] +
                       xs[2] * ys[2] +
                       xs[3] * ys[3];

        acc += group;
        pos += 4;
    }

    /* Tail: whatever did not fill a complete group of four. */
    while (pos < n)
    {
        acc += x[pos] * y[pos];
        ++pos;
    }

    return acc;
}

/*
 * Matrix-vector product, in matrix form: result = mat * vec, computed
 * row by row with vectors_dot_prod2().
 *
 * mat:    array of `rows` pointers; each mat[r] is handed to
 *         vectors_dot_prod2() together with vec and the length `cols`.
 * vec:    the vector operand, passed to vectors_dot_prod2() with length `cols`.
 * result: output array; entries 0..rows-1 are overwritten.
 * rows:   number of rows to process (nothing is written when rows <= 0).
 * cols:   length passed to vectors_dot_prod2() for every row.
 */
void matrix_vector_mult2(const double **mat, const double *vec,
                         double *result, int rows, int cols)
{
    int r = 0;

    while (r < rows)
    {
        const double *row = mat[r];

        result[r] = vectors_dot_prod2(row, vec, cols);
        r++;
    }
}

#include <time.h>
#include <stdio.h>

/*
 * Benchmark driver: fills a ROWS x COLS matrix and a COLS-element vector
 * with fixed values, calls matrix_vector_mult2() REPEATS times, and prints
 * the average CPU time per call in microseconds as measured by clock().
 *
 * argc, argv: unused.
 *
 * Returns 0 on success, 1 if clock() reports that processor time is
 * unavailable.
 */
int main(int argc, const char *argv[])
{
    /* Problem size and repetition count, named once instead of being
       repeated as literals throughout the function. */
    enum { ROWS = 300, COLS = 50, REPEATS = 100000 };

    /* Static storage keeps the working set off the stack. */
    static double mat[ROWS][COLS];
    static const double *matp[ROWS];
    static double vector[COLS];
    static double result[ROWS];

    (void)argc;
    (void)argv;

    /* mat[i][j] = i + j; matp[i] points at the start of row i so the
       matrix can be passed as an array of row pointers. */
    for (int i = 0; i < ROWS; i++)
    {
        for (int j = 0; j < COLS; j++)
        {
            mat[i][j] = (i + j);
        }
        matp[i] = &mat[i][0];
    }

    for (int i = 0; i < COLS; i++)
    {
        vector[i] = i * i;
    }

    clock_t start = clock();
    for (int n = 0; n < REPEATS; n++)
    {
        matrix_vector_mult2(matp, vector, result, ROWS, COLS);
    }
    clock_t stop = clock();

    /* clock() returns (clock_t)-1 when processor time is not available. */
    if (start == (clock_t)-1 || stop == (clock_t)-1)
    {
        fprintf(stderr, "clock() failed: processor time not available\n");
        return 1;
    }

    /* C-style cast: the function-style form double(expr) is C++ only and
       is rejected by a C compiler.  ticks -> seconds -> seconds per call
       -> microseconds per call. */
    printf("Computing time = %0.3fus\n",
           (double)(stop - start) / CLOCKS_PER_SEC / REPEATS * 1000000);
    return 0;
}
