1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | double vectors_dot_prod(const double *x, const double *y, int n) { double res = 0.0; int i; for (i = 0; i < n; i++) { res += x[i] * y[i]; } return res; } void matrix_vector_mult(const double **mat, const double *vec, double *result, int rows, int cols) { // in matrix form: result = mat * vec; int i; for (i = 0; i < rows; i++) { result[i] = vectors_dot_prod(mat[i], vec, cols); } } double vectors_dot_prod2(const double *x, const double *y, int n) { double res = 0.0; int i = 0; for (; i <= n-4; i+=4) { res += (x[i] * y[i] + x[i+1] * y[i+1] + x[i+2] * y[i+2] + x[i+3] * y[i+3]); } for (; i < n; i++) { res += x[i] * y[i]; } return res; } void matrix_vector_mult2(const double **mat, const double *vec, double *result, int rows, int cols) { // in matrix form: result = mat * vec; int i; for (i = 0; i < rows; i++) { result[i] = vectors_dot_prod2(mat[i], vec, cols); } } #include <time.h> #include <stdio.h> int main(int argc, const char *argv[]) { static double mat[300][50]; for (int i=0; i<300; i++) for (int j=0; j<50; j++) mat[i][j] = (i+j); static const double *matp[300]; for (int i=0; i<300; i++) matp[i] = &mat[i][0]; static double vector[50]; for (int i=0; i<50; i++) vector[i] = i*i; static double result[300]; clock_t start = clock(); for (int n=0; n<100000; n++) { matrix_vector_mult(matp, vector, result, 300, 50); } clock_t stop = clock(); printf("Computing time = %0.3fus\n", double(stop - start)/CLOCKS_PER_SEC/100000*1000000); return 0; } |
ZG91YmxlIHZlY3RvcnNfZG90X3Byb2QoY29uc3QgZG91YmxlICp4LCBjb25zdCBkb3VibGUgKnksIGludCBuKQp7CiAgICBkb3VibGUgcmVzID0gMC4wOwogICAgaW50IGk7CiAgICBmb3IgKGkgPSAwOyBpIDwgbjsgaSsrKQogICAgewogICAgICAgIHJlcyArPSB4W2ldICogeVtpXTsKICAgIH0KICAgIHJldHVybiByZXM7Cn0KCnZvaWQgbWF0cml4X3ZlY3Rvcl9tdWx0KGNvbnN0IGRvdWJsZSAqKm1hdCwgY29uc3QgZG91YmxlICp2ZWMsCiAgICAgICAgICAgICAgICAgICAgICAgIGRvdWJsZSAqcmVzdWx0LCBpbnQgcm93cywgaW50IGNvbHMpCnsgLy8gaW4gbWF0cml4IGZvcm06IHJlc3VsdCA9IG1hdCAqIHZlYzsKICAgIGludCBpOwogICAgZm9yIChpID0gMDsgaSA8IHJvd3M7IGkrKykKICAgIHsKICAgICAgICByZXN1bHRbaV0gPSB2ZWN0b3JzX2RvdF9wcm9kKG1hdFtpXSwgdmVjLCBjb2xzKTsKICAgIH0KfQoKZG91YmxlIHZlY3RvcnNfZG90X3Byb2QyKGNvbnN0IGRvdWJsZSAqeCwgY29uc3QgZG91YmxlICp5LCBpbnQgbikKewogICAgZG91YmxlIHJlcyA9IDAuMDsKICAgIGludCBpID0gMDsKICAgIGZvciAoOyBpIDw9IG4tNDsgaSs9NCkKICAgIHsKICAgICAgICByZXMgKz0gKHhbaV0gKiB5W2ldICsKICAgICAgICAgICAgICAgIHhbaSsxXSAqIHlbaSsxXSArCiAgICAgICAgICAgICAgICB4W2krMl0gKiB5W2krMl0gKwogICAgICAgICAgICAgICAgeFtpKzNdICogeVtpKzNdKTsKICAgIH0KICAgIGZvciAoOyBpIDwgbjsgaSsrKQogICAgewogICAgICAgIHJlcyArPSB4W2ldICogeVtpXTsKICAgIH0KICAgIHJldHVybiByZXM7Cn0KCnZvaWQgbWF0cml4X3ZlY3Rvcl9tdWx0Mihjb25zdCBkb3VibGUgKiptYXQsIGNvbnN0IGRvdWJsZSAqdmVjLAogICAgICAgICAgICAgICAgICAgICAgICAgZG91YmxlICpyZXN1bHQsIGludCByb3dzLCBpbnQgY29scykKeyAvLyBpbiBtYXRyaXggZm9ybTogcmVzdWx0ID0gbWF0ICogdmVjOwogICAgaW50IGk7CiAgICBmb3IgKGkgPSAwOyBpIDwgcm93czsgaSsrKQogICAgewogICAgICAgIHJlc3VsdFtpXSA9IHZlY3RvcnNfZG90X3Byb2QyKG1hdFtpXSwgdmVjLCBjb2xzKTsKICAgIH0KfQoKI2luY2x1ZGUgPHRpbWUuaD4KI2luY2x1ZGUgPHN0ZGlvLmg+CgppbnQgbWFpbihpbnQgYXJnYywgY29uc3QgY2hhciAqYXJndltdKQp7CiAgICBzdGF0aWMgZG91YmxlIG1hdFszMDBdWzUwXTsKICAgIGZvciAoaW50IGk9MDsgaTwzMDA7IGkrKykKICAgICAgICBmb3IgKGludCBqPTA7IGo8NTA7IGorKykKICAgICAgICAgICAgbWF0W2ldW2pdID0gKGkraik7CiAgICBzdGF0aWMgY29uc3QgZG91YmxlICptYXRwWzMwMF07CiAgICBmb3IgKGludCBpPTA7IGk8MzAwOyBpKyspCiAgICAgICAgbWF0cFtpXSA9ICZtYXRbaV1bMF07CgogICAgc3RhdGljIGRvdWJsZSB2ZWN0b3JbNTBdOwogICAgZm9yIChpbnQgaT0wOyBpPDUwOyBpKyspCiAgICAgICAgdmVjdG9yW2ldID0gaSppOwoKICAgIHN0YXRpYyBkb3VibGUgcmVzdWx0WzMwMF07CgogICAgY2xvY2tfdCBzdGFydCA9IGNsb2NrKCk7CiAgICBmb3IgKGludCBuPTA7IG48MTAwMDAwOyBuKyspCiAgICB7CiAgICAgICAgbWF0cml4X3ZlY3Rvcl9tdWx0KG1hdHAsIHZlY3RvciwgcmVzdWx0LCAzMDAsIDUwKTsKICAgIH0KICAgIGNsb2NrX3Qgc3RvcCA9IGNsb2NrKCk7CiAgICBwcmludGYoIkNvbXB1dGluZyB0aW1lID0gJTAuM2Z1c1xuIiwKICAgICAgICAgICBkb3VibGUoc3RvcCAtIHN0YXJ0KS9DTE9DS1NfUEVSX1NFQy8xMDAwMDAqMTAwMDAwMCk7CiAgICByZXR1cm4gMDsKfQo=
-
upload with new input
-
result: Success time: 2.95s memory: 2844 kB returned value: 0
Computing time = 29.700us


