fork download
  1. // bench.c
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <time.h>
  5. #include <stdlib.h>
  6. #include <stdarg.h>
  7. #include <pthread.h>
  8.  
  9. typedef int (*core_fn)(int index,int count,void* data);
  10.  
  11. typedef struct {
  12. pthread_t thread;
  13. int index,count;
  14. void *data;
  15. core_fn core;
  16. int rc,trc;
  17. struct timespec ts_start;
  18. double t_begin,t_end;
  19. } run_ncore_t;
  20.  
  21. static int fatal_error(const char *msg) {
  22. fprintf(stderr,"FATAL_ERROR: %s\n",msg);
  23. exit(1);
  24. }
  25. static double run_ncore_ts(run_ncore_t *prm) {
  26. struct timespec ts[1];
  27. clock_gettime(CLOCK_REALTIME,ts) && fatal_error("clock_gettime");
  28. ts->tv_sec-=prm->ts_start.tv_sec;
  29. if (ts->tv_nsec<prm->ts_start.tv_nsec) {
  30. ts->tv_sec--;
  31. ts->tv_nsec=1000000000-prm->ts_start.tv_nsec+ts->tv_nsec;
  32. } else {
  33. ts->tv_nsec-=prm->ts_start.tv_nsec;
  34. }
  35. return ts->tv_sec+1e-9*ts->tv_nsec;
  36. }
  37. static void* run_ncore_main(void* ctx) {
  38. struct timespec ts;
  39. run_ncore_t *prm=(run_ncore_t*)ctx;
  40. prm->t_begin=run_ncore_ts(prm);
  41. prm->rc=prm->core(prm->index,prm->count,prm->data);
  42. prm->t_end=run_ncore_ts(prm);
  43. return 0;
  44. }
  45. int run_ncore_en_stat=0;
  46. int run_ncore(int count,core_fn core,void *data,double *dt) {
  47. run_ncore_t *cores;
  48. unsigned size,i,nerr;
  49. struct timespec ts[1];
  50. double ts_start, ts_done;
  51.  
  52. if (count<1) return 1; // invalid argument
  53. size=sizeof(run_ncore_t)*count;
  54. cores=(run_ncore_t*)malloc(size);
  55. if (!cores) return 2; // unable to allocate memory
  56. nerr=0; memset(cores,0,size);
  57. clock_gettime(CLOCK_REALTIME,ts) && fatal_error("clock_gettime");
  58. for(i=0;i<count;i++) {
  59. cores[i].index=i;
  60. cores[i].count=count;
  61. cores[i].data=data;
  62. cores[i].core=core;
  63. cores[i].ts_start=*ts;
  64. cores[i].trc=i>0 ? pthread_create(&cores[i].thread,0,run_ncore_main,&cores[i]) : 0;
  65. if (cores[i].trc) nerr++;
  66. }
  67. ts_start=run_ncore_ts(cores);
  68. run_ncore_main(cores);
  69. for(i=1;i<count;i++) {
  70. void* tret=0;
  71. if (cores[i].trc==0) pthread_join(cores[i].thread,&tret);
  72. }
  73. ts_done=run_ncore_ts(cores);
  74. if (run_ncore_en_stat) {
  75. printf("creating threads dt=%.3fms\n",ts_start*1e3);
  76. printf("working time ts=%.3fms\n",ts_done*1e3);
  77. for(i=0;i<count;i++) {
  78. printf("%d. start=%.3fms done=%.6fms\n",i,
  79. cores[i].t_begin*1e3,cores[i].t_end*1e3);
  80. }
  81. }
  82. free((void*)cores); cores=0;
  83. if (nerr) return 3; // has problems
  84. if (dt) *dt=ts_done;
  85. return 0;
  86. }
  87.  
  88. typedef struct {
  89. void *buf;
  90. size_t buf_size;
  91. } memtest1_t;
  92.  
  93. int memtest1_init(memtest1_t *t,size_t buf_size) {
  94. t->buf=malloc(buf_size);
  95. t->buf_size=buf_size;
  96. return t->buf ? 0 : 1;
  97. }
  98. void memtest1_done(memtest1_t *t) {
  99. if (t->buf) { free(t->buf); t->buf=0; }
  100. }
  101.  
  102. int memtest1_memset(int index,int count,void *data) {
  103. size_t h,t,sz; void *p;
  104. memtest1_t *mt=(memtest1_t*)data;
  105. h=index*mt->buf_size/count; h=(h+63)/64*64;
  106. t=(index+1)*mt->buf_size/count; t=(t+63)/64*64;
  107. p=(void*)((char*)mt->buf+h); sz=t-h;
  108. memset(p,255,sz);
  109. return 0;
  110. }
  111. void my_memset8(void* data,size_t value,size_t size) {
  112. size=(size+7)>>3;
  113. asm ("rep stosq\n\t"
  114. :
  115. : "D"(data), "c"(size), "a"(value)
  116. );
  117. }
  118. int memtest1_memset8(int index,int count,void *data) {
  119. size_t h,t,sz; void *p;
  120. memtest1_t *mt=(memtest1_t*)data;
  121. h=index*mt->buf_size/count; h=(h+63)/64*64;
  122. t=(index+1)*mt->buf_size/count; t=(t+63)/64*64;
  123. p=(void*)((char*)mt->buf+h); sz=t-h;
  124. my_memset8(p,255,sz);
  125. return 0;
  126. }
  127. int memtest1_memmove(int index,int count,void *data) {
  128. size_t h,t,m,sz; void *p1, *p2;
  129. memtest1_t *mt=(memtest1_t*)data;
  130. h=index*mt->buf_size/count; h=(h+63)/64*64;
  131. t=(index+1)*mt->buf_size/count; t=(t+63)/64*64;
  132. m=(h+t)/2;
  133. p1=(void*)((char*)mt->buf+h); sz=m-h;
  134. p2=(void*)((char*)mt->buf+m);
  135. memmove(p1,p2,sz);
  136. return 0;
  137. }
  138. int memtest1_memcpy(int index,int count,void *data) {
  139. size_t h,t,m,sz; void *p1, *p2;
  140. memtest1_t *mt=(memtest1_t*)data;
  141. h=index*mt->buf_size/count; h=(h+63)/64*64;
  142. t=(index+1)*mt->buf_size/count; t=(t+63)/64*64;
  143. m=(h+t)/2;
  144. p1=(void*)((char*)mt->buf+h); sz=m-h;
  145. p2=(void*)((char*)mt->buf+m);
  146. memcpy(p1,p2,sz);
  147. return 0;
  148. }
  149.  
  150. enum { KB=1024, MB=1024*KB, GB=1024*MB };
  151. void test_write(int n,size_t size,core_fn core,const char* name) {
  152. int i,j; double dt,rate;
  153. memtest1_t mt[1];
  154. memtest1_init(mt,size);
  155. printf("%s %.3fMb\n",name,(double)size/MB);
  156. for(i=0;i<=n;i++) {
  157. run_ncore(i<1?1:i,core,mt,&dt);
  158. rate=size/dt;
  159. if (i==0)printf("\tfirst time one thread %.1fMb/s\n",rate/MB);
  160. else printf("\t%d\t%.1fMb/s\t%.2fms\n",i,rate/MB,dt*1e3);
  161. }
  162. memtest1_done(mt);
  163. }
  164.  
  165. void test_copy(int n,size_t size,core_fn core,const char* name) {
  166. int i,j; double dt,rate;
  167. memtest1_t mt[1];
  168. memtest1_init(mt,2*size);
  169. printf("%s %.3fMb->%.3fMb\n",name,(double)size/MB,(double)size/MB);
  170. for(i=0;i<=n;i++) {
  171. run_ncore(i<1?1:i,core,mt,&dt);
  172. rate=size/dt;
  173. if (i==0)printf("\tfirst time one thread %.1fMb/s\n",rate/MB);
  174. else printf("\t%d\t%.1fMb/s\t%.2fms\n",i,rate/MB,dt*1e3);
  175. }
  176. memtest1_done(mt);
  177. }
  178.  
  179. typedef struct {
  180. size_t count;
  181. } test0_t;
  182.  
  183. int test0_core0(size_t x) {
  184. int i,r=0;
  185. for(i=0;i<1900;i++) r^=i-x;
  186. return r;
  187. }
  188. int test0_core(int index,int count,void* data) {
  189. size_t h,t,i; int r=0;
  190. test0_t *ts=(test0_t*)data;
  191.  
  192. h=index*ts->count/count;
  193. t=(index+1)*ts->count/count;
  194. for(i=h;i<t;i++) {
  195. r+=test0_core0(i);
  196. }
  197. return r;
  198. }
  199.  
  200. void test0(int n,size_t count) {
  201. test0_t ts[1]; int i;
  202. double dt,rate,rate1,alpha,k;
  203. ts->count=count;
  204. printf("threading speedup test\n");
  205. for(i=1;i<=n;i++) {
  206. run_ncore(i,test0_core,ts,&dt);
  207. rate=count/dt;
  208. if (i==1) rate1=rate;
  209. k=rate1/rate;
  210. alpha=i>1 ? (k*i-1)/(i-1) : 0;
  211. printf("\t%d\t%.1fMHz/s\t%.2fms\t%.4f\n",i,rate/1000000,dt*1e3,alpha);
  212. }
  213. }
  214.  
  215. int main(int argc,char** argv) {
  216. int n=8; enum { N=512, M=200000 };
  217. if (argc>1) {
  218. n=atoi(argv[1]);
  219. if (n<1) n=1;
  220. if (n>256) n=256;
  221. }
  222. //run_ncore_en_stat=1;
  223. test0(n,M);
  224. test_write(n,N*MB,memtest1_memset,"memset");
  225. test_write(n,N*MB,memtest1_memset8,"rep stosq");
  226. test_copy(n,N*MB,memtest1_memcpy,"memcpy");
  227. test_copy(n,N*MB,memtest1_memmove,"memmove");
  228. return 0;
  229. }
  230.  
  231.  
Success #stdin #stdout 3.64s 984780KB
stdin
Standard input is empty
stdout
threading speedup test
	1	1.2MHz/s	173.86ms	0.0000
	2	1.1MHz/s	176.03ms	1.0249
	3	1.1MHz/s	174.88ms	1.0088
	4	1.2MHz/s	173.71ms	0.9988
	5	1.2MHz/s	173.61ms	0.9982
	6	1.2MHz/s	173.78ms	0.9994
	7	1.1MHz/s	174.12ms	1.0017
	8	1.1MHz/s	175.71ms	1.0121
memset 512.000Mb
	first time one thread 4920.2Mb/s
	1	15989.3Mb/s	32.02ms
	2	18695.2Mb/s	27.39ms
	3	18687.5Mb/s	27.40ms
	4	18723.3Mb/s	27.35ms
	5	18724.0Mb/s	27.34ms
	6	18538.4Mb/s	27.62ms
	7	18605.4Mb/s	27.52ms
	8	18604.4Mb/s	27.52ms
rep stosq 512.000Mb
	first time one thread 5160.3Mb/s
	1	18689.2Mb/s	27.40ms
	2	18790.7Mb/s	27.25ms
	3	18737.3Mb/s	27.33ms
	4	18737.9Mb/s	27.32ms
	5	18788.6Mb/s	27.25ms
	6	18695.7Mb/s	27.39ms
	7	18744.1Mb/s	27.32ms
	8	18662.5Mb/s	27.43ms
memcpy 512.000Mb->512.000Mb
	first time one thread 3535.4Mb/s
	1	11981.5Mb/s	42.73ms
	2	3388.1Mb/s	151.12ms
	3	5293.6Mb/s	96.72ms
	4	6739.2Mb/s	75.97ms
	5	7267.8Mb/s	70.45ms
	6	7764.8Mb/s	65.94ms
	7	7991.0Mb/s	64.07ms
	8	7900.7Mb/s	64.80ms
memmove 512.000Mb->512.000Mb
	first time one thread 3535.6Mb/s
	1	12041.7Mb/s	42.52ms
	2	3467.7Mb/s	147.65ms
	3	5847.7Mb/s	87.56ms
	4	6530.6Mb/s	78.40ms
	5	7409.1Mb/s	69.10ms
	6	7396.6Mb/s	69.22ms
	7	7192.8Mb/s	71.18ms
	8	7717.7Mb/s	66.34ms