fork download
  1. #include <windows.h>
  2. #include <stdio.h>
  3.  
  4. class StopWatch {
  5. LONGLONG start_time,lap_time,freq;
  6. public:
  7. StopWatch() {
  8. freq = 0;
  9. }
  10. ~StopWatch() {
  11. }
  12. bool Setup() {
  13. if (! QueryPerformanceFrequency((LARGE_INTEGER*)&freq)) {
  14. fprintf(stderr, "このハードウェアはパフォーマンスカウンタをサポートしていない為、計測できません。\n");
  15. return false;
  16. }
  17. return true;
  18. }
  19. void Start() {
  20. QueryPerformanceCounter((LARGE_INTEGER*)&start_time);
  21. }
  22. double LapTime() {
  23. QueryPerformanceCounter((LARGE_INTEGER*)&lap_time);
  24. return (double)(lap_time-start_time)/(double)freq;
  25. }
  26. };
  27.  
  28. class CpuFreq {
  29. StopWatch stopwatch;
  30. double freq;
  31. public:
  32. CpuFreq() {
  33. }
  34. ~CpuFreq() {
  35. }
  36. bool Setup() {
  37. if (!stopwatch.Setup()) {
  38. return false;
  39. }
  40. bool except_flag = false;
  41. __try {
  42. __asm {
  43. rdtsc
  44. }
  45. } __except (true) {
  46. except_flag = true;
  47. }
  48. if (except_flag) {
  49. fprintf(stderr, "このCPUはrdtsc命令をサポートしていない為、計測できません。\n");
  50. return false;
  51. }
  52.  
  53. HANDLE process = GetCurrentProcess();
  54. int i;
  55. double maxf=0.0;
  56. for (i=0; i<5; ++i) {
  57. Sleep(10);
  58. SetPriorityClass(process, REALTIME_PRIORITY_CLASS);
  59.  
  60. LONGLONG startcount, lapcount;
  61. double laptime;
  62.  
  63. stopwatch.Start();
  64.  
  65. __asm {
  66. rdtsc
  67. mov dword ptr startcount, eax
  68. mov dword ptr startcount+4, edx
  69. }
  70.  
  71. while ((laptime=stopwatch.LapTime()) < 0.1);
  72.  
  73. __asm {
  74. rdtsc
  75. mov dword ptr lapcount, eax
  76. mov dword ptr lapcount+4, edx
  77. }
  78.  
  79. SetPriorityClass(process, NORMAL_PRIORITY_CLASS);
  80.  
  81. double f = (double)(lapcount-startcount) / laptime;
  82. if (f > maxf) maxf = f;
  83. }
  84. freq = maxf;
  85.  
  86. return true;
  87. }
  88. double GetFreq() {
  89. return freq;
  90. }
  91. };
  92.  
  93.  
  94. #define MMX_FLAG 0x00800000
  95. #define SSE_FLAG 0x02000000
  96. #define SSE2_FLAG 0x04000000
  97.  
  98.  
  99. #define BUFSIZE (32*1024*1024)
  100.  
  101. int buf[(BUFSIZE+256)/4];
  102. int *bufa = (int*)((((int)buf)+255)&(-256));
  103.  
  104. void mk_buf(int size, int mode) {
  105. int i;
  106. switch (mode) {
  107. case 0:
  108. for (i=0; i<size/4; ++i) {
  109. bufa[i] = -1;
  110. }
  111. break;
  112. case 1:
  113. for (i=0; i<size/4; ++i) {
  114. *(float*)&bufa[i] = -1.0f;
  115. }
  116. break;
  117. case 2:
  118. for (i=0; i<size/4; i+=2) {
  119. *(double*)&bufa[i] = -1.0;
  120. }
  121. break;
  122. }
  123.  
  124. }
  125.  
  126. LONGLONG read_int(int count,int range) {
  127. // LONGLONG c;
  128. __asm {
  129. mov esi,bufa
  130.  
  131. mov ebx,range
  132. sub ebx,1
  133.  
  134. mov ecx,count
  135.  
  136. push ebp
  137. sub esp,8
  138.  
  139. rdtsc
  140. mov dword ptr [esp],eax
  141. mov dword ptr [esp+4],edx
  142.  
  143. xor edx,edx
  144. mov edi,-4*8
  145.  
  146. loop1:
  147. mov eax,1024*1024/(4*8*2)
  148. align 16
  149. loop2:
  150. mov ebp,[esi+edx+4*0]
  151. mov ebp,[esi+edx+4*1]
  152. add edi,4*8*2
  153. and edi,ebx
  154. mov ebp,[esi+edx+4*2]
  155. mov ebp,[esi+edx+4*3]
  156. mov ebp,[esi+edx+4*4]
  157. mov ebp,[esi+edx+4*5]
  158. mov ebp,[esi+edx+4*6]
  159. mov ebp,[esi+edx+4*7]
  160.  
  161. mov ebp,[esi+edi+4*0]
  162. mov ebp,[esi+edi+4*1]
  163. add edx,4*8*2
  164. and edx,ebx
  165. mov ebp,[esi+edi+4*2]
  166. mov ebp,[esi+edi+4*3]
  167. mov ebp,[esi+edi+4*4]
  168. mov ebp,[esi+edi+4*5]
  169. mov ebp,[esi+edi+4*6]
  170. mov ebp,[esi+edi+4*7]
  171.  
  172. sub eax,1
  173. jne loop2
  174.  
  175. sub ecx,1
  176. jne loop1
  177.  
  178. rdtsc
  179. sub eax,dword ptr [esp]
  180. sbb edx,dword ptr [esp+4]
  181.  
  182. add esp,8
  183. pop ebp
  184. }
  185. }
  186.  
  187. LONGLONG read_x87(int count,int range) {
  188. LONGLONG c;
  189. __asm {
  190. mov esi,bufa
  191. mov ebx,range
  192. sub ebx,1
  193.  
  194. rdtsc
  195. mov dword ptr c,eax
  196. mov dword ptr c+4,edx
  197.  
  198. xor edx,edx
  199. mov edi,-8*8
  200.  
  201. mov ecx,count
  202. loop1:
  203. mov eax,1024*1024/(8*8*2)
  204. align 16
  205. loop2:
  206. fld qword ptr [esi+edx+8*0]
  207. fstp st(0)
  208. fld qword ptr [esi+edx+8*1]
  209. fstp st(0)
  210. add edi,8*8*2
  211. and edi,ebx
  212. fld qword ptr [esi+edx+8*2]
  213. fstp st(0)
  214. fld qword ptr [esi+edx+8*3]
  215. fstp st(0)
  216. fld qword ptr [esi+edx+8*4]
  217. fstp st(0)
  218. fld qword ptr [esi+edx+8*5]
  219. fstp st(0)
  220. fld qword ptr [esi+edx+8*6]
  221. fstp st(0)
  222. fld qword ptr [esi+edx+8*7]
  223. fstp st(0)
  224.  
  225. fld qword ptr [esi+edi+8*0]
  226. fstp st(0)
  227. fld qword ptr [esi+edi+8*1]
  228. fstp st(0)
  229. add edx,8*8*2
  230. and edx,ebx
  231. fld qword ptr [esi+edi+8*2]
  232. fstp st(0)
  233. fld qword ptr [esi+edi+8*3]
  234. fstp st(0)
  235. fld qword ptr [esi+edi+8*4]
  236. fstp st(0)
  237. fld qword ptr [esi+edi+8*5]
  238. fstp st(0)
  239. fld qword ptr [esi+edi+8*6]
  240. fstp st(0)
  241. fld qword ptr [esi+edi+8*7]
  242. fstp st(0)
  243.  
  244. sub eax,1
  245. jne loop2
  246.  
  247. sub ecx,1
  248. jne loop1
  249.  
  250. rdtsc
  251. sub eax,dword ptr c
  252. sbb edx,dword ptr c+4
  253.  
  254. }
  255. }
  256.  
  257. LONGLONG read_mmx(int count,int range) {
  258. LONGLONG c;
  259. __asm {
  260. mov esi,bufa
  261. mov ebx,range
  262. sub ebx,1
  263.  
  264. rdtsc
  265. mov dword ptr c,eax
  266. mov dword ptr c+4,edx
  267.  
  268. xor edx,edx
  269. mov edi,-8*8
  270.  
  271. mov ecx,count
  272. loop1:
  273. mov eax,1024*1024/(8*8*2)
  274. align 16
  275. loop2:
  276. movq mm0,[esi+edx+8*0]
  277. movq mm0,[esi+edx+8*1]
  278. add edi,8*8*2
  279. and edi,ebx
  280. movq mm0,[esi+edx+8*2]
  281. movq mm0,[esi+edx+8*3]
  282. movq mm0,[esi+edx+8*4]
  283. movq mm0,[esi+edx+8*5]
  284. movq mm0,[esi+edx+8*6]
  285. movq mm0,[esi+edx+8*7]
  286.  
  287. movq mm0,[esi+edi+8*0]
  288. movq mm0,[esi+edi+8*1]
  289. add edx,8*8*2
  290. and edx,ebx
  291. movq mm0,[esi+edi+8*2]
  292. movq mm0,[esi+edi+8*3]
  293. movq mm0,[esi+edi+8*4]
  294. movq mm0,[esi+edi+8*5]
  295. movq mm0,[esi+edi+8*6]
  296. movq mm0,[esi+edi+8*7]
  297.  
  298. sub eax,1
  299. jne loop2
  300.  
  301. sub ecx,1
  302. jne loop1
  303.  
  304. rdtsc
  305. sub eax,dword ptr c
  306. sbb edx,dword ptr c+4
  307.  
  308. emms
  309.  
  310. }
  311. }
  312.  
  313. LONGLONG read_sse(int count,int range) {
  314. LONGLONG c;
  315. __asm {
  316. mov esi,bufa
  317. mov ebx,range
  318. sub ebx,1
  319.  
  320. rdtsc
  321. mov dword ptr c,eax
  322. mov dword ptr c+4,edx
  323.  
  324. xor edx,edx
  325. mov edi,-16*8
  326.  
  327. mov ecx,count
  328. loop1:
  329. mov eax,1024*1024/(16*8*2)
  330. align 16
  331. loop2:
  332. movaps xmm0,[esi+edx+16*0]
  333. movaps xmm0,[esi+edx+16*1]
  334. add edi,16*8*2
  335. and edi,ebx
  336. movaps xmm0,[esi+edx+16*2]
  337. movaps xmm0,[esi+edx+16*3]
  338. movaps xmm0,[esi+edx+16*4]
  339. movaps xmm0,[esi+edx+16*5]
  340. movaps xmm0,[esi+edx+16*6]
  341. movaps xmm0,[esi+edx+16*7]
  342.  
  343. movaps xmm0,[esi+edi+16*0]
  344. movaps xmm0,[esi+edi+16*1]
  345. add edx,16*8*2
  346. and edx,ebx
  347. movaps xmm0,[esi+edi+16*2]
  348. movaps xmm0,[esi+edi+16*3]
  349. movaps xmm0,[esi+edi+16*4]
  350. movaps xmm0,[esi+edi+16*5]
  351. movaps xmm0,[esi+edi+16*6]
  352. movaps xmm0,[esi+edi+16*7]
  353.  
  354. sub eax,1
  355. jne loop2
  356.  
  357. sub ecx,1
  358. jne loop1
  359.  
  360. rdtsc
  361. sub eax,dword ptr c
  362. sbb edx,dword ptr c+4
  363.  
  364. }
  365. }
  366.  
  367.  
  368. LONGLONG read_avx(int count,int range) {
  369. LONGLONG c;
  370. __asm {
  371. mov esi,bufa
  372. mov ebx,range
  373. sub ebx,1
  374.  
  375. rdtsc
  376. mov dword ptr c,eax
  377. mov dword ptr c+4,edx
  378.  
  379. xor edx,edx
  380. mov edi,-32*8
  381.  
  382. mov ecx,count
  383. loop1:
  384. mov eax,1024*1024/(32*8*2)
  385. align 16
  386. loop2:
  387. vmovaps ymm0,[esi+edx+32*0]
  388. vmovaps ymm0,[esi+edx+32*1]
  389. add edi,32*8*2
  390. and edi,ebx
  391. vmovaps ymm0,[esi+edx+32*2]
  392. vmovaps ymm0,[esi+edx+32*3]
  393. vmovaps ymm0,[esi+edx+32*4]
  394. vmovaps ymm0,[esi+edx+32*5]
  395. vmovaps ymm0,[esi+edx+32*6]
  396. vmovaps ymm0,[esi+edx+32*7]
  397.  
  398. vmovaps ymm0,[esi+edi+32*0]
  399. vmovaps ymm0,[esi+edi+32*1]
  400. add edx,32*8*2
  401. and edx,ebx
  402. vmovaps ymm0,[esi+edi+32*2]
  403. vmovaps ymm0,[esi+edi+32*3]
  404. vmovaps ymm0,[esi+edi+32*4]
  405. vmovaps ymm0,[esi+edi+32*5]
  406. vmovaps ymm0,[esi+edi+32*6]
  407. vmovaps ymm0,[esi+edi+32*7]
  408.  
  409. sub eax,1
  410. jne loop2
  411.  
  412. sub ecx,1
  413. jne loop1
  414.  
  415. rdtsc
  416. sub eax,dword ptr c
  417. sbb edx,dword ptr c+4
  418.  
  419. }
  420. }
  421.  
  422. void main() {
  423. printf("load 帯域 計測ツール v0.4+(AVXサポートCPU専用)\n");
  424.  
  425. Sleep(1000);
  426.  
  427. CpuFreq cpufreq;
  428. if (! cpufreq.Setup()) {
  429. exit(0);
  430. }
  431. printf("CPU動作クロック : %.1f MHz\n", cpufreq.GetFreq() / 1000000.0);
  432.  
  433. HANDLE process = GetCurrentProcess();
  434.  
  435. LONGLONG (*test_func[])(int,int) = {read_int, read_x87, read_mmx, read_sse, read_avx};
  436. char *test_name[] = {" Int32bit"," Float64bit", " MMX64bit", " SSE128bit", " AVX256bit"};
  437. int test_mode[] = {0,2,0,1};
  438.  
  439. printf("\n");
  440. printf("アクセス範囲 ");
  441. int nf = 2;
  442. int cpuid_edx;
  443.  
  444. int f;
  445. for (f=0; f<5; ++f) {
  446. printf(" %s", test_name[f]);
  447. }
  448.  
  449. printf("\n");
  450. int size;
  451. for (size=1024; size<=BUFSIZE; size+=size) {
  452.  
  453. printf(" %5dKB :", size/1024);
  454.  
  455. int f;
  456. for (f=0; f<5; ++f) {
  457.  
  458. mk_buf(size, test_mode[f]);
  459.  
  460. double minclk = 100000000000.0;
  461. int i;
  462. for (i=0; i<5; ++i) {
  463. double clk;
  464. int count = 10;
  465. for (;;) {
  466. Sleep(10);
  467. SetPriorityClass(process, REALTIME_PRIORITY_CLASS);
  468.  
  469. test_func[f](1, size);
  470. clk = test_func[f](count, size);
  471.  
  472. SetPriorityClass(process, NORMAL_PRIORITY_CLASS);
  473.  
  474. if (clk/cpufreq.GetFreq() >= 0.1) break;
  475.  
  476. count *= 2;
  477. }
  478. clk /= count;
  479. if (clk < minclk) minclk = clk;
  480. }
  481.  
  482. printf(" %6.0f MB/S", cpufreq.GetFreq()/minclk);
  483. }
  484.  
  485. printf("\n");
  486. }
  487.  
  488. fflush(stdin);
  489. fprintf(stderr, "\n終了します。Enterキーを押してください : ");
  490. scanf("%*c");
  491. }
  492.  
Compilation error #stdin compilation error #stdout 0s 0KB
stdin
Standard input is empty
compilation info
prog.cpp:1:21: fatal error: windows.h: No such file or directory
compilation terminated.
stdout
Standard output is empty