/* fork download  (paste-site UI text, not part of the program) */
  1. /*
  2.   適当にmain.cppにコピペしてコンパイル
  3.   処理は下の方
  4. */
  5.  
  6. /*************************************
  7.   簡易ベンチマーク (BMT_/bmt_) (途中)
  8.   (.c/.cpp/Windows/Linux/x86/AMD64)
  9. *************************************/
  /* Timer back-end identifiers accepted by BMT_TIMER_MODE. */
  #define BMT_STD 0
  #define BMT_RTC 1
  #define BMT_TSC 2
  /*
    BMT_TIMER_MODE
    0 or BMT_STD = standard clock()
    1 or BMT_RTC = real-time clock (link with -lrt)
    2 or BMT_TSC = time stamp counter
  */
  #define BMT_TIMER_MODE 2

  /*
    Number of measurements (BMT_MAXTRY) and number of repetitions of the
    measured code per measurement (BMT_MAXLOOP).

    < pseudo code >
    for(BMT_MAXTRY) {
      start measuring
      for(BMT_MAXLOOP) {
        measured code
      }
      stop measuring & update best time
    }

    Tune these until the results become stable.  The measured code runs
    BMT_MAXTRY * BMT_MAXLOOP times per timed section, so large values make
    the whole run proportionally longer.
  */
  #if BMT_TIMER_MODE == BMT_STD
  #define BMT_MAXTRY 1
  #define BMT_MAXLOOP 1
  #elif BMT_TIMER_MODE == BMT_RTC
  #define BMT_MAXTRY 1
  #define BMT_MAXLOOP 1
  #elif BMT_TIMER_MODE == BMT_TSC
  #define BMT_MAXTRY 100
  #define BMT_MAXLOOP 1024
  #else
  /* Fail early instead of leaving BMT_MAXTRY / BMT_MAXLOOP undefined. */
  #error "BMT_TIMER_MODE must be BMT_STD, BMT_RTC or BMT_TSC"
  #endif

  /*
    BMT_USE_THREAD
    0 = do not use threads
    1 = use threads (link with -lpthread)
  */
  #define BMT_USE_THREAD 0
  52.  
  53. /*==================================*/
  54.  
  55. #include <stddef.h>
  56. #include <stdlib.h>
  57. #include <stdio.h>
  58. #include <stdarg.h>
  59. #include <limits.h>
  60. #include <memory.h>
  61. #include <malloc.h>
  62. #include <time.h>
  63. #if defined(_WIN32)
  64. #if !defined(NOMINMAX)
  65. #define NOMINMAX 1
  66. #endif
  67. #include <windows.h>
  68. #include <tchar.h>
  69. #if defined(__GNUC__)
  70. #define BMT_INLINE static inline
  71. #define BMT_NOINLINE __attribute__((noinline))
  72. #define BMT_ALIGNED(x) __attribute__((aligned(x)))
  73. #else
  74. #define BMT_INLINE __inline
  75. #define BMT_NOINLINE __declspec(noinline)
  76. #define BMT_ALIGNED(x) __declspec(align(x))
  77. #endif
  78.  
  79. #else /* Linux etc.. */
  80. #define _tmain main
  81. #define _T(x) x
  82. #define _TCHAR char
  83. #define BMT_INLINE static inline
  84. #define BMT_NOINLINE __attribute__((noinline))
  85. #define BMT_ALIGNED(x) __attribute__((aligned(x)))
  86. #endif
  87.  
  88. /* printf */
  89. void bmt_printf(_TCHAR const *fmt, ...) {
  90. va_list ap; va_start(ap,fmt);
  91. #if defined(_WIN32) && (defined(UNICODE)||defined(_UNICODE))
  92. vwprintf(fmt,ap);
  93. #else
  94. vprintf(fmt,ap);
  95. #endif
  96. #if defined(__MINGW32__) || defined(__MINGW64__)
  97. fflush(stdout);
  98. #endif
  99. }
  100.  
  101. /* timer */
  /*
   * Timer value types, selected by BMT_TIMER_MODE:
   *   bmt_rt = raw type filled in by bmt_now()
   *   bmt_nt = normalized integer tick type produced by bmt_norm()
   */
  #if BMT_TIMER_MODE == BMT_STD
  typedef unsigned long bmt_nt; /* normalized type */
  typedef clock_t bmt_rt;       /* raw type        */
  #elif BMT_TIMER_MODE == BMT_RTC
  typedef unsigned long long bmt_nt;
  #if defined(_WIN32)
  typedef LARGE_INTEGER bmt_rt;
  #else
  typedef struct timespec bmt_rt;
  #endif
  #elif BMT_TIMER_MODE == BMT_TSC
  typedef unsigned long long bmt_nt;
  typedef unsigned long long bmt_rt;
  #endif

  /*
   * Timer state, padded to a whole number of 64-byte units.
   *
   * A union is used for the padding so the pad array can never end up with a
   * zero or negative length, whatever the size of the inner struct is on a
   * given platform.  All fields are still reached through `.i`.
   *
   *   freq     : ticks per second (used to convert best to seconds)
   *   st, et   : raw start / end time stamps of the current measurement
   *   elp      : elapsed ticks of the last measurement
   *   best     : smallest elp seen so far
   *   lc, tc   : loop / try counters (tc drives the BMT_MAXTRY loop)
   */
  typedef union {
    struct BMT_TIMER_INNER {
      double freq;
      bmt_rt st, et;
      bmt_nt elp, best;
      int lc, tc;
    } i;
    char pad[(sizeof(struct BMT_TIMER_INNER) + 63) / 64 * 64];
  } bmt_timer_t;
  123. #if BMT_TIMER_MODE == BMT_STD
  124. BMT_INLINE bmt_nt bmt_norm(bmt_rt *t) { return (bmt_nt)(*t); }
  125. BMT_INLINE void bmt_setfreq(bmt_timer_t *t)
  126. { t->i.freq=(double)CLOCKS_PER_SEC; }
  127. BMT_INLINE void bmt_setmax(bmt_timer_t *t) { t->i.best = ULONG_MAX; }
  128. BMT_INLINE void bmt_now(bmt_rt *t) { *t = clock(); }
  129. BMT_INLINE void bmt_result(_TCHAR const *title, bmt_timer_t *t) {
  130. bmt_printf(_T("%16s -> %10.6f(s) %.0f(t)\n"),title
  131. ,((double)t->i.best) / t->i.freq,(double)t->i.best);
  132. }
  133. #elif BMT_TIMER_MODE == BMT_RTC
  134. #if defined(_WIN32)
  135. BMT_INLINE bmt_nt bmt_norm(bmt_rt *t) { return (bmt_nt)(t->QuadPart); }
  136. BMT_INLINE int bmt_setfreq(bmt_timer_t *t) {
  137. int r; bmt_rt rt;
  138. r = (int)QueryPerformanceFrequency(&rt);
  139. return r?(t->i.freq=(double)bmt_norm(&rt),1):0;
  140. }
  141. BMT_INLINE void bmt_now(bmt_rt *t) { QueryPerformanceCounter(t); }
  142. #else
  143. #define BMT_NS (1000000000ull)
  144. BMT_INLINE bmt_nt bmt_norm(bmt_rt *t)
  145. { return (bmt_nt)(t->tv_nsec)+((bmt_nt)(t->tv_sec)*BMT_NS); }
  146. BMT_INLINE int bmt_setfreq(bmt_timer_t *t) {
  147. int r; bmt_rt rt;
  148. r = clock_getres(CLOCK_REALTIME,&rt);
  149. return r?0:(t->i.freq=(double)BMT_NS/(double)bmt_norm(&rt),1);
  150. }
  151. BMT_INLINE void bmt_now(bmt_rt *t) { clock_gettime(CLOCK_REALTIME,t); }
  152. #endif
  153. BMT_INLINE void bmt_setmax(bmt_timer_t *t) { t->i.best = 0xffffffffffffffffull /*ULLONG_MAX*/; }
  154. BMT_INLINE void bmt_result(_TCHAR const *title, bmt_timer_t *t) {
  155. bmt_printf(_T("%16s -> %10.6f(s) %.0f(t)\n"),title
  156. ,((double)t->i.best) / t->i.freq,(double)t->i.best);
  157. }
  158. #elif BMT_TIMER_MODE == BMT_TSC
  159. #define bmt_norm(x) (*(x))
  160. BMT_INLINE void bmt_setmax(bmt_timer_t *t) { t->i.best = 0xffffffffffffffffull /*ULLONG_MAX*/; }
  161. #if defined(_MSC_VER) && defined(_WIN32)
  162. BMT_NOINLINE void bmt_now(bmt_rt *t) { *t = (bmt_rt)__rdtsc(); }
  163. #else
  164. BMT_NOINLINE void bmt_now(bmt_rt *t) {
  165. unsigned int low, high;
  166. __asm__ volatile("rdtsc" : "=a"(low), "=d"(high));
  167. *t = (bmt_rt)(high)<<32|low;
  168. }
  169. #endif
  170. BMT_INLINE void bmt_result(_TCHAR const *title, bmt_timer_t *t)
  171. { bmt_printf(_T("%16s -> %.0f(t)\n"),title,(double)t->i.best); }
  172. #endif
  173.  
  174. BMT_INLINE void bmt_end(bmt_timer_t *t) {
  175. bmt_now(&t->i.et);
  176. t->i.elp = bmt_norm(&t->i.et)-bmt_norm(&t->i.st);
  177. if(t->i.elp<t->i.best) t->i.best = t->i.elp;
  178. }
  179.  
  180. #define BMT_TIMER_BEGIN {\
  181.   bmt_setmax(&g_bmt_tdat);\
  182.   for(g_bmt_tdat.i.tc=BMT_MAXTRY;g_bmt_tdat.i.tc>0;--g_bmt_tdat.i.tc) {\
  183.   int bmt_lc = BMT_MAXLOOP;\
  184.   for(bmt_now(&g_bmt_tdat.i.st);bmt_lc>0;--bmt_lc) {
  185. #define BMT_TIMER_END(title)\
  186.   } bmt_end(&g_bmt_tdat);\
  187.   } bmt_result(title, &g_bmt_tdat);\
  188. }
  189. static BMT_ALIGNED(64) bmt_timer_t g_bmt_tdat;
  190.  
  191. /* aligned malloc/calloc/free */
  /*
   * Allocate `size` bytes whose address is a multiple of `align`.
   *
   * size  : number of usable bytes requested.
   * align : required alignment; must be a non-zero power of two.
   *
   * Returns the aligned pointer, or NULL when `align` is invalid, when the
   * total request would overflow size_t, or when malloc fails.
   * Release the block with bmt_aligned_free() only.
   *
   * Layout: the address returned by malloc is stored as a size_t directly in
   * front of the aligned pointer so bmt_aligned_free() can recover it.
   */
  void *bmt_aligned_malloc(size_t const size, size_t const align /* 2**n */) {
    size_t eff, overhead, raw, user;

    /* A zero or non-power-of-two alignment would corrupt the mask below. */
    if (align == 0 || (align & (align - 1)) != 0) {
      return NULL;
    }
    /* Align to at least sizeof(size_t) so the header store is itself aligned;
       a larger power of two still satisfies the caller's alignment. */
    eff = align < sizeof(size_t) ? sizeof(size_t) : align;
    overhead = sizeof(size_t) + eff - 1;
    if (overhead < eff || size > (size_t)-1 - overhead) {
      return NULL; /* size + overhead would wrap around */
    }

    raw = (size_t)malloc(size + overhead);
    if (!raw) {
      return NULL;
    }
    user = (raw + overhead) & ~(eff - 1);
    *(size_t *)(user - sizeof(size_t)) = raw;
    return (void *)user;
  }

  /*
   * Same as bmt_aligned_malloc(), but the `size` usable bytes are set to zero.
   * Returns NULL under the same conditions as bmt_aligned_malloc().
   */
  void *bmt_aligned_calloc(size_t size, size_t const align /* 2**n */) {
    void *const block = bmt_aligned_malloc(size, align);
    if (block) {
      /* Byte-wise clear through a volatile pointer, as in the original. */
      char volatile *cursor = (char volatile *)block;
      while (size) {
        *cursor++ = 0;
        size--;
      }
    }
    return block;
  }

  /*
   * Release a block obtained from bmt_aligned_malloc()/bmt_aligned_calloc().
   * Passing NULL is a no-op.  The NULL test is required here because the
   * header in front of `p` is read before calling free().
   */
  void bmt_aligned_free(void *p) {
    if (p) {
      free((void *)*(size_t *)((size_t)p - sizeof(size_t)));
    }
  }
  211.  
  212. #if BMT_USE_THREAD
  213. #if defined(_WIN32)
  214. #include <intrin.h>
  215. #include <process.h>
  216. typedef HANDLE bmt_thread_t;
  217. #else
  218. #include <xmmintrin.h>
  219. #include <pthread.h>
  220. typedef pthread_t bmt_thread_t;
  221. #endif
  222.  
  223. /* atomic */
  224. #if defined(__GNUC__)
  225. BMT_INLINE int bmt_exchg(int volatile *pval,int newval)
  226. { return (int)__sync_lock_test_and_set(pval,newval); }
  227. BMT_INLINE int bmt_exchg_if(int volatile *pval,int newval,int oldval)
  228. { return (int)__sync_val_compare_and_swap(pval,oldval,newval); }
  229. #elif defined(_WIN32)
  230. BMT_INLINE int bmt_exchg(int volatile *pval,int newval)
  231. { return (int)_InterlockedExchange((long*)pval,newval); }
  232. BMT_INLINE int bmt_exchg_if(int volatile *pval,int newval,int oldval)
  233. { return (int)_InterlockedCompareExchange((long*)pval,newval,oldval); }
  234. #endif
  235.  
  236. /* spin lock */
  237. typedef enum { BMT_UNLOCKED=0, BMT_LOCKED=1 } bmt_stat_t;
  238. BMT_INLINE void bmt_spin_init(int volatile *p,bmt_stat_t s)
  239. { bmt_exchg(p,s); }
  240. BMT_INLINE void bmt_spin_wait(int volatile *p)
  241. { do { _mm_pause(); } while(*p); }
  242. BMT_INLINE void bmt_spin_lock(int volatile *p) {
  243. if(bmt_exchg(p,BMT_LOCKED))
  244. do { bmt_spin_wait(p); } while(bmt_exchg(p,BMT_LOCKED));
  245. }
  246. BMT_INLINE void bmt_spin_unlock(int volatile *p) { bmt_exchg(p,BMT_UNLOCKED); }
  247.  
  248. /* thread */
  /*
   * Start-up packet handed from bmt_create() to the new thread.
   *   func : user entry point to run in the new thread
   *   arg  : argument passed to func
   *   lock : spin-lock word the new thread clears once it has copied this
   *          packet; volatile-qualified to match the bmt_spin_* functions
   *          that operate on it
   */
  typedef struct {
    void (*func)(void *);
    void *arg;
    int volatile *lock;
  } bmt_thread_data;
  254.  
  255. #if defined(_WIN32)
  256. unsigned __stdcall bmt_first(void *arg) {
  257. bmt_thread_data td = *(bmt_thread_data*)arg;
  258. bmt_spin_unlock(td.lock);
  259. (td.func)(td.arg);
  260. return 0;
  261. }
  262. int bmt_create(bmt_thread_t *th,void (*func)(void*),void *arg) {
  263. int lock;
  264. bmt_thread_data td;
  265. td.func = func;
  266. td.arg = arg;
  267. td.lock = &lock;
  268. bmt_spin_init(td.lock,BMT_LOCKED);
  269. *th = (bmt_thread_t)_beginthreadex(NULL,0,bmt_first,&td,0,NULL);
  270. return *th?(bmt_spin_wait(td.lock),1):0;
  271. }
  272. void bmt_join(bmt_thread_t th) {
  273. WaitForSingleObject(th,INFINITE);
  274. CloseHandle(th);
  275. }
  276. #else
  277. void *bmt_first(void *arg) {
  278. bmt_thread_data td = *(bmt_thread_data*)arg;
  279. bmt_spin_unlock(td.lock);
  280. (td.func)(td.arg);
  281. return 0;
  282. }
  283.  
  284. int bmt_create(bmt_thread_t *th,void (*func)(void*),void *arg) {
  285. int lock;
  286. bmt_thread_data td;
  287. td.func = func;
  288. td.arg = arg;
  289. td.lock = &lock;
  290. bmt_spin_init(td.lock,BMT_LOCKED);
  291. return pthread_create(th,NULL,bmt_first,&td)?0:(bmt_spin_wait(td.lock),1);
  292. }
  293. void bmt_join(BMT_THT th) { pthread_join(th,NULL); }
  294. #endif
  295. #endif /* BMT_USE_THREAD */
  296.  
  297. /* main */
  298. void BMT_NOINLINE run_benchmark();
  299. int _tmain() {
  300. #if BMT_TIMER_MODE == BMT_STD
  301. bmt_setfreq(&g_bmt_tdat);
  302. bmt_printf(_T("<< start >> clock , %.0fticks/sec\n"),g_bmt_tdat.i.freq);
  303. run_benchmark();
  304. bmt_printf(_T("<< end >>\n"));
  305. #elif BMT_TIMER_MODE == BMT_RTC
  306. if(!bmt_setfreq(&g_bmt_tdat)) {
  307. bmt_printf(_T("<< start >> rtc is not supported.\n"));
  308. } else {
  309. bmt_printf(_T("<< start >> rtc , %.0fticks/sec\n"),g_bmt_tdat.i.freq);
  310. run_benchmark();
  311. }
  312. bmt_printf(_T("<< end >>\n"));
  313. #elif BMT_TIMER_MODE == BMT_TSC
  314. bmt_printf(_T("<< start >> tsc\n"));
  315. run_benchmark();
  316. bmt_printf(_T("<< end >>\n"));
  317. #endif
  318.  
  319. #if defined(_MSC_VER)
  320. bmt_printf(_T("Press Enter to quit."));
  321. getchar();
  322. #endif
  323. return 0;
  324. }
  325.  
  326. /*************************************
  327.   ここから
  328. *************************************/
  329. #include <stdint.h>
  330.  
  /*
   * Target written to by the benchmarks.  `one` and `zero` are volatile so
   * every store is actually performed, and each is followed by filler that
   * rounds its slot up to 128 bytes, placing the two fields 128 bytes apart.
   */
  struct BUF {
    int volatile one;
    int dmy1[128 / sizeof(int) - 1];
    int volatile zero;
    int dmy2[128 / sizeof(int) - 1];
  };

  /* Number of passes over the 13-bit pattern per timed run. */
  #define N (1000)
  /* Action taken for a 1 bit / a 0 bit; both expect a `struct BUF *p` in scope. */
  #define DO_ONE p->one = 1
  #define DO_ZERO p->zero = 0
  341.  
  342. /*------------------------------------
  343.   851
  344. ------------------------------------*/
  345. void BMT_NOINLINE func_851(struct BUF *p) {
  346. uint_fast16_t bits = 0x49;
  347.  
  348. BMT_TIMER_BEGIN
  349. // 全体で 13bit * N回 の処理
  350. int i;
  351. for(i = 0; i<(13*N); ++i) { // 13bit * N回
  352. if (bits & 1) {
  353. DO_ONE; // ビットが 1 の時の処理
  354. bits |= 1 << 13;
  355. } else {
  356. DO_ZERO; // ビットが 0 の時の処理
  357. }
  358. bits >>= 1;
  359. }
  360. BMT_TIMER_END(_T("851"))
  361. }
  362.  
  363. /*------------------------------------
  364.   864-1
  365. ------------------------------------*/
  366. void BMT_NOINLINE func_864_1(struct BUF *p) {
  367. uint_fast16_t const bits = 0x49; // ビット列(下位13ビットが有効)
  368.  
  369. BMT_TIMER_BEGIN
  370. // 全体で 13bit * N回 の処理
  371. int i;
  372. for(i = 0; i<N; ++i) { // N回
  373. uint_fast16_t mask;
  374. for(mask = 1; mask & 0x1fff; mask<<=1) { // 13bit分ループ
  375. if(bits & mask) {
  376. DO_ONE; // ビットが 1 の時の処理
  377. } else {
  378. DO_ZERO; // ビットが 0 の時の処理
  379. }
  380. }
  381. }
  382. BMT_TIMER_END(_T("864-1"))
  383. }
  384.  
  385. /*------------------------------------
  386.   864-2
  387. ------------------------------------*/
  388. void BMT_NOINLINE func_864_2(struct BUF *p) {
  389. uint_fast16_t const bits = 0x1240; // ビット列(下位13ビットが有効)
  390.  
  391. BMT_TIMER_BEGIN
  392. // 全体で 13bit * N回 の処理
  393. int i;
  394. for(i = 0; i<N; ++i) { // N回
  395. uint_fast16_t mask;
  396. for(mask = 0x1000; mask; mask>>=1) { // 13bit分ループ
  397. if(bits & mask) {
  398. DO_ONE; // ビットが 1 の時の処理
  399. } else {
  400. DO_ZERO; // ビットが 0 の時の処理
  401. }
  402. }
  403. }
  404. BMT_TIMER_END(_T("864-2"))
  405. }
  406.  
  407. /*************************************
  408.   run_benchmark
  409. *************************************/
  410. void BMT_NOINLINE run_benchmark() {
  411. struct BUF *p = (struct BUF*)bmt_aligned_malloc(sizeof(struct BUF), 4096);
  412. if(p) {
  413. func_864_2(p);
  414. func_851(p);
  415. func_864_1(p);
  416.  
  417. func_864_2(p);
  418. func_851(p);
  419. func_864_1(p);
  420.  
  421. bmt_aligned_free(p);
  422. }
  423. }
  424.  
/*
  Paste-site run report (not part of the program):
    Time limit exceeded  #stdin #stdout  5s  2860KB
    stdin:  Standard input is empty
    stdout: Standard output is empty
*/