/*
適当にmain.cppにコピペしてコンパイル
処理は下の方
*/
/*************************************
簡易ベンチマーク (BMT_/bmt_) (途中)
(.c/.cpp/Windows/Linux/x86/AMD64)
*************************************/
/* Identifiers of the timer back-ends that BMT_TIMER_MODE can select. */
#define BMT_STD 0
#define BMT_RTC 1
#define BMT_TSC 2
/*
BMT_TIMER_MODE selects the time source used for measurements:
0 or BMT_STD = standard clock
1 or BMT_RTC = real-time clock (-lrt)
2 or BMT_TSC = time stamp counter
*/
#define BMT_TIMER_MODE 2
/*
Number of measurements (BMT_MAXTRY) and number of iterations per
measurement (BMT_MAXLOOP).
< pseudo code >
for(BMT_MAXTRY) {
start measurement
for(BMT_MAXLOOP) {
work
}
stop measurement & update best time
}
Adjust both values as needed until the results become stable.
*/
/* Each timer mode gets its own pair of defaults. */
#if BMT_TIMER_MODE == BMT_STD
#define BMT_MAXTRY 1
#define BMT_MAXLOOP 1
#elif BMT_TIMER_MODE == BMT_RTC
#define BMT_MAXTRY 1
#define BMT_MAXLOOP 1
#elif BMT_TIMER_MODE == BMT_TSC
#define BMT_MAXTRY 100
#define BMT_MAXLOOP 1024
#endif
/*
BMT_USE_THREAD
0 = do not use threads
1 = use threads (-lpthread)
When non-zero, the atomic / spin-lock / thread helpers further down are compiled.
*/
#define BMT_USE_THREAD 0
/*==================================*/
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <limits.h>
#include <memory.h>
#include <malloc.h>
#include <time.h>
/*
 * Platform / compiler abstraction.
 * Provides the TCHAR-style names used by this file (_tmain, _T, _TCHAR) and
 * the BMT_INLINE / BMT_NOINLINE / BMT_ALIGNED(x) attribute macros.
 */
#if defined(_WIN32)
/* NOMINMAX is defined (if not already) before <windows.h> is included. */
#if !defined(NOMINMAX)
#define NOMINMAX 1
#endif
#include <windows.h>
#include <tchar.h>
#if defined(__GNUC__)
/* GCC-compatible compiler on Windows: __attribute__ spellings. */
#define BMT_INLINE static inline
#define BMT_NOINLINE __attribute__((noinline))
#define BMT_ALIGNED(x) __attribute__((aligned(x)))
#else
/* Other Windows compilers: __inline / __declspec spellings. */
#define BMT_INLINE __inline
#define BMT_NOINLINE __declspec(noinline)
#define BMT_ALIGNED(x) __declspec(align(x))
#endif
#else /* Linux etc.. */
/* <tchar.h> is not included here: map its names onto plain char / main. */
#define _tmain main
#define _T(x) x
#define _TCHAR char
#define BMT_INLINE static inline
#define BMT_NOINLINE __attribute__((noinline))
#define BMT_ALIGNED(x) __attribute__((aligned(x)))
#endif
/* printf */
/*
 * printf-style output helper used by the whole harness.
 *
 * fmt : format string in the build's text type (_TCHAR); the remaining
 *       arguments are consumed according to it.
 *
 * Wide-character Windows builds (UNICODE/_UNICODE) go through vwprintf,
 * everything else through vprintf. On MinGW stdout is flushed after every
 * call.
 */
void bmt_printf(_TCHAR const *fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
#if defined(_WIN32) && (defined(UNICODE)||defined(_UNICODE))
    vwprintf(fmt, ap);
#else
    vprintf(fmt, ap);
#endif
    /* Every va_start must be matched by va_end before the function returns. */
    va_end(ap);
#if defined(__MINGW32__) || defined(__MINGW64__)
    fflush(stdout);
#endif
}
/* timer */
/*
 * Timer value types, chosen per BMT_TIMER_MODE:
 *   bmt_rt = raw value as delivered by the time source
 *   bmt_nt = normalized integer tick count used for elapsed/best times
 */
#if BMT_TIMER_MODE == BMT_STD
typedef unsigned long bmt_nt; /* normalized type */
typedef clock_t bmt_rt; /* raw type */
#elif BMT_TIMER_MODE == BMT_RTC
typedef unsigned long long bmt_nt;
#if defined(_WIN32)
typedef LARGE_INTEGER bmt_rt;
#else
typedef struct timespec bmt_rt;
#endif
#elif BMT_TIMER_MODE == BMT_TSC
/* In TSC mode raw and normalized values are the same 64-bit counter. */
typedef unsigned long long bmt_nt;
typedef unsigned long long bmt_rt;
#endif
/*
 * Timer state, padded out to 64 bytes.
 *
 * The payload lives in member `i`; `pad` only exists to give the object a
 * size of 64 bytes. A union is used for the padding instead of a trailing
 * `char pad[64 - sizeof(inner)]` array, because that array would have length
 * zero (not valid ISO C) whenever the payload is exactly 64 bytes, e.g. with
 * two 16-byte `struct timespec` stamps.
 */
typedef union {
    struct BMT_TIMER_INNER {
        double freq;      /* ticks per second, written by bmt_setfreq() where the mode provides it */
        bmt_rt st, et;    /* raw start / end stamps of the current try */
        bmt_nt elp, best; /* elapsed ticks of the last try / smallest elapsed so far */
        int lc, tc;       /* tc: try counter driven by BMT_TIMER_BEGIN; lc: not used by the visible code */
    } i;
    char pad[64];         /* size padding only, never accessed */
} bmt_timer_t;
#if BMT_TIMER_MODE == BMT_STD
/* Convert a raw clock() reading into the normalized tick type. */
BMT_INLINE bmt_nt bmt_norm(bmt_rt *t)
{
    return (bmt_nt)(*t);
}

/* Store the tick rate of clock(), CLOCKS_PER_SEC, as the timer frequency. */
BMT_INLINE void bmt_setfreq(bmt_timer_t *t)
{
    double const ticks_per_sec = (double)CLOCKS_PER_SEC;
    t->i.freq = ticks_per_sec;
}

/* Reset the best time to ULONG_MAX so that a smaller measurement replaces it. */
BMT_INLINE void bmt_setmax(bmt_timer_t *t)
{
    t->i.best = ULONG_MAX;
}

/* Sample the current clock() value into *t. */
BMT_INLINE void bmt_now(bmt_rt *t)
{
    *t = clock();
}
/*
 * Print the best time of `t` under `title`, both in seconds
 * (best / freq) and as a raw tick count.
 */
BMT_INLINE void bmt_result(_TCHAR const *title, bmt_timer_t *t)
{
    double const ticks = (double)t->i.best;
    double const seconds = ticks / t->i.freq;
    bmt_printf(_T("%16s -> %10.6f(s) %.0f(t)\n"), title, seconds, ticks);
}
#elif BMT_TIMER_MODE == BMT_RTC
#if defined(_WIN32)
/* Normalize a LARGE_INTEGER stamp: its QuadPart is the tick count. */
BMT_INLINE bmt_nt bmt_norm(bmt_rt *t)
{
    return (bmt_nt)(t->QuadPart);
}

/*
 * Query the performance-counter frequency and store it in t->i.freq.
 * Returns 1 on success, 0 when QueryPerformanceFrequency() reports failure
 * (t is left untouched in that case).
 */
BMT_INLINE int bmt_setfreq(bmt_timer_t *t)
{
    bmt_rt hz;
    int const ok = (int)QueryPerformanceFrequency(&hz);
    if(!ok) return 0;
    t->i.freq = (double)bmt_norm(&hz);
    return 1;
}

/* Sample the performance counter into *t. */
BMT_INLINE void bmt_now(bmt_rt *t)
{
    QueryPerformanceCounter(t);
}
#else
/* Nanoseconds per second. */
#define BMT_NS (1000000000ull)
/* Flatten a struct timespec into a single nanosecond count. */
BMT_INLINE bmt_nt bmt_norm(bmt_rt *t)
{
    bmt_nt const whole_seconds_ns = (bmt_nt)(t->tv_sec) * BMT_NS;
    return (bmt_nt)(t->tv_nsec) + whole_seconds_ns;
}
/*
 * Probe the real-time clock and store the tick rate in t->i.freq.
 *
 * Returns 1 on success, 0 when clock_getres() fails (t is left untouched).
 *
 * bmt_norm() always converts a timespec to nanoseconds, so one normalized
 * tick is one nanosecond whatever the clock's resolution is. The rate is
 * therefore BMT_NS ticks per second; scaling it by the reported resolution
 * would give wrong second values for any resolution other than 1 ns and a
 * division by zero for a reported resolution of 0. clock_getres() is kept
 * only as the availability check.
 */
BMT_INLINE int bmt_setfreq(bmt_timer_t *t) {
    bmt_rt resolution;
    if(clock_getres(CLOCK_REALTIME, &resolution) != 0) return 0;
    t->i.freq = (double)BMT_NS;
    return 1;
}
/* Sample CLOCK_REALTIME into *t; the call's return value is not checked. */
BMT_INLINE void bmt_now(bmt_rt *t)
{
    clock_gettime(CLOCK_REALTIME, t);
}
#endif
/* Reset the best time to the all-ones 64-bit value so that a smaller measurement replaces it. */
BMT_INLINE void bmt_setmax(bmt_timer_t *t)
{
    bmt_nt const all_ones = 0xffffffffffffffffull /*ULLONG_MAX*/;
    t->i.best = all_ones;
}

/*
 * Print the best time of `t` under `title`, both in seconds
 * (best / freq) and as a raw tick count.
 */
BMT_INLINE void bmt_result(_TCHAR const *title, bmt_timer_t *t)
{
    double const ticks = (double)t->i.best;
    double const seconds = ticks / t->i.freq;
    bmt_printf(_T("%16s -> %10.6f(s) %.0f(t)\n"), title, seconds, ticks);
}
#elif BMT_TIMER_MODE == BMT_TSC
/* In TSC mode the raw stamp already is the normalized value: just dereference. */
#define bmt_norm(x) (*(x))
/* Reset the best time to the all-ones 64-bit value so that a smaller measurement replaces it. */
BMT_INLINE void bmt_setmax(bmt_timer_t *t)
{
    bmt_nt const all_ones = 0xffffffffffffffffull /*ULLONG_MAX*/;
    t->i.best = all_ones;
}
#if defined(_MSC_VER) && defined(_WIN32)
/* Read the time stamp counter through the __rdtsc() intrinsic into *t. */
BMT_NOINLINE void bmt_now(bmt_rt *t)
{
    bmt_rt const stamp = (bmt_rt)__rdtsc();
    *t = stamp;
}
#else
/*
 * Read the time stamp counter with the rdtsc instruction into *t.
 * rdtsc delivers the value as two 32-bit halves (EAX = low, EDX = high),
 * which are combined into one 64-bit stamp.
 */
BMT_NOINLINE void bmt_now(bmt_rt *t)
{
    unsigned int lo, hi;
    __asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
    *t = ((bmt_rt)hi << 32) | lo;
}
#endif
/* Print the best time of `t` under `title` as a raw tick count (no seconds in TSC mode). */
BMT_INLINE void bmt_result(_TCHAR const *title, bmt_timer_t *t)
{
    double const ticks = (double)t->i.best;
    bmt_printf(_T("%16s -> %.0f(t)\n"), title, ticks);
}
#endif
/*
 * Finish one try: take the end stamp, compute the elapsed ticks since the
 * start stamp, and keep the elapsed value as the new best if it is smaller.
 */
BMT_INLINE void bmt_end(bmt_timer_t *t)
{
    bmt_now(&t->i.et);
    t->i.elp = bmt_norm(&t->i.et) - bmt_norm(&t->i.st);
    if(t->i.best > t->i.elp) {
        t->i.best = t->i.elp;
    }
}
/*
 * BMT_TIMER_BEGIN ... BMT_TIMER_END(title) bracket the code to be measured.
 *
 * BEGIN opens a block, resets the best time with bmt_setmax(), and opens two
 * nested loops: BMT_MAXTRY tries (counted down in g_bmt_tdat.i.tc), each of
 * which runs the bracketed code BMT_MAXLOOP times (counted down in the local
 * bmt_lc). The start stamp is taken in the init clause of the inner for,
 * i.e. once per try.
 *
 * END closes the inner loop, calls bmt_end() to take the end stamp and update
 * the best time, closes the outer loop, reports the best time through
 * bmt_result(title, ...), and closes the block.
 *
 * Each macro contains unbalanced braces, so the two must always be used as a
 * pair. Both operate on the file-scope g_bmt_tdat.
 */
#define BMT_TIMER_BEGIN {\
bmt_setmax(&g_bmt_tdat);\
for(g_bmt_tdat.i.tc=BMT_MAXTRY;g_bmt_tdat.i.tc>0;--g_bmt_tdat.i.tc) {\
int bmt_lc = BMT_MAXLOOP;\
for(bmt_now(&g_bmt_tdat.i.st);bmt_lc>0;--bmt_lc) {
#define BMT_TIMER_END(title)\
} bmt_end(&g_bmt_tdat);\
} bmt_result(title, &g_bmt_tdat);\
}
/* The single timer state used by the macros above, declared with 64-byte alignment. */
static BMT_ALIGNED(64) bmt_timer_t g_bmt_tdat;
/* aligned malloc/calloc/free */
/*
 * Allocate `size` bytes whose address is a multiple of `align`.
 *
 * size  : number of usable bytes requested.
 * align : required alignment; must be a non-zero power of two.
 *
 * Returns the aligned pointer, or NULL when `align` is not a non-zero power
 * of two, when the total request would overflow size_t, or when malloc()
 * fails. Release the block with bmt_aligned_free().
 *
 * Layout: the address returned by malloc() is stored in the size_t slot
 * immediately below the returned pointer so that it can be recovered later.
 * Alignments smaller than sizeof(size_t) are raised to sizeof(size_t), which
 * still satisfies the request and keeps that header slot properly aligned.
 */
void *bmt_aligned_malloc(size_t const size,size_t const align/* 2**n */) {
    size_t eff = align;
    size_t extra;
    size_t raw;
    size_t user;

    /* Reject 0 and anything that is not a power of two: the mask below
       only works for 2**n, and 0 would yield a wild header address. */
    if(eff == 0 || (eff & (eff - 1)) != 0) return NULL;
    if(eff < sizeof(size_t)) eff = sizeof(size_t);

    /* Header slot plus worst-case padding needed to reach the alignment. */
    extra = sizeof(size_t) + (eff - 1);
    /* Refuse requests whose total would wrap around and under-allocate. */
    if(size > (size_t)-1 - extra) return NULL;

    raw = (size_t)malloc(size + extra);
    if(!raw) return NULL;

    user = (raw + extra) & ~(eff - 1);
    *(size_t*)(user - sizeof(size_t)) = raw;
    return (void*)user;
}
/*
 * Like bmt_aligned_malloc(), but every one of the `size` bytes is set to 0.
 * The bytes are written one at a time through a volatile pointer.
 * Returns NULL when the underlying allocation fails.
 */
void *bmt_aligned_calloc(size_t size,size_t const align/* 2**n */) {
    void *const block = bmt_aligned_malloc(size, align);
    if(block) {
        char volatile *cur = (char volatile*)block;
        char volatile *const end = cur + size;
        while(cur != end) {
            *cur++ = 0;
        }
    }
    return block;
}
/*
 * Release a block obtained from bmt_aligned_malloc()/bmt_aligned_calloc().
 * The address to hand to free() is read from the size_t slot directly below
 * `p`. A NULL `p` is ignored.
 */
void bmt_aligned_free(void *p)
{
    size_t header;
    if(!p) return;
    header = (size_t)p - sizeof(size_t);
    free((void*)*(size_t*)header);
}
#if BMT_USE_THREAD
/* Thread handle type and the headers needed by the thread helpers, per platform. */
#if defined(_WIN32)
#include <intrin.h>
#include <process.h>
typedef HANDLE bmt_thread_t;
#else
#include <xmmintrin.h>
#include <pthread.h>
typedef pthread_t bmt_thread_t;
#endif
/* atomic */
#if defined(__GNUC__)
/* Atomically store `newval` into *pval and return the value it held before. */
BMT_INLINE int bmt_exchg(int volatile *pval,int newval)
{
    int const previous = (int)__sync_lock_test_and_set(pval, newval);
    return previous;
}

/*
 * Atomically store `newval` into *pval only if it currently equals `oldval`.
 * Returns the value *pval held before the operation.
 */
BMT_INLINE int bmt_exchg_if(int volatile *pval,int newval,int oldval)
{
    int const previous = (int)__sync_val_compare_and_swap(pval, oldval, newval);
    return previous;
}
#elif defined(_WIN32)
/* Atomically store `newval` into *pval and return the value it held before. */
BMT_INLINE int bmt_exchg(int volatile *pval,int newval)
{
    long *const target = (long*)pval;
    return (int)_InterlockedExchange(target, newval);
}

/*
 * Atomically store `newval` into *pval only if it currently equals `oldval`.
 * Returns the value *pval held before the operation.
 */
BMT_INLINE int bmt_exchg_if(int volatile *pval,int newval,int oldval)
{
    long *const target = (long*)pval;
    return (int)_InterlockedCompareExchange(target, newval, oldval);
}
#endif
/* spin lock */
/* States of a spin-lock word. */
typedef enum {
    BMT_UNLOCKED = 0,
    BMT_LOCKED = 1
} bmt_stat_t;

/* Put the lock word *p into state `s` with an atomic exchange. */
BMT_INLINE void bmt_spin_init(int volatile *p,bmt_stat_t s)
{
    bmt_exchg(p, s);
}

/* Pause at least once, then keep pausing until *p reads as zero. Does not take the lock. */
BMT_INLINE void bmt_spin_wait(int volatile *p)
{
    for(;;) {
        _mm_pause();
        if(!*p) break;
    }
}

/*
 * Acquire the lock: try an atomic exchange to BMT_LOCKED; while the previous
 * value was non-zero, wait for the word to read zero and try again.
 */
BMT_INLINE void bmt_spin_lock(int volatile *p)
{
    while(bmt_exchg(p, BMT_LOCKED)) {
        bmt_spin_wait(p);
    }
}

/* Release the lock by atomically exchanging the word to BMT_UNLOCKED. */
BMT_INLINE void bmt_spin_unlock(int volatile *p)
{
    bmt_exchg(p, BMT_UNLOCKED);
}
/* thread */
/* Start-up parameters handed from bmt_create() to the new thread's bmt_first(). */
typedef struct {
    void (*func)(void*); /* user entry point to run in the new thread */
    void *arg;           /* argument passed to func */
    int *lock;           /* spin-lock word the new thread releases once it has copied this struct */
} bmt_thread_data;
#if defined(_WIN32)
/*
 * Thread trampoline passed to _beginthreadex().
 * `arg` points at the creator's bmt_thread_data. Its fields are copied to
 * locals first, then the lock is released (the creator waits on it), and
 * only then is the user function called. Always returns 0.
 */
unsigned __stdcall bmt_first(void *arg)
{
    bmt_thread_data const *const shared = (bmt_thread_data*)arg;
    void (*const entry)(void*) = shared->func;
    void *const entry_arg = shared->arg;
    int *const lock = shared->lock;
    bmt_spin_unlock(lock);
    entry(entry_arg);
    return 0;
}
/*
 * Start a thread running func(arg) and store its handle in *th.
 * Returns 1 on success, 0 when _beginthreadex() returns a null handle.
 *
 * The start-up data lives on this function's stack, so on success the call
 * waits on the lock word until the new thread has released it (which
 * bmt_first() does after taking its own copy).
 */
int bmt_create(bmt_thread_t *th,void (*func)(void*),void *arg)
{
    int handshake;
    bmt_thread_data td;

    td.lock = &handshake;
    td.arg = arg;
    td.func = func;
    bmt_spin_init(td.lock, BMT_LOCKED);

    *th = (bmt_thread_t)_beginthreadex(NULL, 0, bmt_first, &td, 0, NULL);
    if(!*th) return 0;
    bmt_spin_wait(td.lock);
    return 1;
}
/* Wait without timeout for thread `th` to finish, then close its handle. */
void bmt_join(bmt_thread_t th)
{
    WaitForSingleObject(th, INFINITE);
    CloseHandle(th);
}
#else
void *bmt_first(void *arg) {
bmt_thread_data td = *(bmt_thread_data*)arg;
bmt_spin_unlock(td.lock);
(td.func)(td.arg);
return 0;
}
/*
 * Start a thread running func(arg) and store its id in *th.
 * Returns 1 on success, 0 when pthread_create() reports an error.
 *
 * The start-up data lives on this function's stack, so on success the call
 * waits on the lock word until the new thread has released it (which
 * bmt_first() does after taking its own copy).
 */
int bmt_create(bmt_thread_t *th,void (*func)(void*),void *arg)
{
    int handshake;
    bmt_thread_data td;

    td.lock = &handshake;
    td.arg = arg;
    td.func = func;
    bmt_spin_init(td.lock, BMT_LOCKED);

    if(pthread_create(th, NULL, bmt_first, &td)) return 0;
    bmt_spin_wait(td.lock);
    return 1;
}
void bmt_join(BMT_THT th) { pthread_join(th,NULL); }
#endif
#endif /* BMT_USE_THREAD */
/* main */
/* Defined at the end of the file; runs every benchmark function. */
void BMT_NOINLINE run_benchmark();

/*
 * Program entry point.
 * Prints a start banner for the configured timer mode, runs the benchmarks
 * and prints an end banner. In RTC mode the benchmarks are skipped when
 * bmt_setfreq() reports failure. MSVC builds wait for a key press before
 * returning. Always returns 0.
 */
int _tmain() {
#if BMT_TIMER_MODE == BMT_STD
    bmt_setfreq(&g_bmt_tdat);
    bmt_printf(_T("<< start >> clock , %.0fticks/sec\n"), g_bmt_tdat.i.freq);
    run_benchmark();
    bmt_printf(_T("<< end >>\n"));
#elif BMT_TIMER_MODE == BMT_RTC
    if(bmt_setfreq(&g_bmt_tdat)) {
        bmt_printf(_T("<< start >> rtc , %.0fticks/sec\n"), g_bmt_tdat.i.freq);
        run_benchmark();
    } else {
        bmt_printf(_T("<< start >> rtc is not supported.\n"));
    }
    bmt_printf(_T("<< end >>\n"));
#elif BMT_TIMER_MODE == BMT_TSC
    bmt_printf(_T("<< start >> tsc\n"));
    run_benchmark();
    bmt_printf(_T("<< end >>\n"));
#endif
#if defined(_MSC_VER)
    bmt_printf(_T("Press Enter to quit."));
    getchar();
#endif
    return 0;
}
/*************************************
ここから
*************************************/
#include <stdint.h>
/*
 * Store targets for the benchmark bodies.
 * Each volatile word is followed by filler that rounds its slot up to
 * 128 bytes, so `one` and `zero` sit 128 bytes apart (presumably to keep the
 * two stores from sharing a cache line - confirm intent).
 */
struct BUF {
    int volatile one;                 /* written by DO_ONE */
    int dmy1[128 / sizeof(int) - 1];  /* filler up to 128 bytes */
    int volatile zero;                /* written by DO_ZERO */
    int dmy2[128 / sizeof(int) - 1];  /* filler up to 128 bytes */
};
/* Repetition count used by the benchmark functions (each handles 13 bits N times). */
#define N (1000)
/* Actions for a 1 bit / a 0 bit; both expand to a store through a `struct BUF *p` that must be in scope. */
#define DO_ONE p->one = 1
#define DO_ZERO p->zero = 0
/*------------------------------------
851
------------------------------------*/
/*
 * Benchmark variant "851".
 * Walks a 13-bit pattern by always testing bit 0 and then shifting the
 * pattern right by one; a set bit is first re-inserted at bit 13 so that it
 * reappears at bit 12 after the shift (a 13-bit rotate).
 * A 1 bit stores to p->one, a 0 bit stores to p->zero.
 * The loop is timed with BMT_TIMER_BEGIN/BMT_TIMER_END and reported under
 * the title "851". `bits` is declared outside the timed region and keeps its
 * value across repetitions.
 */
void BMT_NOINLINE func_851(struct BUF *p) {
uint_fast16_t bits = 0x49;
BMT_TIMER_BEGIN
// 13 bits * N iterations in total
int i;
for(i = 0; i<(13*N); ++i) { // 13 bits * N times
if (bits & 1) {
DO_ONE; // action when the bit is 1
bits |= 1 << 13;
} else {
DO_ZERO; // action when the bit is 0
}
bits >>= 1;
}
BMT_TIMER_END(_T("851"))
}
/*------------------------------------
864-1
------------------------------------*/
/*
 * Benchmark variant "864-1".
 * For each of N outer iterations, scans the constant pattern with a mask
 * that starts at bit 0 and is shifted left until it leaves the 0x1fff range
 * (13 mask positions). A set bit stores to p->one, a clear bit stores to
 * p->zero. Timed with BMT_TIMER_BEGIN/BMT_TIMER_END and reported under the
 * title "864-1".
 */
void BMT_NOINLINE func_864_1(struct BUF *p) {
uint_fast16_t const bits = 0x49; // bit pattern (the low 13 bits are valid)
BMT_TIMER_BEGIN
// 13 bits * N iterations in total
int i;
for(i = 0; i<N; ++i) { // N times
uint_fast16_t mask;
for(mask = 1; mask & 0x1fff; mask<<=1) { // loop over 13 bits
if(bits & mask) {
DO_ONE; // action when the bit is 1
} else {
DO_ZERO; // action when the bit is 0
}
}
}
BMT_TIMER_END(_T("864-1"))
}
/*------------------------------------
864-2
------------------------------------*/
/*
 * Benchmark variant "864-2".
 * For each of N outer iterations, scans the constant pattern with a mask
 * that starts at 0x1000 (bit 12) and is shifted right until it becomes zero
 * (13 mask positions). A set bit stores to p->one, a clear bit stores to
 * p->zero. Timed with BMT_TIMER_BEGIN/BMT_TIMER_END and reported under the
 * title "864-2".
 */
void BMT_NOINLINE func_864_2(struct BUF *p) {
uint_fast16_t const bits = 0x1240; // bit pattern (the low 13 bits are valid)
BMT_TIMER_BEGIN
// 13 bits * N iterations in total
int i;
for(i = 0; i<N; ++i) { // N times
uint_fast16_t mask;
for(mask = 0x1000; mask; mask>>=1) { // loop over 13 bits
if(bits & mask) {
DO_ONE; // action when the bit is 1
} else {
DO_ZERO; // action when the bit is 0
}
}
}
BMT_TIMER_END(_T("864-2"))
}
/*************************************
run_benchmark
*************************************/
/*
 * Allocate one 4096-byte-aligned struct BUF and run the three benchmark
 * variants on it, in the order 864-2, 851, 864-1, two passes in a row.
 * Does nothing when the allocation fails.
 */
void BMT_NOINLINE run_benchmark() {
    struct BUF *buf = (struct BUF*)bmt_aligned_malloc(sizeof(struct BUF), 4096);
    int pass;

    if(!buf) return;

    for(pass = 0; pass < 2; ++pass) {
        func_864_2(buf);
        func_851(buf);
        func_864_1(buf);
    }
    bmt_aligned_free(buf);
}