fork download
  1. make
  2. mkdir -p build
  3. /usr/local/cuda/bin/nvcc -c -I/usr/local/cuda/include *.cu
  4. nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
  5. ptxas /var/folders/8v/1gw9hxtx40qdp2slztw3whq80000gn/T//tmpxft_00012625_00000000-5_CUDA_ConvNN.ptx, line 745; warning : Double is not supported. Demoting to float
  6. clang++ -std=c++11 -c -m32 -I/usr/local/cuda/include *.cpp
  7. /usr/local/cuda/bin/nvcc -m32 -L/usr/local/cuda/lib -lcuda -lcudart -lm -o build/main *.o
  8. nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
  9. clang: error: unknown argument: '-malign-double' [-Wunused-command-line-argument-hard-error-in-future]
  10. clang: note: this will be a hard error (cannot be downgraded to a warning) in the future
  11. make: *** [build] Error 1
  12.  
  13. Here is the Makefile that produced the log above:
  14. ------------------
  15. ------------------
  16. PROJECT_NAME = main
  17.  
  18. # NVCC is path to nvcc. Here it is assumed /usr/local/cuda is on one's PATH.
  19. # CC is the compiler for C++ host code.
  20.  
  21. NVCC = /usr/local/cuda/bin/nvcc
  22. CC = clang++ -std=c++11
  23.  
  24. CUDAPATH = /usr/local/cuda
  25. # Directories to search for
  26. #CUDA_ROOT = /usr/local/cuda
  27. #SDK_HOME = /Developer/NVIDIA/CUDA-6.0/C
  28.  
  29. BUILD_DIR = build
  30. # note that nvcc defaults to 32-bit architecture. thus, force C/LFLAGS to comply.
  31. # you could also force nvcc to compile 64-bit with -m64 flag. (and remove -m32 instances)
  32.  
  33. CFLAGS = -c -m32 -I$(CUDAPATH)/include
  34. NVCCFLAGS = -c -I$(CUDAPATH)/include
  35. LFLAGS = -m32 -L$(CUDAPATH)/lib -lcuda -lcudart -lm
  36.  
  37. all: build clean
  38.  
  39. build: build_dir gpu cpu
  40. $(NVCC) $(LFLAGS) -o $(BUILD_DIR)/$(PROJECT_NAME) *.o
  41.  
  42. build_dir:
  43. mkdir -p $(BUILD_DIR)
  44.  
  45. gpu:
  46. $(NVCC) $(NVCCFLAGS) *.cu
  47.  
  48. cpu:
  49. $(CC) $(CFLAGS) *.cpp
  50.  
  51. clean:
  52. rm *.o
  53.  
  54. run:
  55. ./$(BUILD_DIR)/$(PROJECT_NAME)
  56.  
  57.  
  58.  
  59.  
  60.  
  61. #include<iostream>
  62. #include<time.h>
  63. #include<omp.h>
  64. #include<cstdlib>
  65.  
  66. using namespace std;
  67.  
  68. int main(){
  69. clock_t start, finish;
  70. int array[100000];
  71.  
  72. cout << "\n*********Parallel Execution**********\n";
  73.  
  74. int n = 100000;
  75. for(int i = 0; i < n; i++){
  76. array[i] = rand()%10;
  77. }
  78.  
  79. int min = 9999;
  80. int max = -9999;
  81. int min_index;
  82. int max_index;
  83. float sum = 0;
  84.  
  85. double start_parallel = omp_get_wtime( );
  86. #pragma omp parallel for
  87. for(int i = 0; i < n; i++){
  88. if(array[i] < min){
  89. min = array[i];
  90. min_index = i;
  91. }
  92. if(array[i] > max){
  93. max = array[i];
  94. max_index = i;
  95. }
  96. sum = sum + array[i];
  97. }
  98.  
  99. double finish_parallel = omp_get_wtime( );
  100. cout << "\nmin is :" << min;
  101. cout << "\nmax is :" << max;
  102. cout << "\nsum is :" << sum;
  103. cout << "\navg is :" << sum/n;
  104.  
  105.  
  106. cout << "\ntotal time taken " << finish_parallel - start_parallel << " sec\n";
  107.  
  108. cout << "\n*********Sequential Execution**********\n";
  109.  
  110. min = 9999;
  111. max = -9999;
  112. min_index;
  113. max_index;
  114. sum = 0;
  115.  
  116. start = clock();
  117. for(int i = 0; i < n; i++){
  118. if(array[i] < min){
  119. min = array[i];
  120. min_index = i;
  121. }
  122. if(array[i] > max){
  123. max = array[i];
  124. max_index = i;
  125. }
  126. sum = sum + array[i];
  127. }
  128.  
  129. finish = clock();
  130. cout << "\nmin is :" << min;
  131. cout << "\nmax is :" << max;
  132. cout << "\nsum is :" << sum;
  133. cout << "\navg is :" << sum/n;
  134.  
  135.  
  136. cout << "\ntotal time taken " << finish - start << " sec\n\n";
  137.  
  138.  
  139. cout << "Execution time of Parallel is " << (finish - start) / (finish_parallel - start_parallel) << " times faster than Sequential Processing \n\n";
  140.  
  141. return 0;
  142. }
  143.  
Not running #stdin #stdout 0s 0KB
stdin
Standard input is empty
stdout
Standard output is empty