make
mkdir -p build
/usr/local/cuda/bin/nvcc -c -I/usr/local/cuda/include *.cu
nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
ptxas /var/folders/8v/1gw9hxtx40qdp2slztw3whq80000gn/T//tmpxft_00012625_00000000-5_CUDA_ConvNN.ptx, line 745; warning : Double is not supported. Demoting to float
clang++ -std=c++11 -c -m32 -I/usr/local/cuda/include *.cpp
/usr/local/cuda/bin/nvcc -m32 -L/usr/local/cuda/lib -lcuda -lcudart -lm -o build/main *.o
nvcc warning : The 'compute_10' and 'sm_10' architectures are deprecated, and may be removed in a future release.
clang: error: unknown argument: '-malign-double' [-Wunused-command-line-argument-hard-error-in-future]
clang: note: this will be a hard error (cannot be downgraded to a warning) in the future
make: *** [build] Error 1
Here is the Makefile:
------------------
------------------
# ---------------------------------------------------------------------------
# Build configuration.
# Every value below may be overridden on the command line, e.g.
#   make CUDAPATH=/opt/cuda NVCC_ARCH=-arch=sm_30
# ---------------------------------------------------------------------------

# Name of the executable produced in $(BUILD_DIR).
PROJECT_NAME := main

# Directory that receives the linked executable.
BUILD_DIR := build

# Root of the CUDA toolkit installation; nvcc, headers and libraries are
# all located relative to it.
CUDAPATH := /usr/local/cuda

# NVCC is the CUDA compiler driver (absolute path, so it need not be on PATH).
# CC is the compiler used for the C++ host code.
NVCC := $(CUDAPATH)/bin/nvcc
CC := clang++ -std=c++11

# Optional GPU architecture flag, empty by default so nvcc keeps its own
# default target. Set it (e.g. -arch=sm_30) to match your GPU; nvcc's default
# target in the original build was the deprecated sm_10, which demotes
# double to float.
NVCC_ARCH ?=

# Host objects, device objects and the final link must all agree on word size.
# Everything is built 64-bit: linking through nvcc with -m32 makes it hand
# '-malign-double' to clang, which clang rejects as an unknown argument.
CFLAGS := -c -m64 -I$(CUDAPATH)/include
NVCCFLAGS := -c -m64 $(NVCC_ARCH) -I$(CUDAPATH)/include
LFLAGS := -m64 $(NVCC_ARCH) -L$(CUDAPATH)/lib -lcuda -lcudart -lm
# None of these targets names a file that the rule creates. Declaring them
# phony matters in particular for 'build', which shares its name with the
# default $(BUILD_DIR) output directory.
.PHONY: all build build_dir gpu cpu clean run

# Default goal: build the executable, then remove the intermediate objects.
# The cleanup runs as a recipe step (not as a prerequisite) so that under
# 'make -j' it can never start before the link has finished.
all: build
	$(MAKE) clean

# Link every object file in the current directory into the executable.
build: build_dir gpu cpu
	$(NVCC) $(LFLAGS) -o $(BUILD_DIR)/$(PROJECT_NAME) *.o

# Make sure the output directory exists.
build_dir:
	mkdir -p $(BUILD_DIR)

# Compile all CUDA sources to object files.
gpu:
	$(NVCC) $(NVCCFLAGS) *.cu

# Compile all C++ host sources to object files.
cpu:
	$(CC) $(CFLAGS) *.cpp

# Remove intermediate object files; $(RM) is 'rm -f', so this succeeds even
# when there is nothing to delete.
clean:
	$(RM) *.o

# Run the previously built executable.
run:
	./$(BUILD_DIR)/$(PROJECT_NAME)
#include<iostream>
#include<time.h>
#include<omp.h>
#include<cstdlib>
using namespace std;
/*
 * Fills an array with pseudo-random digits (0-9) and computes its minimum,
 * maximum, sum and average twice: first with an OpenMP parallel loop, then
 * with a plain sequential loop. Prints the results and the wall-clock time
 * of each run, followed by the sequential/parallel time ratio.
 *
 * Returns 0.
 */
int main(){
    const int n = 100000;
    int array[n];

    std::cout << "\n*********Parallel Execution**********\n";

    for(int i = 0; i < n; i++){
        array[i] = rand()%10;
    }

    int min_val = 9999;
    int max_val = -9999;
    float sum = 0;

    double start_parallel = omp_get_wtime( );

    // min_val, max_val and sum are updated by every iteration; the reduction
    // clauses give each thread a private copy and combine them at the end,
    // so the threads do not race on the shared variables.
    #pragma omp parallel for reduction(min:min_val) reduction(max:max_val) reduction(+:sum)
    for(int i = 0; i < n; i++){
        if(array[i] < min_val){
            min_val = array[i];
        }
        if(array[i] > max_val){
            max_val = array[i];
        }
        sum = sum + array[i];
    }

    double finish_parallel = omp_get_wtime( );
    double parallel_seconds = finish_parallel - start_parallel;

    std::cout << "\nmin is :" << min_val;
    std::cout << "\nmax is :" << max_val;
    std::cout << "\nsum is :" << sum;
    std::cout << "\navg is :" << sum/n;
    std::cout << "\ntotal time taken " << parallel_seconds << " sec\n";

    std::cout << "\n*********Sequential Execution**********\n";

    min_val = 9999;
    max_val = -9999;
    sum = 0;

    // Use the same wall clock as the parallel run so both timings are in
    // seconds and the ratio printed below compares like with like.
    double start_sequential = omp_get_wtime( );

    for(int i = 0; i < n; i++){
        if(array[i] < min_val){
            min_val = array[i];
        }
        if(array[i] > max_val){
            max_val = array[i];
        }
        sum = sum + array[i];
    }

    double finish_sequential = omp_get_wtime( );
    double sequential_seconds = finish_sequential - start_sequential;

    std::cout << "\nmin is :" << min_val;
    std::cout << "\nmax is :" << max_val;
    std::cout << "\nsum is :" << sum;
    std::cout << "\navg is :" << sum/n;
    std::cout << "\ntotal time taken " << sequential_seconds << " sec\n\n";

    // Speed-up factor: sequential time divided by parallel time.
    std::cout << "Execution time of Parallel is " << sequential_seconds / parallel_seconds << " times faster than Sequential Processing \n\n";

    return 0;
}