#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#define BLOCK_DIM 4
#define GRID_DIM 1
#define STEPS 4
/*
 * addKernel - per-thread segment sum.
 *
 * Each thread with global index idx < BLOCK_DIM * GRID_DIM adds the `steps`
 * consecutive values input[idx*steps .. idx*steps + steps - 1] and
 * accumulates the total into output[idx].
 *
 * Launch layout: 1D grid of 1D blocks; only the .x components are used.
 * Shared memory: none.
 *
 * Parameters:
 *   output - device array with at least BLOCK_DIM * GRID_DIM elements. The
 *            kernel ADDS to the existing contents (+=), so the caller must
 *            initialise it (e.g. to zero) before the launch.
 *   input  - device array with at least BLOCK_DIM * GRID_DIM * steps elements.
 *   steps  - number of consecutive input elements summed by each thread.
 */
__global__ void addKernel(int *output,int *input,int steps)
{
    int idx = threadIdx.x + blockIdx.x * blockDim.x;

    // Threads beyond the number of output slots have nothing to do.
    if (idx < BLOCK_DIM * GRID_DIM)
    {
        // The segment start must scale the *global* index by `steps`.
        // Scaling only threadIdx.x makes segments of different blocks overlap
        // as soon as more than one block is launched.
        int first = idx * steps;

        // Accumulate locally so global memory is touched once per thread
        // instead of once per loop iteration.
        int sum = 0;
        for (int i = first; i < first + steps; i++)
        {
            sum += input[i];
        }
        output[idx] += sum;
    }
}
// Reports a failed CUDA runtime call on stderr; returns true when `status` is cudaSuccess.
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Host driver: fills h_input with 0..Data_size-1, lets addKernel sum STEPS
 * consecutive values per thread, and prints one line per output slot.
 *
 * Returns 0 on success and 1 if any CUDA call fails.
 */
int main()
{
    const int Data_size = BLOCK_DIM * GRID_DIM * STEPS;
    const int PATHS = BLOCK_DIM * GRID_DIM;
    int h_input[Data_size];
    int h_output[PATHS] = { 0 };

    for (int i = 0; i < Data_size; i++)
    {
        h_input[i] = i;
    }

    // Start as NULL so the unconditional cudaFree calls below are harmless
    // when an allocation was never reached.
    int *d_input = NULL;
    int *d_output = NULL;

    // Short-circuit evaluation stops at the first failing call.
    bool ok =
        cudaSucceeded(cudaSetDevice(0), "cudaSetDevice")
        && cudaSucceeded(cudaMalloc((void**)&d_input, sizeof(int) * Data_size), "cudaMalloc(d_input)")
        && cudaSucceeded(cudaMalloc((void**)&d_output, sizeof(int) * PATHS), "cudaMalloc(d_output)")
        && cudaSucceeded(cudaMemcpy(d_input, h_input, sizeof(int) * Data_size, cudaMemcpyHostToDevice),
                         "copy of input to device")
        // The kernel accumulates with +=, and cudaMalloc does not clear memory:
        // upload the zeroed host buffer so the sums start from a defined value.
        && cudaSucceeded(cudaMemcpy(d_output, h_output, sizeof(int) * PATHS, cudaMemcpyHostToDevice),
                         "copy of output to device");

    if (ok)
    {
        addKernel <<<GRID_DIM, BLOCK_DIM, 0>>>(d_output, d_input, STEPS);

        // A launch does not return a status: cudaGetLastError reports bad
        // launch configurations, the synchronize reports execution faults.
        ok = cudaSucceeded(cudaGetLastError(), "addKernel launch")
            && cudaSucceeded(cudaDeviceSynchronize(), "addKernel execution")
            && cudaSucceeded(cudaMemcpy(h_output, d_output, sizeof(int) * PATHS, cudaMemcpyDeviceToHost),
                             "copy of output to host");
    }

    if (ok)
    {
        for (int i = 0; i < PATHS; i++)
        {
            int k = i * STEPS;

            // Print the summed terms in a loop so the line stays correct for
            // any STEPS; for STEPS == 4 this yields "Output[i]={a+b+c+d}=s.".
            printf("Output[%d]={", i);
            for (int s = 0; s < STEPS; s++)
            {
                printf(s == 0 ? "%d" : "+%d", k + s);
            }
            printf("}=%d.\n", h_output[i]);
        }
    }

    cudaFree(d_input);
    cudaFree(d_output);
    cudaDeviceReset();
    return ok ? 0 : 1;
}
I2luY2x1ZGUgImN1ZGFfcnVudGltZS5oIgojaW5jbHVkZSAiZGV2aWNlX2xhdW5jaF9wYXJhbWV0ZXJzLmgiCgojaW5jbHVkZSA8c3RkaW8uaD4KCiNkZWZpbmUgQkxPQ0tfRElNIDQKI2RlZmluZSBHUklEX0RJTSAxCiNkZWZpbmUgU1RFUFMgNAoKX19nbG9iYWxfXyB2b2lkIGFkZEtlcm5lbChpbnQgKm91dHB1dCxpbnQgKmlucHV0LGludCBzdGVwcykKewogICAgaW50IGlkeCA9IHRocmVhZElkeC54ICsgYmxvY2tJZHgueCAqIGJsb2NrRGltLng7CglpbnQgbmR4ID0gdGhyZWFkSWR4Lngqc3RlcHMgKyBibG9ja0lkeC54ICogYmxvY2tEaW0ueDsKCWludCBuID0gMDsKCWlmIChpZHggPCBCTE9DS19ESU0gKiBHUklEX0RJTSkKCXsKCQlmb3IgKGludCBpID0gbmR4OyBpIDwgbmR4K3N0ZXBzOyBpKyspCgkJewoJCQlvdXRwdXRbaWR4XSArPSBpbnB1dFtpXTsKCQl9Cgl9Cn0KCmludCBtYWluKCkKewoJY29uc3QgaW50IERhdGFfc2l6ZSA9IEJMT0NLX0RJTSAqIEdSSURfRElNICogU1RFUFM7Cgljb25zdCBpbnQgUEFUSFMgPSBCTE9DS19ESU0gKiBHUklEX0RJTTsKICAgIGludCBoX2lucHV0W0RhdGFfc2l6ZV07CglpbnQgaF9vdXRwdXRbUEFUSFNdID0geyAwIH07OwoKCWZvciAoaW50IGkgPSAwOyBpIDwgRGF0YV9zaXplOyBpKyspCgl7CgkJaF9pbnB1dFtpXSA9IGk7Cgl9CgoJY3VkYVNldERldmljZSgwKTsKCWludCAqZF9pbnB1dDsKCWludCAqZF9vdXRwdXQ7CgoJY3VkYU1hbGxvYygodm9pZCoqKSZkX2lucHV0LCBzaXplb2YoaW50KSAqIERhdGFfc2l6ZSk7CgljdWRhTWFsbG9jKCh2b2lkKiopJmRfb3V0cHV0LCBzaXplb2YoaW50KSAqIFBBVEhTKTsKCgljdWRhTWVtY3B5KGRfaW5wdXQsIGhfaW5wdXQsIHNpemVvZihpbnQpICogRGF0YV9zaXplLCBjdWRhTWVtY3B5SG9zdFRvRGV2aWNlKTsKCglhZGRLZXJuZWwgPDw8R1JJRF9ESU0sIEJMT0NLX0RJTSwwID4+PihkX291dHB1dCwgZF9pbnB1dCwgU1RFUFMpOwoJY3VkYURldmljZVN5bmNocm9uaXplKCk7CgoJY3VkYU1lbWNweShoX291dHB1dCwgZF9vdXRwdXQsIHNpemVvZihpbnQpICogUEFUSFMsIGN1ZGFNZW1jcHlEZXZpY2VUb0hvc3QpOwoKCWZvciAoaW50IGkgPSAwOyBpIDwgUEFUSFM7IGkrKykKCXsKCQlpbnQgayA9IGkqU1RFUFM7CgkJcHJpbnRmKCJPdXRwdXRbJWRdPXslZCslZCslZCslZH09JWQuXG4iLGksayxrKzEsaysyLGsrMyxoX291dHB1dFtpXSk7Cgl9CgoJY3VkYUZyZWUoZF9pbnB1dCk7CgljdWRhRnJlZShkX291dHB1dCk7CiAgICBjdWRhRGV2aWNlUmVzZXQoKTsKICAgIHJldHVybiAwOwp9