#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#define BLOCK_DIM 2
#define GRID_DIM 2
#define STEPS 4
// Sums consecutive runs of `steps` integers from `input`, one run per thread.
//
// Thread with global index `idx` adds input[idx*steps .. idx*steps + steps - 1]
// and stores the total in output[idx].
//
// Launch layout: 1D grid of 1D blocks. Threads whose global index is >= paths
// do nothing, so the launch may contain more threads than `paths`; paths beyond
// gridDim.x * blockDim.x are not processed.
//
// Parameters:
//   output - device array with at least `paths` elements (written).
//   input  - device array with at least `paths * steps` elements (read).
//   paths  - number of runs to sum.
//   steps  - number of elements in each run; a value <= 0 yields a sum of 0.
__global__ void addKernel(int *output,int *input,int paths,int steps)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= paths)
    {
        return;
    }

    // The run starts at the *global* thread index times `steps`. Scaling only
    // threadIdx.x would make every block after the first read the wrong slice.
    // Computed in 64-bit so the product cannot overflow int for large inputs.
    const long long first = (long long)idx * steps;

    int sum = 0;
    for (int s = 0; s < steps; ++s)
    {
        sum += input[first + s];
    }
    output[idx] = sum;
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Host driver: fills an input array with 0..Data_size-1, runs addKernel over
// it with GRID_DIM blocks of BLOCK_DIM threads, and prints one line per path
// showing the input values that path is expected to sum and the value the
// kernel returned.
//
// Returns 0 on success and 1 if any CUDA call or the kernel launch failed
// (in which case no results are printed).
int main()
{
    const int Data_size = BLOCK_DIM * GRID_DIM * STEPS;
    const int PATHS = BLOCK_DIM * GRID_DIM;
    int h_input[Data_size];
    int h_output[PATHS] = { 0 };
    for (int i = 0; i < Data_size; i++)
    {
        h_input[i] = i;
    }

    // Start as NULL so the unconditional cudaFree calls below are safe even
    // when an allocation was never reached.
    int *d_input = NULL;
    int *d_output = NULL;

    // Short-circuit chain: the first failing call stops the sequence.
    bool ok = cudaCallSucceeded(cudaSetDevice(0), "cudaSetDevice")
        && cudaCallSucceeded(cudaMalloc((void**)&d_input, sizeof(int) * Data_size), "cudaMalloc(d_input)")
        && cudaCallSucceeded(cudaMalloc((void**)&d_output, sizeof(int) * PATHS), "cudaMalloc(d_output)")
        && cudaCallSucceeded(cudaMemcpy(d_input, h_input, sizeof(int) * Data_size, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)")
        && cudaCallSucceeded(cudaMemset(d_output, 0, sizeof(int) * PATHS), "cudaMemset(d_output)");

    if (ok)
    {
        addKernel <<<GRID_DIM, BLOCK_DIM, 0 >>>(d_output, d_input, PATHS, STEPS);
        // A launch does not return a status itself: configuration errors are
        // read with cudaGetLastError(), execution errors surface at the sync.
        ok = cudaCallSucceeded(cudaGetLastError(), "addKernel launch")
            && cudaCallSucceeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize")
            && cudaCallSucceeded(cudaMemcpy(h_output, d_output, sizeof(int) * PATHS, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)");
    }

    if (ok)
    {
        for (int i = 0; i < PATHS; i++)
        {
            int k = i * STEPS;
            // Emit the STEPS consecutive input values joined by '+', so the
            // line stays accurate if STEPS is changed from 4.
            printf("Output[%d]={", i);
            for (int s = 0; s < STEPS; s++)
            {
                if (s > 0)
                {
                    printf("+");
                }
                printf("%d", k + s);
            }
            printf("}=%d.\n", h_output[i]);
        }
    }

    cudaFree(d_input);
    cudaFree(d_output);
    cudaDeviceReset();
    return ok ? 0 : 1;
}
CiNpbmNsdWRlICJjdWRhX3J1bnRpbWUuaCIKI2luY2x1ZGUgImRldmljZV9sYXVuY2hfcGFyYW1ldGVycy5oIgoKI2luY2x1ZGUgPHN0ZGlvLmg+CgojZGVmaW5lIEJMT0NLX0RJTSAyCiNkZWZpbmUgR1JJRF9ESU0gMgojZGVmaW5lIFNURVBTIDQKCl9fZ2xvYmFsX18gdm9pZCBhZGRLZXJuZWwoaW50ICpvdXRwdXQsaW50ICppbnB1dCxpbnQgcGF0aHMsaW50IHN0ZXBzKQp7CglpbnQgaWR4ID0gYmxvY2tJZHgueCAqIGJsb2NrRGltLnggKyB0aHJlYWRJZHgueDsKCWludCBuZHggPSBibG9ja0lkeC54ICogYmxvY2tEaW0ueCArIHRocmVhZElkeC54ICogc3RlcHM7CglpbnQgbiA9IDA7CglpbnQgc3VtID0gMDsKCWlmIChpZHggPCBwYXRocykKCXsKCQlmb3IgKGludCBpID0gbmR4OyBpIDwgbmR4K3N0ZXBzOyBpKyspCgkJewoJCQlzdW0gKz0gaW5wdXRbaV07CgkJfQoJCW91dHB1dFtpZHhdID0gc3VtOwoJfQp9CgppbnQgbWFpbigpCnsKCWNvbnN0IGludCBEYXRhX3NpemUgPSBCTE9DS19ESU0gKiBHUklEX0RJTSAqIFNURVBTOwoJY29uc3QgaW50IFBBVEhTID0gQkxPQ0tfRElNICogR1JJRF9ESU07CiAgICBpbnQgaF9pbnB1dFtEYXRhX3NpemVdOwoJaW50IGhfb3V0cHV0W1BBVEhTXSA9IHsgMCB9OwoKCWZvciAoaW50IGkgPSAwOyBpIDwgRGF0YV9zaXplOyBpKyspCgl7CgkJaF9pbnB1dFtpXSA9IGk7Cgl9CgoJY3VkYVNldERldmljZSgwKTsKCWludCAqZF9pbnB1dDsKCWludCAqZF9vdXRwdXQ7CgoJY3VkYU1hbGxvYygodm9pZCoqKSZkX2lucHV0LCBzaXplb2YoaW50KSAqIERhdGFfc2l6ZSk7CgljdWRhTWFsbG9jKCh2b2lkKiopJmRfb3V0cHV0LCBzaXplb2YoaW50KSAqIFBBVEhTKTsKCgljdWRhTWVtY3B5KGRfaW5wdXQsIGhfaW5wdXQsIHNpemVvZihpbnQpICogRGF0YV9zaXplLCBjdWRhTWVtY3B5SG9zdFRvRGV2aWNlKTsKCWN1ZGFNZW1zZXQoZF9vdXRwdXQsIDAsIHNpemVvZihpbnQpKiBQQVRIUyk7CgoJYWRkS2VybmVsIDw8PEdSSURfRElNLCBCTE9DS19ESU0sMCA+Pj4oZF9vdXRwdXQsIGRfaW5wdXQsUEFUSFMsIFNURVBTKTsKCWN1ZGFEZXZpY2VTeW5jaHJvbml6ZSgpOwoKCWN1ZGFNZW1jcHkoaF9vdXRwdXQsIGRfb3V0cHV0LCBzaXplb2YoaW50KSAqIFBBVEhTLCBjdWRhTWVtY3B5RGV2aWNlVG9Ib3N0KTsKCglmb3IgKGludCBpID0gMDsgaSA8IFBBVEhTOyBpKyspCgl7CgkJaW50IGsgPSBpKlNURVBTOwoJCXByaW50ZigiT3V0cHV0WyVkXT17JWQrJWQrJWQrJWR9PSVkLlxuIixpLGssaysxLGsrMixrKzMsaF9vdXRwdXRbaV0pOwoJfQoKCWN1ZGFGcmVlKGRfaW5wdXQpOwoJY3VkYUZyZWUoZF9vdXRwdXQpOwogICAgY3VkYURldmljZVJlc2V0KCk7CiAgICByZXR1cm4gMDsKfQo=