I2luY2x1ZGUgJmx0O3N0ZGlvLmgmZ3Q7CiNkZWZpbmUgU0laRQkxMDI0CgpfX2dsb2JhbF9fIHZvaWQgVmVjdG9yQWRkKGludCAqYSwgaW50ICpiLCBpbnQgKmMsIGludCBuKQp7CglpbnQgaT10aHJlYWRJZHgueDsKCglpZiAoaSAmbHQ7IG4pCgkJY1tpXSA9IGFbaV0gKyBiW2ldOwp9CgppbnQgbWFpbigpCnsKCWludCAqYSwgKmIsICpjOwoKCWN1ZGFNYWxsb2NNYW5hZ2VkKCZhbXA7YSwgU0laRSAqIHNpemVvZihpbnQpKTsKCWN1ZGFNYWxsb2NNYW5hZ2VkKCZhbXA7YiwgU0laRSAqIHNpemVvZihpbnQpKTsKCWN1ZGFNYWxsb2NNYW5hZ2VkKCZhbXA7YywgU0laRSAqIHNpemVvZihpbnQpKTsKCglmb3IgKGludCBpID0gMDsgaSAmbHQ7IFNJWkU7ICsraSkKCXsKCQlhW2ldID0gaTsKCQliW2ldID0gaTsKCQljW2ldID0gMDsKCX0KCglWZWN0b3JBZGQgJmx0OyZsdDsmbHQ7MSwgU0laRSZndDsmZ3Q7Jmd0OyAoYSwgYiwgYywgU0laRSk7CgoJY3VkYURldmljZVN5bmNocm9uaXplKCk7CgoJZm9yIChpbnQgaSA9IDA7IGkgJmx0OyAxMDsgKytpKQoJCXByaW50ZigmcXVvdDtjWyVkXSA9ICVkXG4mcXVvdDssIGksIGNbaV0pOwoKCWN1ZGFGcmVlKGEpOwoJY3VkYUZyZWUoYik7CgljdWRhRnJlZShjKTsKCglyZXR1cm4gMDsKfQ==
#include <stdio.h>
#define SIZE 1024
/*
 * Element-wise integer vector addition: c[i] = a[i] + b[i] for 0 <= i < n.
 *
 * Launch layout: 1D grid of 1D blocks, any size. Each thread starts at its
 * global index and advances by the total number of launched threads, so all
 * n elements are covered whether the launch uses one block or many, and no
 * element is processed by more than one thread.
 *
 * a, b : input arrays, at least n elements each, readable from the device.
 * c    : output array, at least n elements, writable from the device.
 * n    : number of elements to process; n <= 0 performs no work.
 *
 * Uses no shared memory.
 */
__global__ void VectorAdd(int *a, int *b, int *c, int n)
{
    // 64-bit index and stride so neither the block offset nor the running
    // index can overflow on very large grids.
    const long long stride = (long long)gridDim.x * blockDim.x;
    long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    for (; i < n; i += stride)
        c[i] = a[i] + b[i];
}
/*
 * Reports a failed CUDA runtime call on stderr.
 *
 * status : result code returned by the runtime call.
 * what   : short label for the call, used in the message.
 *
 * Returns 0 when status is cudaSuccess, 1 otherwise.
 */
static int cudaFailed(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return 0;

    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return 1;
}

/*
 * Demo driver: fills two SIZE-element managed arrays with a[i] = b[i] = i,
 * adds them on the GPU with VectorAdd, and prints the first ten sums.
 *
 * Returns 0 on success, 1 if any CUDA allocation, launch or synchronization
 * step fails (a diagnostic is written to stderr in that case).
 */
int main()
{
    // NULL-initialised so the cudaFree calls below are harmless no-ops for
    // any buffer whose allocation never succeeded.
    int *a = NULL, *b = NULL, *c = NULL;

    // Short-circuit: stop allocating at the first failure.
    int failed =
        cudaFailed(cudaMallocManaged(&a, SIZE * sizeof(int)), "cudaMallocManaged(a)") ||
        cudaFailed(cudaMallocManaged(&b, SIZE * sizeof(int)), "cudaMallocManaged(b)") ||
        cudaFailed(cudaMallocManaged(&c, SIZE * sizeof(int)), "cudaMallocManaged(c)");

    if (!failed)
    {
        // Managed memory is written directly from the host.
        for (int i = 0; i < SIZE; ++i)
        {
            a[i] = i;
            b[i] = i;
            c[i] = 0;
        }

        // One block of SIZE threads, one element per thread.
        VectorAdd <<<1, SIZE>>> (a, b, c, SIZE);

        // A kernel launch returns nothing: configuration errors show up via
        // cudaGetLastError(), execution errors at the synchronizing call.
        // The synchronize is also what makes c safe to read on the host.
        failed = cudaFailed(cudaGetLastError(), "VectorAdd launch") ||
                 cudaFailed(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
    }

    if (!failed)
    {
        for (int i = 0; i < 10 && i < SIZE; ++i)
            printf("c[%d] = %d\n", i, c[i]);
    }

    cudaFree(a);
    cudaFree(b);
    cudaFree(c);

    return failed ? 1 : 0;
}