Ideone.com

fork download

copy

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <stdio.h>
#include <stdlib.h>
 
#define BLOCK_DIM 4
#define GRID_DIM 1
#define STEPS 4
 
/*
 * Adds the sum of one run of `steps` consecutive ints from `input` into one
 * element of `output`.
 *
 * Thread with flat global index idx (x dimension only) reads
 * input[idx*steps .. idx*steps + steps - 1] and adds their total to
 * output[idx].
 *
 * Parameters:
 *   output - device array; element idx is accumulated into (+=), so the
 *            caller must initialise it before the launch.
 *   input  - device array holding at least BLOCK_DIM * GRID_DIM * steps ints.
 *   steps  - number of consecutive input elements summed per thread.
 *
 * Only threads with idx < BLOCK_DIM * GRID_DIM do any work; the guard uses
 * the file's compile-time macros, not a runtime element count.
 */
__global__ void addKernel(int *output,int *input,int steps)
{
	// Flat global thread index; one thread owns one output element.
	int idx = threadIdx.x + blockIdx.x * blockDim.x;
	if (idx < BLOCK_DIM * GRID_DIM)
	{
		// Start of this thread's run. The whole global index is scaled by
		// `steps` so that runs belonging to different blocks do not overlap
		// (scaling only threadIdx.x is correct for block 0 only).
		int first = idx * steps;

		// Accumulate locally and touch global output memory once.
		int sum = 0;
		for (int i = first; i < first + steps; i++)
		{
			sum += input[i];
		}
		output[idx] += sum;
	}
}
 
/*
 * Evaluates a CUDA runtime call and terminates the program with a diagnostic
 * (file, line, CUDA error string) if it did not return cudaSuccess.
 */
#define CUDA_CHECK(call)                                                    \
	do {                                                                    \
		cudaError_t err_ = (call);                                          \
		if (err_ != cudaSuccess) {                                          \
			fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,   \
			        cudaGetErrorString(err_));                              \
			exit(EXIT_FAILURE);                                             \
		}                                                                   \
	} while (0)

/*
 * Host driver: fills an input array with 0..Data_size-1, launches addKernel
 * with GRID_DIM blocks of BLOCK_DIM threads so that each thread sums STEPS
 * consecutive inputs, copies the per-thread sums back and prints them.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
 */
int main()
{
	const int Data_size = BLOCK_DIM * GRID_DIM * STEPS;
	const int PATHS = BLOCK_DIM * GRID_DIM;
	int h_input[Data_size];
	int h_output[PATHS] = { 0 };

	for (int i = 0; i < Data_size; i++)
	{
		h_input[i] = i;
	}

	CUDA_CHECK(cudaSetDevice(0));
	int *d_input = NULL;
	int *d_output = NULL;

	CUDA_CHECK(cudaMalloc((void**)&d_input, sizeof(int) * Data_size));
	CUDA_CHECK(cudaMalloc((void**)&d_output, sizeof(int) * PATHS));

	CUDA_CHECK(cudaMemcpy(d_input, h_input, sizeof(int) * Data_size, cudaMemcpyHostToDevice));
	// cudaMalloc does not clear memory and the kernel accumulates with +=,
	// so upload the zeroed host buffer to give every sum a defined start.
	CUDA_CHECK(cudaMemcpy(d_output, h_output, sizeof(int) * PATHS, cudaMemcpyHostToDevice));

	addKernel <<<GRID_DIM, BLOCK_DIM, 0>>>(d_output, d_input, STEPS);
	// A launch returns no status itself: fetch launch-configuration errors
	// here, then execution errors from the synchronize.
	CUDA_CHECK(cudaGetLastError());
	CUDA_CHECK(cudaDeviceSynchronize());

	CUDA_CHECK(cudaMemcpy(h_output, d_output, sizeof(int) * PATHS, cudaMemcpyDeviceToHost));

	for (int i = 0; i < PATHS; i++)
	{
		// Print the STEPS addends of path i followed by the device result,
		// e.g. "Output[1]={4+5+6+7}=22." when STEPS is 4.
		int k = i*STEPS;
		printf("Output[%d]={", i);
		for (int s = 0; s < STEPS; s++)
		{
			if (s > 0)
			{
				printf("+");
			}
			printf("%d", k + s);
		}
		printf("}=%d.\n", h_output[i]);
	}

	CUDA_CHECK(cudaFree(d_input));
	CUDA_CHECK(cudaFree(d_output));
	CUDA_CHECK(cudaDeviceReset());
	return 0;
}

Compilation error #stdin compilation error #stdout 0s 0KB

stdin

copy

Standard input is empty

compilation info

prog.c:1:26: fatal error: cuda_runtime.h: No such file or directory
 #include "cuda_runtime.h"
                          ^
compilation terminated.

stdout

copy

Standard output is empty

https://ideone.com/RwJCBZ

language:

C (gcc 8.3)

created:

visibility:

secret

Share or Embed source code

Discover > Sphere Engine API

The brand new service which powers Ideone!

Discover > IDE Widget

Widget for compiling and running the source code in a web browser!

Discover > Sphere Engine API

Discover > IDE Widget

Choose your language