[nVIDIA CUDA]Determining blocksize and Gridsize for allocating device memory

Sun Mar 14 16:16:06 GMT 2010

hi,

I have started learning CUDA with no prior knowledge of parallel
programming.
I have a basic question about how to determine the block size and grid size.

For example, I need to multiply two one-dimensional arrays element by element, like this:

/*
 * Element-wise product of two float arrays: C[i] = A[i] * B[i] for 0 <= i < n.
 *
 * A, B : device pointers to the input arrays (read only), at least n elements.
 * C    : device pointer to the output array, at least n elements.
 * n    : number of elements to process. Defaults to 10 so that existing
 *        three-argument launches keep their previous behaviour.
 *
 * Launch layout: 1D grid of 1D blocks. Each thread handles at most one
 * element, so the launch must supply at least n threads in total, i.e.
 * gridDim.x >= (n + blockDim.x - 1) / blockDim.x. Surplus threads do nothing.
 */
__global__ void fmultiply(const float *A, const float *B, float *C, int n = 10)
{
	/* Flat global thread index across the whole 1D grid. */
	int idx = blockIdx.x * blockDim.x + threadIdx.x;

	/* The grid rarely divides the data evenly: threads past the end must not touch memory. */
	if (idx < n)
		C[idx] = A[idx] * B[idx];
}

/*
 * Multiplies two N-element host arrays element by element on the GPU and
 * prints the N products, one per line.
 *
 * Returns 0 on success, 1 if a host allocation or any CUDA call fails.
 */
int main(void)
{
	/*
	 * NOTE: as launched below (three arguments), fmultiply only touches
	 * indices 0..9, which matches N. If N changes, the kernel's bound has
	 * to change with it.
	 */
	const int N = 10;
	const size_t size = N * sizeof(float);

	/*
	 * Choosing the launch configuration:
	 *  - block size: a multiple of the warp size (32); 128-256 is a common default.
	 *  - grid size : enough blocks to cover all N elements, i.e. ceil(N / block size).
	 * The kernel's bounds check makes the surplus threads of the last block idle.
	 */
	const int threadsPerBlock = 256;
	const int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;

	float *a_h = (float *)malloc(size);
	float *b_h = (float *)malloc(size);
	float *c_h = (float *)malloc(size);
	float *a_d = 0, *b_d = 0, *c_d = 0;

	if (a_h == 0 || b_h == 0 || c_h == 0) {
		cout << "host allocation failed" << endl;
		free(a_h);
		free(b_h);
		free(c_h);
		return 1;
	}

	/*
	 * a = 0..N-1, b = 10..10+N-1. Both arrays are indexed from 0; the value
	 * offset belongs in the stored value, not in the index (indexing b_h from
	 * 10 would write past the end of the N-element buffer).
	 */
	for (int i = 0; i < N; i++) {
		a_h[i] = i;
		b_h[i] = i + 10;
	}

	/* Each step runs only if everything before it succeeded. */
	cudaError_t err = cudaMalloc((void **)&a_d, size);
	if (err == cudaSuccess)
		err = cudaMalloc((void **)&b_d, size);
	if (err == cudaSuccess)
		err = cudaMalloc((void **)&c_d, size);

	if (err == cudaSuccess)
		err = cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice);
	if (err == cudaSuccess)
		err = cudaMemcpy(b_d, b_h, size, cudaMemcpyHostToDevice);

	if (err == cudaSuccess) {
		fmultiply<<<blocksPerGrid, threadsPerBlock>>>(a_d, b_d, c_d);
		/* A launch does not return a status; bad configurations show up here. */
		err = cudaGetLastError();
	}

	/* Blocking copy: it waits for the kernel and reports any execution error. */
	if (err == cudaSuccess)
		err = cudaMemcpy(c_h, c_d, size, cudaMemcpyDeviceToHost);

	if (err == cudaSuccess) {
		for (int i = 0; i < N; i++)
			cout << c_h[i] << endl;
	} else {
		cout << "CUDA error: " << cudaGetErrorString(err) << endl;
	}

	/* cudaFree and free both accept null pointers, so partial failure is safe here. */
	cudaFree(a_d);
	cudaFree(b_d);
	cudaFree(c_d);

	free(a_h);
	free(b_h);
	free(c_h);

	return err == cudaSuccess ? 0 : 1;
}

and store the result in a third array, then copy that array back to the host array.
Any suggestions would be appreciated.

Regs,

ABHISHEK DEY

-- 
View this message in context: http://old.nabble.com/-nVIDIA-CUDA-Determining-blocksize-and-Gridsize-for-allocating-device-memory-tp27895980p27895980.html
Sent from the ubuntu-devel mailing list archive at Nabble.com.