# [nVIDIA CUDA]Determining blocksize and Gridsize for allocating device memory

OpenGeek abhishek.s.dey at live.com
Sun Mar 14 16:16:06 GMT 2010

```
hi,

I have started learning CUDA with no prior knowledge of parallel
programming.
I have a basic question about determining the block size and grid size.

For example, I need to multiply two one-dimensional arrays element by element, like this:

/*
 * Element-wise product of two float arrays: C[i] = A[i] * B[i] for every
 * i in [0, n).
 *
 * Launch layout: a 1-D grid of 1-D blocks. Each thread handles the single
 * element at its flat global index, so the launch must supply at least n
 * threads in total (gridDim.x * blockDim.x >= n). Surplus threads fail the
 * bounds check and do nothing. No shared memory is used.
 *
 * A, B : device pointers to the inputs, each holding at least n floats.
 * C    : device pointer to the output, holding at least n floats.
 * n    : number of elements to process. Defaults to 10 so that existing
 *        three-argument launches keep their original behaviour.
 */
__global__ void fmultiply(const float *A, const float *B, float *C, int n = 10)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // The grid rarely divides the data evenly; threads past the end must not
    // touch memory.
    if (idx < n)
        C[idx] = A[idx] * B[idx];
}

/* Reports a failed CUDA runtime call; returns true when `status` is cudaSuccess. */
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    cout << "CUDA error in " << what << ": " << cudaGetErrorString(status) << endl;
    return false;
}

/*
 * Multiplies two 10-element host arrays on the GPU and prints the products.
 *
 * Steps: allocate host and device buffers, fill the inputs, copy them to the
 * device, launch fmultiply, copy the result back, print it, release
 * everything. Returns 0 on success and 1 if any allocation, copy or the
 * kernel launch fails.
 */
int main(void)
{
    // fmultiply is launched with its three pointer arguments only, in which
    // case it processes the first 10 elements, so N must stay at 10 here.
    const int N = 10;
    const size_t size = N * sizeof(float);

    float *a_d = 0, *b_d = 0, *c_d = 0;

    float *a_h = (float *)malloc(size);
    float *b_h = (float *)malloc(size);
    float *c_h = (float *)malloc(size);

    bool ok = a_h && b_h && c_h;
    if (!ok)
        cout << "host allocation failed" << endl;

    ok = ok && cudaOk(cudaMalloc((void **)&a_d, size), "cudaMalloc(a_d)");
    ok = ok && cudaOk(cudaMalloc((void **)&b_d, size), "cudaMalloc(b_d)");
    ok = ok && cudaOk(cudaMalloc((void **)&c_d, size), "cudaMalloc(c_d)");

    if (ok) {
        // a = 0..9 and b = 10..19. Both arrays are indexed 0..N-1: the
        // buffers hold exactly N floats, so writing b_h[10..19] would run
        // past the end of the allocation and leave b_h[0..9] uninitialized.
        for (int i = 0; i < N; i++) {
            a_h[i] = i;
            b_h[i] = 10 + i;
        }
    }

    ok = ok && cudaOk(cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice), "cudaMemcpy(a_d)");
    ok = ok && cudaOk(cudaMemcpy(b_d, b_h, size, cudaMemcpyHostToDevice), "cudaMemcpy(b_d)");

    if (ok) {
        // Choosing the launch configuration:
        //  - block size: a multiple of the 32-thread warp; 128-256 is a
        //    reasonable default;
        //  - grid size: enough blocks to cover every element, i.e. the
        //    element count divided by the block size, rounded up.
        // The kernel's bounds check makes the surplus threads harmless.
        const int threadsPerBlock = 256;
        const int blocks = (N + threadsPerBlock - 1) / threadsPerBlock;

        fmultiply<<<blocks, threadsPerBlock>>>(a_d, b_d, c_d);

        // A launch does not return a status; a bad configuration only shows
        // up through cudaGetLastError().
        ok = cudaOk(cudaGetLastError(), "fmultiply launch");
    }

    // This blocking copy also waits for the kernel to finish, so it reports
    // any error raised while the kernel was running.
    ok = ok && cudaOk(cudaMemcpy(c_h, c_d, size, cudaMemcpyDeviceToHost), "cudaMemcpy(c_h)");

    if (ok) {
        for (int i = 0; i < N; i++)
            cout << c_h[i] << endl;
    }

    // cudaFree and free both accept null pointers, so the cleanup is
    // unconditional.
    cudaFree(a_d);
    cudaFree(b_d);
    cudaFree(c_d);

    free(a_h);
    free(b_h);
    free(c_h);

    return ok ? 0 : 1;
}

and store the result in the third array, then copy that array back to the host array.
I need suggestions.

Regs,

ABHISHEK DEY

--
View this message in context: http://old.nabble.com/-nVIDIA-CUDA-Determining-blocksize-and-Gridsize-for-allocating-device-memory-tp27895980p27895980.html
Sent from the ubuntu-devel mailing list archive at Nabble.com.

```