[nVIDIA CUDA]Determining blocksize and Gridsize for allocating device memory
OpenGeek
abhishek.s.dey at live.com
Sun Mar 14 16:16:06 GMT 2010
Hi,
I have started learning CUDA with no prior knowledge of parallel
programming.
I have a basic question about how to determine the block size and grid size
for a kernel launch.
For example, I need to multiply two one-dimensional arrays element by element like this:
/*
 * Element-wise product kernel: C[i] = A[i] * B[i] for every i in [0, n).
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element. Each thread
 * handles the single index blockIdx.x * blockDim.x + threadIdx.x, so the
 * launch must supply at least n threads in total for every element to be
 * written. Threads whose index is >= n do nothing, which makes it safe to
 * round the grid size up. No shared memory is used.
 *
 * A, B  device pointers to the input arrays (at least n floats each)
 * C     device pointer to the output array (at least n floats)
 * n     number of elements to process; defaults to 10 so that existing
 *       three-argument launches keep their previous behaviour
 */
__global__ void fmultiply(float *A, float *B, float *C, int n = 10)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    /* Guard the tail: the grid rarely divides the data evenly. */
    if (idx < n)
        C[idx] = A[idx] * B[idx];
}
/* Reports a failed CUDA runtime call on stderr; returns true on success. */
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
    return false;
}

/*
 * Multiplies two N-element host arrays on the GPU and prints the products.
 *
 * Steps: allocate device and host buffers, fill the inputs (a = 0..N-1,
 * b = 10..10+N-1), copy them to the device, launch fmultiply, copy the
 * result back and print one value per line.
 *
 * Returns 0 on success and 1 if any allocation, copy or the launch fails.
 * All buffers are released on every path.
 */
int main(void)
{
    const int N = 10;
    const size_t size = N * sizeof(float);

    /* Start from null so the cleanup at the bottom is safe on every path. */
    float *a_h = 0, *b_h = 0, *c_h = 0;
    float *a_d = 0, *b_d = 0, *c_d = 0;

    bool ok = cudaSucceeded(cudaMalloc((void **)&a_d, size), "cudaMalloc(a_d)")
           && cudaSucceeded(cudaMalloc((void **)&b_d, size), "cudaMalloc(b_d)")
           && cudaSucceeded(cudaMalloc((void **)&c_d, size), "cudaMalloc(c_d)");

    a_h = (float *)malloc(size);
    b_h = (float *)malloc(size);
    c_h = (float *)malloc(size);
    if (ok && (!a_h || !b_h || !c_h)) {
        std::cerr << "host malloc failed" << std::endl;
        ok = false;
    }

    if (ok) {
        for (int i = 0; i < N; i++)
            a_h[i] = (float)i;
        /* Values 10..19 stored at indices 0..N-1. Indexing b_h with the
           value itself (10..19) would write past the end of the buffer. */
        for (int j = 0; j < N; j++)
            b_h[j] = (float)(j + 10);

        ok = cudaSucceeded(cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice),
                           "cudaMemcpy(a_d <- a_h)")
          && cudaSucceeded(cudaMemcpy(b_d, b_h, size, cudaMemcpyHostToDevice),
                           "cudaMemcpy(b_d <- b_h)");
    }

    if (ok) {
        /* Launch configuration: pick a block size that is a multiple of the
           warp size (32), then round the grid size up so that
           blocks * threads >= N. fmultiply ignores indices past the 10
           elements, so the surplus threads are harmless. */
        const int threadsPerBlock = 256;
        const int blocksPerGrid = (N + threadsPerBlock - 1) / threadsPerBlock;

        fmultiply<<<blocksPerGrid, threadsPerBlock>>>(a_d, b_d, c_d);

        /* A bad launch configuration is only reported via cudaGetLastError;
           the blocking cudaMemcpy below also waits for the kernel and
           surfaces any error raised while it ran. */
        ok = cudaSucceeded(cudaGetLastError(), "fmultiply launch")
          && cudaSucceeded(cudaMemcpy(c_h, c_d, size, cudaMemcpyDeviceToHost),
                           "cudaMemcpy(c_h <- c_d)");
    }

    if (ok) {
        for (int i = 0; i < N; i++)
            std::cout << c_h[i] << std::endl;
    }

    /* cudaFree and free both accept null pointers. */
    cudaFree(a_d);
    cudaFree(b_d);
    cudaFree(c_d);
    free(a_h);
    free(b_h);
    free(c_h);
    return ok ? 0 : 1;
}
and store the products in a third array, then copy that array back to a host array.
I would appreciate any suggestions.
Regards,
ABHISHEK DEY
--
View this message in context: http://old.nabble.com/-nVIDIA-CUDA-Determining-blocksize-and-Gridsize-for-allocating-device-memory-tp27895980p27895980.html
Sent from the ubuntu-devel mailing list archive at Nabble.com.
More information about the ubuntu-devel
mailing list