GPU Workshop Sample Code
 All Data Structures Namespaces Files Functions Variables Macros Pages
vecAdd.cu
Go to the documentation of this file.
1 
4 
5 #include "../include/mycuda.h"
6 
7 using namespace mycuda;
8 
10 
/**
 * Scaled vector addition kernel: z[i] = a*x[i] + b*y[i] for all i in [0, n).
 *
 * Each thread starts at its global index along the x dimension and then
 * advances by the total number of threads launched along x, so every
 * element is processed whatever 1-D grid/block configuration is used
 * (including a single thread). Only the x dimension of the launch is used.
 *
 * @param z  output array, dereferenced on the device, at least n floats
 * @param a  scalar multiplier applied to x
 * @param x  first input array, at least n floats (read only)
 * @param b  scalar multiplier applied to y
 * @param y  second input array, at least n floats (read only)
 * @param n  number of elements to process; n <= 0 does nothing
 *
 * The pointers are deliberately not marked __restrict__, so in-place use
 * (z aliasing x or y) remains valid.
 */
__global__ void
vecAdd( float * z, float a, const float * x, float b, const float * y, int n ) {
    // Index arithmetic is done in 64 bits: the 32-bit product
    // blockDim.x*gridDim.x can wrap around (even to 0, which would make the
    // loop never advance) for very large launches, and a 32-bit "i += stride"
    // can overflow when n is close to INT_MAX.
    const long long stride = (long long)blockDim.x * gridDim.x;
    const long long first  = (long long)blockIdx.x * blockDim.x + threadIdx.x;

    for (long long i = first; i < n; i += stride) {
        z[i] = a*x[i] + b*y[i];
    }
}
19 
20 
21 
/**
 * Host driver for the vecAdd kernel.
 *
 * Fills x[i] = i and y[i] = 2*i on the host, copies both to the device,
 * computes z = a*x + b*y with vecAdd, copies z back, prints the first few
 * results and asserts that every element matches the host computation.
 *
 * @return 0 on success, 1 if the kernel launch or its execution reported
 *         a CUDA error.
 */
int main()
{
    const int n = 8192;
    const int gridsize = 32;    // number of thread blocks in the launch
    const int blocksize = 32;   // number of threads per block

    // Different ways of allocating device memory, with and without error
    // checking, kept commented out for reference (the macros and
    // device_malloc are presumably provided by mycuda.h):
    // const int n1 = 1<<30;
    // float *x_d;
    // cudaMalloc( &x_d, n1*sizeof(float) );
    // cudaMalloc( &x_d, n1*sizeof(float) ); CUDA_CHECK_ERROR();
    // CUDA_SAFE_CALL( cudaMalloc( &x_d, n1*sizeof(float) ));
    // x_d = device_malloc<float>(n1);

    // Host buffers
    float *x_h = host_malloc<float>(n);
    float *y_h = host_malloc<float>(n);
    float *z_h = host_malloc<float>(n);

    // Device buffers
    float *x_d = device_malloc<float>(n);
    float *y_d = device_malloc<float>(n);
    float *z_d = device_malloc<float>(n);

    // Input data and scalar coefficients
    for (int i=0; i<n; i++) x_h[i] = i;
    for (int i=0; i<n; i++) y_h[i] = 2*i;
    float a=3.0f;
    float b=4.0f;

    copy_host_to_device( x_d, x_h, n );
    copy_host_to_device( y_d, y_h, n );

    // The execution configuration is <<< grid dimension, block dimension >>>,
    // in that order.
    vecAdd <<< gridsize, blocksize >>> ( z_d, a, x_d, b, y_d, n );

    // A kernel launch returns no status itself: an invalid configuration is
    // reported by cudaGetLastError(), and a fault during execution by the
    // next synchronizing call.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess) status = cudaDeviceSynchronize();

    int exit_code = 0;
    if (status != cudaSuccess) {
        fprintf( stderr, "vecAdd failed: %s\n", cudaGetErrorString(status) );
        exit_code = 1;
    } else {
        copy_device_to_host( z_h, z_d, n );

        // Show a few elements (never more than the arrays hold)
        const int nshow = (n < 50) ? n : 50;
        for (int i=0; i<nshow; i++) printf( "%8.2f = %8.2f * %8.2f + %8.2f * %8.2f\n",
            z_h[i], a, x_h[i], b, y_h[i] );

        // Check results against the same expression evaluated on the host.
        // Note: assert() compiles to nothing when NDEBUG is defined.
        for (int i=0;i<n;i++) assert( fabs( z_h[i] - (a*x_h[i] + b*y_h[i]) ) < 0.00001 );
    }

    // Free memory (on both the success and the failure path)
    device_free( x_d );
    device_free( y_d );
    device_free( z_d );
    host_free(x_h);
    host_free(y_h);
    host_free(z_h);

    return exit_code;
}
73 
74 
75 
76 
77