9 #include "../include/mycuda.h"
28 int tid = blockIdx.x*blockDim.x + threadIdx.x;
29 int nthreads = blockDim.x*gridDim.x;
30 for (
int i=tid; i<
n; i+=nthreads) {
39 const int gridsize=64;
41 float *x_ptr, *y_ptr, *z_ptr;
42 cudaGetSymbolAddress( (
void**)&x_ptr,
x_d );
43 cudaGetSymbolAddress( (
void**)&y_ptr,
y_d );
44 cudaGetSymbolAddress( (
void**)&z_ptr,
z_d );
46 mycuda::seq <<< gridsize, blocksize >>> ( x_ptr, 1.0, 2.0,
n );
47 mycuda::seq <<< gridsize, blocksize >>> ( y_ptr, 3.0,
n );
49 XplusY <<< gridsize, blocksize >>> ();
51 float *x_h = mycuda::host_malloc<float>(
n );
52 float *y_h = mycuda::host_malloc<float>(
n );
53 float *z_h = mycuda::host_malloc<float>(
n );
59 for (
int i=0;i<50;i++) printf(
"%8.2f + %8.2f = %8.2f\n", x_h[i], y_h[i], z_h[i]);