13 #include "../include/mycuda.h"
15 using namespace mycuda;
// Number of thread blocks in the grid; used as the first launch-configuration
// argument of the aX_plus_bY kernel launch further down.
20 const int gridsize = 32;
// Ask the runtime to allow page-locked host allocations to be mapped into the
// device address space (cudaDeviceMapHost).
// NOTE(review): the cudaError_t returned by this call is discarded, so a
// failure here would go unnoticed.
23 cudaSetDeviceFlags( cudaDeviceMapHost );
// Allocate the two inputs (x, y) and the output (z) through the project helper
// mapped_malloc<float>, passing n to each call. The same pointers are later
// handed to the kernel and dereferenced on the host.
// NOTE(review): presumably mapped_malloc returns host memory for n floats that
// is also addressable from the device -- confirm against mycuda.h.
25 float *x = mapped_malloc<float>(
n);
26 float *y = mapped_malloc<float>(
n);
27 float *z = mapped_malloc<float>(
n);
// Loop with i running from 0 up to n-1.
// NOTE(review): the loop body and closing brace are not present in this
// listing, so what is done per element cannot be stated from here.
29 for (
int i=0; i<
n; i++) {
// Launch mycuda::aX_plus_bY on gridsize blocks of blocksize threads with the
// arguments (z, a, x, b, y, n). The kernel is defined elsewhere; the host-side
// check at the end of this listing expects z[i] == a*x[i] + b*y[i].
// NOTE(review): kernel launches are asynchronous and do not report errors
// directly. No cudaGetLastError()/synchronization is visible in this listing
// between the launch and the host reads of z -- confirm one exists in the
// lines not shown.
36 mycuda::aX_plus_bY <<< gridsize, blocksize >>> ( z, a, x, b, y,
n );
// Print the first 50 results, one line per element, showing the output and the
// operands it is expected to be built from: z[i] = a * x[i] + b * y[i].
// NOTE(review): the bound is the literal 50, not n. Assuming the buffers hold
// n elements, this reads past their end whenever n < 50 -- confirm n >= 50.
41 for (
int i=0; i<50; i++) printf(
"%8.2f = %8.2f * %8.2f + %8.2f * %8.2f\n",
42 z[i], a, x[i], b, y[i] );
// Verify every element: z[i] must match the host-computed a*x[i] + b*y[i]
// within an absolute tolerance of 0.00001.
// The check uses assert, so it is compiled out when NDEBUG is defined.
46 for (
int i=0;i<
n;i++) assert( fabs( z[i] - (a*x[i] + b*y[i]) ) < 0.00001 );