5 #include "../include/mycuda.h"
7 using namespace mycuda;
// Number of blocks: passed as the first launch-configuration argument
// (<<< gridsize, blocksize >>>) of every kernel launch in this file.
int const gridsize = 32;
// Host-side working buffers, each obtained from host_malloc<float>(n).
// host_malloc is not defined in this file (presumably it comes from mycuda.h --
// confirm its allocation and ownership semantics there).
// x_h, y_h and z_h are read element-wise by the print and verification loops.
float *x_h = host_malloc<float>(n),
      *y_h = host_malloc<float>(n),
      *z_h = host_malloc<float>(n);
// Buffers handed to the kernels, each obtained from device_malloc<float>(n).
// device_malloc is not defined in this file (presumably it returns device
// memory -- confirm in mycuda.h). In the visible code these pointers are only
// ever passed as kernel arguments, never dereferenced on the host.
float *x_d = device_malloc<float>(n),
      *y_d = device_malloc<float>(n),
      *z_d = device_malloc<float>(n);
// Initialise the two input buffers on the device. Both launches use the
// <<< gridsize, blocksize >>> configuration.
//
// A kernel launch does not return a status, so each launch is followed by a
// cudaGetLastError() check. That check only reports launch-time problems
// (e.g. an invalid configuration); faults raised while the kernel runs surface
// at a later synchronizing call.

// fill receives (x_d, 3.0f, n) -- presumably it stores 3.0f into each of the n
// elements of x_d; confirm against the kernel definition.
fill <<< gridsize, blocksize >>> ( x_d, 3.0f, n );
{
    const cudaError_t launch_status = cudaGetLastError();
    if (launch_status != cudaSuccess)
        fprintf(stderr, "fill launch failed: %s\n", cudaGetErrorString(launch_status));
    assert(launch_status == cudaSuccess);
}

// seq receives (y_d, 5.0f, 2.0f) -- presumably a start value and a step.
// NOTE(review): this is the only launch in the file that is not passed n.
// Confirm seq's signature and that gridsize * blocksize cannot exceed the
// number of elements allocated for y_d.
seq <<< gridsize, blocksize >>> ( y_d, 5.0f, 2.0f );
{
    const cudaError_t launch_status = cudaGetLastError();
    if (launch_status != cudaSuccess)
        fprintf(stderr, "seq launch failed: %s\n", cudaGetErrorString(launch_status));
    assert(launch_status == cudaSuccess);
}
// Compute step: two launches with the <<< gridsize, blocksize >>> configuration.
//
// A kernel launch does not return a status, so each launch is followed by a
// cudaGetLastError() check. That check only reports launch-time problems;
// faults raised while the kernel runs surface at a later synchronizing call.

// exp_X is given x_d as both its first and second argument, i.e. the same
// buffer is used for both pointer parameters (presumably output and input,
// making the operation in-place -- confirm against the kernel definition).
exp_X <<< gridsize, blocksize >>> ( x_d, x_d, n );
{
    const cudaError_t launch_status = cudaGetLastError();
    if (launch_status != cudaSuccess)
        fprintf(stderr, "exp_X launch failed: %s\n", cudaGetErrorString(launch_status));
    assert(launch_status == cudaSuccess);
}

// aX_plus_bY receives (z_d, a, x_d, b, y_d, n). The host-side verification
// loop later expects z == a*exp(x) + b*y, so this presumably writes
// a*x_d[i] + b*y_d[i] into z_d[i] -- confirm against the kernel definition.
aX_plus_bY <<< gridsize, blocksize >>> ( z_d, a, x_d, b, y_d, n );
{
    const cudaError_t launch_status = cudaGetLastError();
    if (launch_status != cudaSuccess)
        fprintf(stderr, "aX_plus_bY launch failed: %s\n", cudaGetErrorString(launch_status));
    assert(launch_status == cudaSuccess);
}
// Print the leading rows of the result, one line per element, in the form
//   z = a * exp(x) + b * y
// At most 50 rows are shown. The loop is also bounded by n so that a run with
// fewer than 50 elements does not read past the end of the host buffers.
for (int i = 0; i < 50 && i < n; i++) {
    printf("%8.2f = %8.2f * exp(%8.2f) + %8.2f * %8.2f\n",
           z_h[i], a, x_h[i], b, y_h[i]);
}
// Check every element of z_h against a host reference a*exp(x) + b*y.
//
// The reference is evaluated in double precision and compared with a combined
// absolute + relative tolerance. A fixed absolute bound of 1e-5 is smaller than
// one float ulp once |z| exceeds roughly 128, so harmless rounding differences
// between host and device arithmetic would otherwise trip the assertion.
// Any element that satisfied the previous |diff| < 1e-5 test still passes.
//
// NOTE: assert() is compiled out when NDEBUG is defined, in which case this
// loop verifies nothing.
for (int i = 0; i < n; i++) {
    const double expected  = (double)a * exp((double)x_h[i]) + (double)b * (double)y_h[i];
    const double tolerance = 1e-5 + 1e-5 * fabs(expected);
    assert( fabs((double)z_h[i] - expected) <= tolerance );
}