/** \file  mappedMemory.cu
 *  \brief This sample demonstrates the use of mapped variables.
 *
 *    #### Notes
 *    * Mapped variables are accessible from both host and device.
 *    * Remember to call cudaDeviceSynchronize() before reading a variable on
 *      the host if it was modified on the device.
 *      
 *    #### Examples 
 *    > float *x = mapped_malloc<float>(n);
 */

#include <cmath>
#include <cstdio>
#include <cstdlib>

#include "../include/mycuda.h"

using namespace mycuda;

/**
 * Entry point of the mapped-memory sample.
 *
 * Allocates three mapped float arrays of n elements, fills x and y on the
 * host, launches mycuda::aX_plus_bY on them, prints the first few results and
 * then compares every element of z with a*x[i] + b*y[i] computed on the host.
 *
 * \return EXIT_SUCCESS when the device flags were set, the kernel launch was
 *         accepted and every element matched; EXIT_FAILURE otherwise.
 */
int main() {

    const int n         = 8192;
    const int gridsize  = 32;
    const int blocksize = 32;

    // Request mapped host memory support and stop early if the runtime
    // rejects the request, instead of continuing with an unknown device state.
    const cudaError_t flag_err = cudaSetDeviceFlags( cudaDeviceMapHost );
    if (flag_err != cudaSuccess) {
        fprintf( stderr, "cudaSetDeviceFlags failed: %s\n",
                 cudaGetErrorString( flag_err ) );
        return EXIT_FAILURE;
    }

    float *x = mapped_malloc<float>(n);
    float *y = mapped_malloc<float>(n);
    float *z = mapped_malloc<float>(n);

    // Initialise the inputs (and clear the output) directly from the host.
    for (int i=0; i<n; i++) {
        x[i] = 3.0f;
        y[i] = 2*i + 7.0f;
        z[i] = 0.0f;
    }

    float a=3.0f, b=5.0f;

    // NOTE(review): gridsize*blocksize (1024 threads) is smaller than n (8192);
    // presumably the kernel loops over the remaining elements - confirm
    // against the definition of aX_plus_bY.
    mycuda::aX_plus_bY <<< gridsize, blocksize >>> ( z, a, x, b, y, n );

    int status = EXIT_SUCCESS;

    // A kernel launch does not return an error code itself; an invalid launch
    // configuration is only visible through cudaGetLastError().
    const cudaError_t launch_err = cudaGetLastError();
    if (launch_err != cudaSuccess) {
        fprintf( stderr, "aX_plus_bY launch failed: %s\n",
                 cudaGetErrorString( launch_err ) );
        status = EXIT_FAILURE;
    }

    // Wait for the device before the host reads z.
    device_synchronize();

    if (status == EXIT_SUCCESS) {

        // Show a few elements (never more than the arrays hold)
        const int nshow = (n < 50) ? n : 50;
        for (int i=0; i<nshow; i++) printf( "%8.2f = %8.2f * %8.2f + %8.2f * %8.2f\n",
                                            z[i], a, x[i], b, y[i] );

        // Check results explicitly so the verification also runs in builds
        // where assert() is compiled out (NDEBUG).
        for (int i=0; i<n; i++) {
            const float expected = a*x[i] + b*y[i];
            // Written as !(... < tol) so that a NaN result counts as a mismatch.
            if ( !( fabsf( z[i] - expected ) < 0.00001 ) ) {
                fprintf( stderr, "Mismatch at element %d: got %f, expected %f\n",
                         i, z[i], expected );
                status = EXIT_FAILURE;
                break;
            }
        }
    }

    // Free memory
    mapped_free( x );
    mapped_free( y );
    mapped_free( z );

    return status;
}




 
