GPU Workshop Sample Code
 All Data Structures Namespaces Files Functions Variables Macros Pages
Data Structures | Functions
mycuda Namespace Reference

Core macros and utilities. More...

Data Structures

class  exception
 Exception class for CUDA errors. More...
 

Functions

void check_error (const char *file, const int line)
 Terminate if cudaGetLastError() returns error. More...
 
void async_check_error (const char *file, const int line)
 Terminate if cudaGetLastError() returns error. More...
 
void safe_call (cudaError_t err, const char *file, const int line)
 Wrapper to check error code for any CUDA call that returns one. Terminates if an error is returned. More...
 
void handler (std::exception &e, const char *file, const int line)
 Simple exception handler. More...
 
template<typename T >
T * device_malloc (int n) throw (exception)
 Allocate memory on device. More...
 
template<typename T >
T * mapped_malloc (int n) throw (exception)
 Allocate mapped memory on host. More...
 
template<typename T >
T * host_malloc (int n) throw ( exception )
 Allocate host memory. More...
 
template<typename T >
void device_free (T *ptr) throw ( exception )
 Free device memory. More...
 
template<typename T >
void mapped_free (T *ptr) throw ( exception )
 Free mapped memory. More...
 
template<typename T >
void host_free (T *ptr)
 Free host memory (included just for consistency). More...
 
template<typename T >
void copy_device_to_host (T *dest, T *src, int n) throw ( exception )
 Copy from device to host. More...
 
template<typename T >
void copy_host_to_device (T *dest, T *src, int n) throw ( exception )
 Copy from host to device. More...
 
template<typename T >
void copy_device_to_device (T *dest, T *src, int n) throw ( exception )
 Copy from device to device. More...
 
template<typename T >
void copy_host_to_host (T *dest, T *src, int n) throw ( exception )
 Copy from host to host. More...
 
void device_synchronize () throw ( exception )
 Wrapper for cudaDeviceSynchronize() More...
 
void get_memory_info () throw ( exception )
 Wrapper for cudaMemGetInfo() More...
 
template<typename T1 , typename T2 >
__global__ void fill (T1 *z, T2 a, int n)
 Fill device array z with constant a. More...
 
template<typename T1 , typename T2 >
__global__ void seq (T1 *z, T2 first, int n)
 Fill device array z with sequence. More...
 
template<typename T1 , typename T2 >
__global__ void seq (T1 *z, T2 first, T2 inc, int n)
 Fill device array z with sequence. More...
 
template<typename T1 , typename T2 >
__global__ void rep (T1 *z, int n, T2 *x, int nx, int ncopies)
 Fill device array z with values from x, repeated ncopies times. More...
 
template<typename T1 , typename T2 >
__global__ void tile (T1 *z, int n, T2 *x, int nx, int ncopies)
 Fill device array z with values from x, tiled ncopies times. More...
 
__global__ void aX_plus_bY (float *z, float a, float *x, float b, float *y, int n)
 Elementwise operation, z = a*x + b*y. More...
 
__global__ void max_X_Y (float *z, float *x, float *y, int n)
 Elementwise operation, z = max( x, y ) More...
 
__global__ void min_X_Y (float *z, float *x, float *y, int n)
 Elementwise operation, z = min( x, y ) More...
 
__global__ void log_X (float *z, float *x, int n)
 Elementwise operation, z = log( x ) More...
 
__global__ void exp_X (float *z, float *x, int n)
 Elementwise operation, z = exp( x ) More...
 

Detailed Description

Core macros and utilities.

Function Documentation

void mycuda::async_check_error ( const char *  file,
const int  line 
)

Terminate if cudaGetLastError() returns error.

Notes

Definition at line 66 of file mycuda.h.

__global__ void mycuda::aX_plus_bY ( float *  z,
float  a,
float *  x,
float  b,
float *  y,
int  n 
)

Elementwise operation, z = a*x + b*y.

Definition at line 411 of file mycuda.h.

void mycuda::check_error ( const char *  file,
const int  line 
)

Terminate if cudaGetLastError() returns error.

Notes

Definition at line 47 of file mycuda.h.

template<typename T >
void mycuda::copy_device_to_device ( T *  dest,
T *  src,
int  n 
)
throw (exception
)

Copy from device to device.

Example

copy_device_to_device( dest, src, n );

Definition at line 276 of file mycuda.h.

template<typename T >
void mycuda::copy_device_to_host ( T *  dest,
T *  src,
int  n 
)
throw (exception
)

Copy from device to host.

Example

copy_device_to_host( dest, src, n );

Definition at line 245 of file mycuda.h.

template<typename T >
void mycuda::copy_host_to_device ( T *  dest,
T *  src,
int  n 
)
throw (exception
)

Copy from host to device.

Example

copy_host_to_device( dest, src, n );

Definition at line 261 of file mycuda.h.

template<typename T >
void mycuda::copy_host_to_host ( T *  dest,
T *  src,
int  n 
)
throw (exception
)

Copy from host to host.

Notes

  • Use for mapped memory copies.

Example

copy_host_to_host( dest, src, n );

Definition at line 294 of file mycuda.h.

template<typename T >
void mycuda::device_free ( T *  ptr)
throw (exception
)

Free device memory.

Definition at line 209 of file mycuda.h.

template<typename T >
T* mycuda::device_malloc ( int  n)
throw (exception
)

Allocate memory on device.

Example

float *x = device_malloc<float>( 1024 );

Definition at line 155 of file mycuda.h.

void mycuda::device_synchronize ( )
throw (exception
)

Wrapper for cudaDeviceSynchronize()

Definition at line 304 of file mycuda.h.

__global__ void mycuda::exp_X ( float *  z,
float *  x,
int  n 
)

Elementwise operation, z = exp( x )

Definition at line 471 of file mycuda.h.

template<typename T1 , typename T2 >
__global__ void mycuda::fill ( T1 *  z,
T2  a,
int  n 
)

Fill device array z with constant a.

Definition at line 328 of file mycuda.h.

void mycuda::get_memory_info ( )
throw (exception
)

Wrapper for cudaMemGetInfo()

Definition at line 314 of file mycuda.h.

void mycuda::handler ( std::exception &  e,
const char *  file,
const int  line 
)

Simple exception handler.

Example

void bar() { try { foo(); } catch( std::exception& e ) { mycuda::handler( e, __FILE__, __LINE__ ); } }

Definition at line 139 of file mycuda.h.

template<typename T >
void mycuda::host_free ( T *  ptr)

Free host memory (included just for consistency).

Definition at line 231 of file mycuda.h.

template<typename T >
T* mycuda::host_malloc ( int  n)
throw (exception
)

Allocate host memory.

Notes

  • Included for consistency. Better:

    float * x = new float[1024]; delete[] x;

Definition at line 197 of file mycuda.h.

__global__ void mycuda::log_X ( float *  z,
float *  x,
int  n 
)

Elementwise operation, z = log( x )

Definition at line 458 of file mycuda.h.

template<typename T >
void mycuda::mapped_free ( T *  ptr)
throw (exception
)

Free mapped memory.

Definition at line 220 of file mycuda.h.

template<typename T >
T* mycuda::mapped_malloc ( int  n)
throw (exception
)

Allocate mapped memory on host.

Example

float *x = mapped_malloc<float>( 1024 );

Notes

  • Call cudaDeviceSynchronize() before the host accesses data written by the device.

Definition at line 177 of file mycuda.h.

__global__ void mycuda::max_X_Y ( float *  z,
float *  x,
float *  y,
int  n 
)

Elementwise operation, z = max( x, y )

Definition at line 424 of file mycuda.h.

__global__ void mycuda::min_X_Y ( float *  z,
float *  x,
float *  y,
int  n 
)

Elementwise operation, z = min( x, y )

Definition at line 441 of file mycuda.h.

template<typename T1 , typename T2 >
__global__ void mycuda::rep ( T1 *  z,
int  n,
T2 *  x,
int  nx,
int  ncopies 
)

Fill device array z with values from x, repeated ncopies times.

E.g., if x={1,2,3} and ncopies=2, then z={1,1,2,2,3,3}

Definition at line 381 of file mycuda.h.

void mycuda::safe_call ( cudaError_t  err,
const char *  file,
const int  line 
)

Wrapper to check error code for any CUDA call that returns one. Terminates if an error is returned.

Notes

  • Call using macro, e.g., CUDA_SAFE_CALL( cudaMalloc( &ptr, n*sizeof(float) )).

Definition at line 84 of file mycuda.h.

template<typename T1 , typename T2 >
__global__ void mycuda::seq ( T1 *  z,
T2  first,
int  n 
)

Fill device array z with sequence.

E.g., z = {first, first+1, ..., first+n-1 }

Definition at line 346 of file mycuda.h.

template<typename T1 , typename T2 >
__global__ void mycuda::seq ( T1 *  z,
T2  first,
T2  inc,
int  n 
)

Fill device array z with sequence.

E.g., z = {first, first+inc, ..., first+(n-1)*inc }

Definition at line 364 of file mycuda.h.

template<typename T1 , typename T2 >
__global__ void mycuda::tile ( T1 *  z,
int  n,
T2 *  x,
int  nx,
int  ncopies 
)

Fill device array z with values from x, tiled ncopies times.

E.g., if x={1,2,3} and ncopies=2, then z={1,2,3,1,2,3}

Definition at line 399 of file mycuda.h.