GPU Workshop Sample Code
Main Page
Namespaces
Data Structures
Files
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Macros
Pages
include
mycuda_scan.h
Go to the documentation of this file.
1
9
namespace mycuda_scan
{

/// Block-wide inclusive prefix sum over one value per thread, computed in
/// shared memory.
///
/// The scratch array `r` is used as two halves of `n` elements each:
/// `r[0 .. n-1]` is filled with zeros so that `r[i - offset]` never reads
/// below the start of the data, and `r[n .. 2n-1]` holds the running sums.
/// On return, `r[n + k]` contains the sum of the `x` values supplied by the
/// callers with index `0 .. k`.
///
/// The function contains `__syncthreads()` barriers, so every thread of the
/// block must call it. As used by scan_blocks(), `i` is `threadIdx.x` and
/// `n` is `blockDim.x`.
///
/// \tparam T  element type; must be constructible from `0` and support `+`.
/// \param r   scratch array with room for at least `2*n` elements of `T`.
/// \param x   this thread's input value.
/// \param i   this thread's index, in `0 .. n-1`.
/// \param n   number of values being scanned.
template<typename T>
__device__ void scan_dev( T * r,
                          T x,
                          int i,
                          int n )
{
  // Zero padding in the lower half; T(0) rather than 0.0f keeps the
  // initial value in the element type for every T.
  r[i] = T(0);
  i += n;
  r[i] = x;

  // Each pass adds in the partial sum `offset` slots to the left; offset
  // doubles every pass. A signed counter avoids mixing signed and unsigned
  // operands in the comparison and in the index arithmetic.
  for( int offset = 1; offset < n; offset <<= 1 ) {
    __syncthreads();              // previous writes must be visible before reading
    T t = r[i] + r[i - offset];
    __syncthreads();              // all reads must finish before anyone overwrites
    r[i] = t;
  }
  __syncthreads();                // make the final sums visible to the whole block
}

/// Scan each block-sized chunk of `x` independently and record every
/// block's total.
///
/// Thread `tid` of block `j` handles element `i = j*blockDim.x + tid`.
/// There is no bounds check and no length parameter: `x` and `w` must hold
/// at least `gridDim.x * blockDim.x` elements, and `blocksum` at least
/// `gridDim.x` elements.
///
/// Launch with a dynamic shared-memory size of `2 * blockDim.x * sizeof(T)`
/// bytes (third launch parameter).
///
/// \tparam T         element type; must be constructible from `0` and support `+`.
/// \param w          output: per-block scan of `x`.
/// \param x          input values.
/// \param blocksum   output: `blocksum[j]` receives the sum of all inputs of
///                   block `j`.
/// \param inclusive  non-zero for an inclusive scan; zero for an exclusive
///                   scan (first element of each block is 0).
template<typename T>
__global__
void scan_blocks( T* w,
                  T* x,
                  T* blocksum,
                  int inclusive )
{
  int n   = blockDim.x;
  int j   = blockIdx.x;
  int tid = threadIdx.x;
  int i   = j*n + tid;

  // Declaring `extern __shared__ T r[]` directly would redeclare the same
  // array with a different type for every instantiation of this template,
  // which does not compile once two element types are used. Declare the
  // dynamic shared memory once as raw bytes and view it as T.
  extern __shared__ __align__(16) unsigned char mycuda_scan_smem[];
  T* r = reinterpret_cast<T*>( mycuda_scan_smem );

  scan_dev( r, x[i], tid, n );

  if( inclusive ) {
    w[i] = r[tid + n];
  }
  else {
    // Shift right by one. T(0) keeps both arms of the conditional in T;
    // a float literal here would round integer results through float.
    w[i] = ( tid == 0 ? T(0) : r[tid + n - 1] );
  }

  // The last slot of the upper half is the sum over the whole block.
  if( tid == 0 ) blocksum[j] = r[2*n - 1];
}

} // end namespace mycuda_scan
Generated on Sun Jul 14 2013 17:09:40 for GPU Workshop Sample Code by
1.8.4