GENIE
scan.h
Go to the documentation of this file.
1 /*
2  * This module contains source code provided by NVIDIA Corporation.
3  */
4 
5 #ifndef SCAN_H
6 #define SCAN_H
7 
8 #include <stdlib.h>
9 
10 #define GPUGENIE_SCAN_THREADBLOCK_SIZE (256)
11 #define GPUGENIE_SCAN_MIN_SHORT_ARRAY_SIZE (4)
12 #define GPUGENIE_SCAN_MAX_SHORT_ARRAY_SIZE (4*GPUGENIE_SCAN_THREADBLOCK_SIZE) // 1024
13 #define GPUGENIE_SCAN_MIN_LARGE_ARRAY_SIZE (8*GPUGENIE_SCAN_THREADBLOCK_SIZE) // 2048
14 #define GPUGENIE_SCAN_MAX_LARGE_ARRAY_SIZE (4*GPUGENIE_SCAN_THREADBLOCK_SIZE*GPUGENIE_SCAN_THREADBLOCK_SIZE) // 262144
15 
16 namespace genie
17 {
18 namespace utility
19 {
20 
21 
22 extern const unsigned int SCAN_THREADBLOCK_SIZE;
23 extern const unsigned int SCAN_MIN_SHORT_ARRAY_SIZE;
24 extern const unsigned int SCAN_MAX_SHORT_ARRAY_SIZE;
25 extern const unsigned int SCAN_MIN_LARGE_ARRAY_SIZE;
26 extern const unsigned int SCAN_MAX_LARGE_ARRAY_SIZE;
27 
28 void initScan(void);
29 void closeScan(void);
30 
31 extern __device__ uint d_pow2ceil_32(uint x);
32 uint h_pow2ceil_32(uint x);
33 
34 extern __global__ void g_scanExclusiveShared(
35  uint4 *d_Dst,
36  uint4 *d_Src,
37  uint activeThreads,
38  uint pow2size);
39 
40 extern __device__ void d_scanExclusiveShared(
41  uint4 *d_Dst,
42  uint4 *d_Src,
43  uint activeThreads,
44  uint pow2size);
45 
46 extern __device__ void d_scanExclusivePerBlockShared(
47  uint4 *d_Dst,
48  uint4 *d_Src,
49  uint activeThreads,
50  uint pow2size);
51 
52 extern __global__ void g_scanInclusiveShared(
53  uint4 *d_Dst,
54  uint4 *d_Src,
55  uint activeThreads,
56  uint pow2size);
57 
58 extern __device__ void d_scanInclusiveShared(
59  uint4 *d_Dst,
60  uint4 *d_Src,
61  uint activeThreads,
62  uint pow2size);
63 
64 extern __device__ void d_scanInclusivePerBlockShared(
65  uint4 *d_Dst,
66  uint4 *d_Src,
67  uint activeThreads,
68  uint pow2size);
69 
70 size_t scanExclusiveShort(
71  unsigned int *d_Dst,
72  unsigned int *d_Src,
73  unsigned int arrayLength);
74 
75 size_t scanExclusiveLarge(
76  unsigned int *d_Dst,
77  unsigned int *d_Src,
78  unsigned int arrayLength);
79 
80 void scanExclusiveHost(
81  unsigned int *dst,
82  unsigned int *src,
83  unsigned int arrayLength);
84 
85 } // namespace utility
86 } // namespace genie
87 
88 #endif
const unsigned int SCAN_THREADBLOCK_SIZE
Definition: scan.cu:12
__device__ void d_scanExclusivePerBlockShared(uint4 *d_Dst, uint4 *d_Src, uint activeThreads, uint pow2size)
Definition: scan.cu:95
This is the top-level namespace of the project.
__device__ void d_scanInclusivePerBlockShared(uint4 *d_Dst, uint4 *d_Src, uint activeThreads, uint pow2size)
Definition: scan.cu:158
void initScan(void)
Definition: scan.cu:267
const unsigned int SCAN_MIN_LARGE_ARRAY_SIZE
Definition: scan.cu:15
uint h_pow2ceil_32(uint x)
Definition: scan.cu:293
size_t scanExclusiveShort(unsigned int *d_Dst, unsigned int *d_Src, unsigned int arrayLength)
__device__ void d_scanExclusiveShared(uint4 *d_Dst, uint4 *d_Src, uint activeThreads, uint pow2size)
Definition: scan.cu:116
size_t scanExclusiveLarge(unsigned int *d_Dst, unsigned int *d_Src, unsigned int arrayLength)
__global__ void g_scanExclusiveShared(uint4 *d_Dst, uint4 *d_Src, uint activeThreads, uint pow2size)
Definition: scan.cu:74
void closeScan(void)
Definition: scan.cu:272
__device__ uint d_pow2ceil_32(uint x)
Definition: scan.cu:279
__device__ void d_scanInclusiveShared(uint4 *d_Dst, uint4 *d_Src, uint activeThreads, uint pow2size)
Definition: scan.cu:179
__global__ void g_scanInclusiveShared(uint4 *d_Dst, uint4 *d_Src, uint activeThreads, uint pow2size)
Definition: scan.cu:137
const unsigned int SCAN_MAX_SHORT_ARRAY_SIZE
Definition: scan.cu:14
void scanExclusiveHost(unsigned int *dst, unsigned int *src, unsigned int arrayLength)
const unsigned int SCAN_MIN_SHORT_ARRAY_SIZE
Definition: scan.cu:13
const unsigned int SCAN_MAX_LARGE_ARRAY_SIZE
Definition: scan.cu:16