GENIE
DeviceSerialCodec.h
Go to the documentation of this file.
1 #ifndef DEVICE_SERIAL_CODEC_H_
2 #define DEVICE_SERIAL_CODEC_H_
3 
4 #include <algorithm>
5 #include <string>
6 
7 #include "DeviceCompositeCodec.h"
8 
9 
// Threadblock size is 256, same for all codecs (can be up to 1024 for
// compute capability >= 2.0).
#define GPUGENIE_CODEC_SERIAL_THREADBLOCK_SIZE (256)

// Number of integers decoded by a single thread.
#define GPUGENIE_CODEC_SERIAL_THREAD_LOAD (4)

// Maximum uncompressed length -- read from the first uint32_t of compressed word.
// Defined as per-thread load times threadblock size, using this header's own
// SERIAL constants so the macro expands without any other codec header
// having been included first.
#define GPUGENIE_CODEC_SERIAL_MAX_UNCOMPR_LENGTH (GPUGENIE_CODEC_SERIAL_THREAD_LOAD * \
        GPUGENIE_CODEC_SERIAL_THREADBLOCK_SIZE)
19 
20 
21 
22 namespace genie
23 {
24 namespace compression
25 {
26 
27 template <class Codec1, class Codec2>
29 public:
30 
31  __device__ __host__
33  // Check both codecs have the same parallel model for decompression on GPU
34  assert(codec1.decodeArrayParallel_lengthPerBlock() == codec2.decodeArrayParallel_lengthPerBlock());
35  assert(codec1.decodeArrayParallel_threadsPerBlock() == codec2.decodeArrayParallel_threadsPerBlock());
36  assert(codec1.decodeArrayParallel_threadLoad() == codec2.decodeArrayParallel_threadLoad());
37  assert(codec1.decodeArrayParallel_maxBlocks() == codec2.decodeArrayParallel_maxBlocks());
38  }
39 
40  Codec1 codec1;
41  Codec2 codec2;
42 
// Encoder entry point. It carries no CUDA execution-space qualifier, so it is
// an ordinary host function. Only the declaration is given here (no body).
// NOTE(review): presumably `length` counts uint32_t elements of `in` and
// `nvalue` reports the produced output size -- confirm against the definition.
43  void
44  encodeArray(uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue);
45 
// Host-side decoder (no execution-space qualifier); declaration only.
// NOTE(review): the meaning of the returned pointer is not visible here --
// confirm against the definition.
46  const uint32_t*
47  decodeArray(const uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue);
48 
// Device-only decoder (declared __device__); declaration only.
// NOTE(review): the d_ prefix suggests device-memory buffers -- confirm.
49  __device__ uint32_t*
50  decodeArraySequential(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue);
51 
// Device-only decoder (declared __device__) with the same signature as
// decodeArraySequential; declaration only.
// NOTE(review): how threads cooperate is not visible here -- see the definition.
52  __device__ uint32_t*
53  decodeArrayParallel(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue);
54 
55  std::string
56  name() const {
57  std::ostringstream convert;
58  convert << "Serial(" << codec1.name() << "-" << codec2.name() << ")";
59  return convert.str();
60  }
61 
62  __device__ __host__ int
64  return max(codec1.decodeArrayParallel_minEffectiveLength(),
65  codec2.decodeArrayParallel_minEffectiveLength());
66  }
67 
68  __device__ __host__ int
70  return codec1.decodeArrayParallel_maxBlocks();
71  }
72 
73  __device__ __host__ int
75  return codec1.decodeArrayParallel_lengthPerBlock();
76  }
77 
78  __device__ __host__ int
80  return codec1.decodeArrayParallel_threadsPerBlock();
81  }
82 
83  __device__ __host__ int
85  return codec1.decodeArrayParallel_threadLoad();
86  }
87 };
88 
89 } // namespace compression
90 } // namespace genie
91 
92 #endif
__device__ __host__ int decodeArrayParallel_threadLoad()
void encodeArray(uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue)
This is the top-level namespace of the project.
__device__ __host__ int decodeArrayParallel_maxBlocks()
__device__ uint32_t * decodeArrayParallel(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
__device__ uint32_t * decodeArraySequential(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
__device__ __host__ int decodeArrayParallel_lengthPerBlock()
const uint32_t * decodeArray(const uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue)
__device__ __host__ int decodeArrayParallel_threadsPerBlock()
__device__ __host__ int decodeArrayParallel_minEffectiveLength()