GENIE
DeviceVarintCodec.h
Go to the documentation of this file.
1 #ifndef DEVICE_VARINT_CODEC_H_
2 #define DEVICE_VARINT_CODEC_H_
3 
4 #include "DeviceCodecs.h"
5 
6 namespace genie
7 {
8 namespace compression
9 {
10 
11 // Thread-block size used by the Varint codec: 256 threads. The same size is used for all codecs (it could be raised up to 1024 for compute capability >= 2.0).
12 #define GPUGENIE_CODEC_VARINT_THREADBLOCK_SIZE (256)
13 
14 // Number of integers decoded by a single thread (4).
15 #define GPUGENIE_CODEC_VARINT_THREAD_LOAD (4)
16 
17 // Maximum uncompressed length: THREAD_LOAD * THREADBLOCK_SIZE = 4 * 256 = 1024 integers.
// NOTE(review): the original comment adds "read from the first uint32_t of compressed word"; presumably
// the actual uncompressed length is stored there and is bounded by this macro -- confirm in the decoder.
18 #define GPUGENIE_CODEC_VARINT_MAX_UNCOMPR_LENGTH (GPUGENIE_CODEC_VARINT_THREAD_LOAD * \
19  GPUGENIE_CODEC_VARINT_THREADBLOCK_SIZE)
20 
21 
30 
31 public:
32 
// Public encoding entry point (declaration only; the definition is not part of this header).
// It carries no CUDA execution-space qualifier, so it is a host function.
// NOTE(review): presumably `in` / `length` describe the uncompressed input, `out` receives the
// compressed words and `nvalue` (a non-const reference) reports the compressed size -- confirm
// against the definition.
33  void
34  encodeArray(uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue);
35 
// Public host-side decoding entry point (declaration only; no CUDA execution-space qualifier).
// The second parameter is left unnamed here (its name `length` is commented out).
// NOTE(review): presumably decodes from `in` into `out`, updates `nvalue` with the number of decoded
// values and returns a pointer into the input past the consumed data -- confirm against the definition.
36  const uint32_t*
37  decodeArray(const uint32_t *in, const size_t /*length*/, uint32_t *out, size_t &nvalue);
38 
// Device-side (__device__) decoding routine, declaration only.
// The second parameter is left unnamed here (its name `length` is commented out).
// NOTE(review): going by the name, this decodes `d_in` into `d_out` sequentially; the meaning of the
// returned pointer and of `nvalue` is not visible in this header -- confirm against the definition.
39  __device__ uint32_t*
40  decodeArraySequential(uint32_t *d_in, const size_t /*length*/, uint32_t *d_out, size_t &nvalue);
41 
// Device-side (__device__) decoding routine, declaration only.
// The second parameter is left unnamed here (its name `comprLength` is commented out).
// NOTE(review): going by the name, the threads of a block cooperate to decode `d_in` into `d_out`, with
// `capacity` (a non-const reference) describing the room available in `d_out` -- the launch layout it
// expects and the meaning of the returned pointer are not visible here; confirm against the definition.
42  __device__ uint32_t*
43  decodeArrayParallel(uint32_t *d_in, size_t /* comprLength */, uint32_t *d_out, size_t &capacity);
44 
// Identifies this codec by name.
//
// Returns the fixed string "Varint".
std::string
name() const
{
    return std::string("Varint");
}
47 
// Always returns 1; callable from both host and device code.
// NOTE(review): going by the name, this is the largest number of thread blocks that
// decodeArrayParallel may be run with -- confirm with the callers.
__device__ __host__ int
decodeArrayParallel_maxBlocks()
{
    const int maxBlocks = 1;
    return maxBlocks;
}
// Always returns 1; callable from both host and device code.
// NOTE(review): going by the name, this is the smallest input length for which
// decodeArrayParallel is considered worthwhile -- confirm with the callers.
__device__ __host__ int
decodeArrayParallel_minEffectiveLength()
{
    const int minEffectiveLength = 1;
    return minEffectiveLength;
}
53 
54 private:
55 
// Private host-side helper (declaration only; no CUDA execution-space qualifier).
// NOTE(review): presumably the byte-level worker behind encodeArray: encodes `length` integers from
// `in` into the byte buffer `bout` and reports a size through `nvalue` -- whether that size is in
// bytes or in 32-bit words is not visible here; confirm against the definition.
56  void
57  encodeToByteArray(uint32_t *in, const size_t length, uint8_t *bout, size_t &nvalue);
58 
// Private host-side helper (declaration only; no CUDA execution-space qualifier).
// NOTE(review): presumably the byte-level worker behind decodeArray: decodes the bytes at `inbyte`
// into `out`, updates `nvalue` and returns a pointer past the consumed input bytes -- the unit of
// `length` (bytes vs. words) is not visible here; confirm against the definition.
59  const uint8_t*
60  decodeFromByteArray(const uint8_t *inbyte, const size_t length, uint32_t *out, size_t &nvalue);
61 
// Returns the 7-bit group of `val` that starts at bit 7 * i, placed in the low seven bits
// of the result; bit 7 of the result is always clear.
//
// The shift count is 7 * i, so `i` has to stay in 0..4 for a 32-bit `val`
// (shifting a 32-bit value by 32 or more is undefined behaviour in C++).
template <uint32_t i> uint8_t
extract7bits(const uint32_t val) {
    const uint32_t low7BitsMask = 0x7FU;
    const uint32_t shifted = val >> (7 * i);
    return static_cast<uint8_t>(shifted & low7BitsMask);
}
66 
// Shifts `val` right by 7 * i bits and truncates the result to one byte.
//
// Unlike extract7bits no 0x7F mask is applied: the cast to uint8_t keeps the low eight
// bits, so bit 7 of the result is whatever bit 7 * i + 7 of `val` was.
// The shift count is 7 * i, so `i` has to stay in 0..4 for a 32-bit `val`.
template <uint32_t i> uint8_t
extract7bitsmaskless(const uint32_t val) {
    const uint32_t shifted = val >> (7 * i);
    return static_cast<uint8_t>(shifted);
}
71 
// Tells whether `inbyte` is misaligned with respect to a 4-byte boundary.
//
// Returns true when the pointer's address is not a multiple of 4 (either of its two
// lowest address bits is set), false when it is 4-byte aligned.
template <class T> inline bool
needPaddingTo32Bits(const T *inbyte) {
    const uintptr_t address = reinterpret_cast<uintptr_t>(inbyte);
    const uintptr_t offsetInWord = address & 3;
    return offsetInWord != 0;
}
76 
// Private device-side (__device__) helper, declaration only.
// NOTE(review): going by the name, this counts how many encoded integers begin at position
// `idxUnpack` of the compressed stream `d_in`, with `comprLength` bounding the stream -- the units of
// `idxUnpack` and `comprLength` are not visible here; confirm against the definition.
77  __device__ int
78  numIntsStartingHere(uint32_t *d_in, int idxUnpack, int comprLength);
79 
80 };
81 
82 } // namespace compression
83 } // namespace genie
84 
85 #endif
This is the top-level namespace of the project.
__device__ __host__ int decodeArrayParallel_maxBlocks()
const uint32_t * decodeArray(const uint32_t *in, const size_t, uint32_t *out, size_t &nvalue)
__device__ uint32_t * decodeArrayParallel(uint32_t *d_in, size_t, uint32_t *d_out, size_t &capacity)
__device__ __host__ int decodeArrayParallel_threadsPerBlock()
__device__ __host__ int decodeArrayParallel_lengthPerBlock()
#define GPUGENIE_CODEC_VARINT_THREAD_LOAD
__device__ __host__ int decodeArrayParallel_minEffectiveLength()
void encodeArray(uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue)
#define GPUGENIE_CODEC_VARINT_MAX_UNCOMPR_LENGTH
__device__ uint32_t * decodeArraySequential(uint32_t *d_in, const size_t, uint32_t *d_out, size_t &nvalue)
__device__ __host__ int decodeArrayParallel_threadLoad()
#define GPUGENIE_CODEC_VARINT_THREADBLOCK_SIZE