GENIE
DeviceCodecs.cu
Go to the documentation of this file.
1 #include <genie/utility/scan.h>
2 
3 // This includes the implementation of g_decodeArrayParallel and of the decodeArrayParallel wrapper. Every implementation
4 // file that contains template definitions of these functions needs to include their implementation as well.
5 #include "DeviceDeltaHelper.h"
6 
7 #include "DeviceCodecs.h"
8 
9 #include "DeviceCodecTemplatesImpl.hpp"
10 
11 using namespace genie::compression;
12 
13 // Explicit template instantiations of the CPU decoding wrapper function for the simple codecs.
14 // NOTE: These are intentionally split across multiple codec implementation files in order to facilitate separate
15 // compilation units, as opposed to defining all of these templates in one place.
// One explicit instantiation of the decodeArrayParallel wrapper per codec implemented in this file.
// The instantiations are written inside the namespace that declares the template, so the names need
// no further qualification.
namespace genie {
namespace compression {

template void decodeArrayParallel<DeviceCopyCodec>(
        int, int, uint32_t*, size_t, uint32_t*, size_t, size_t*);

template void decodeArrayParallel<DeviceCopyMultiblockCodec>(
        int, int, uint32_t*, size_t, uint32_t*, size_t, size_t*);

template void decodeArrayParallel<DeviceDeltaCodec>(
        int, int, uint32_t*, size_t, uint32_t*, size_t, size_t*);

} // namespace compression
} // namespace genie
22 
23 
/**
 * Copies `length` values from `d_in` to `d_out` one after another in the calling thread.
 *
 * @param d_in   source values
 * @param length number of values to copy
 * @param d_out  destination buffer
 * @param nvalue in: capacity of `d_out` in values; out: always set to `length`
 * @return `d_in + length` after a successful copy, or `d_in` unchanged (nothing copied) when
 *         `length` exceeds the capacity passed in `nvalue`
 */
__device__ uint32_t*
genie::compression::DeviceCopyMultiblockCodec::decodeArraySequential(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
{
    if (length > nvalue) {
        // We do not have enough capacity in the decompressed array!
        // Report the required size and signal the failure by not advancing the input pointer.
        nvalue = length;
        return d_in;
    }

    // Index with size_t: an int counter (with length cast to int) would truncate lengths above INT_MAX.
    for (size_t i = 0; i < length; ++i)
        d_out[i] = d_in[i];

    nvalue = length;
    return d_in + length;
}
37 
/**
 * Copies `length` values from `d_in` to `d_out`, one value per thread of the launched grid.
 *
 * The thread with global index `blockIdx.x * blockDim.x + threadIdx.x` copies the value at that
 * index; threads whose index is not below `length` copy nothing. Every thread of the block must
 * call this function because it contains a `__syncthreads()` barrier.
 *
 * @param d_in   source values
 * @param length number of values to copy; asserted to be at most `gridDim.x * blockDim.x`
 * @param d_out  destination buffer
 * @param nvalue in: capacity of `d_out` in values (asserted to be at least `length`);
 *               out: set to `length`
 * @return `d_in + length`
 */
__device__ uint32_t*
genie::compression::DeviceCopyMultiblockCodec::decodeArrayParallel(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
{
    // Widen before multiplying so the thread count cannot wrap in 32-bit arithmetic.
    assert(length <= static_cast<size_t>(gridDim.x) * blockDim.x); // 1 thread copies one value
    assert(length <= nvalue); // not enough capacity in the decompressed array!

    // Global index kept in size_t: avoids 32-bit overflow and a signed/unsigned comparison with length.
    const size_t idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    if (idx < length)
        d_out[idx] = d_in[idx];
    // Barrier sits outside the bounds check so every thread of the block reaches it.
    __syncthreads();

    nvalue = length;
    return d_in + length;
}
52 
/**
 * Copies `length` values from `d_in` to `d_out` one after another in the calling thread.
 *
 * @param d_in   source values
 * @param length number of values to copy
 * @param d_out  destination buffer
 * @param nvalue in: capacity of `d_out` in values; out: always set to `length`
 * @return `d_in + length` after a successful copy, or `d_in` unchanged (nothing copied) when
 *         `length` exceeds the capacity passed in `nvalue`
 */
__device__ uint32_t*
genie::compression::DeviceCopyCodec::decodeArraySequential(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
{
    if (length > nvalue) {
        // We do not have enough capacity in the decompressed array!
        // Report the required size and signal the failure by not advancing the input pointer.
        nvalue = length;
        return d_in;
    }

    // size_t counter: comparing an int counter against a size_t length mixes signedness and
    // overflows the counter for lengths above INT_MAX.
    for (size_t i = 0; i < length; ++i)
        d_out[i] = d_in[i];

    nvalue = length;
    return d_in + length;
}
66 
67 
/**
 * Copies `length` values from `d_in` to `d_out` cooperatively within a single thread block.
 *
 * Each thread starts at position `threadIdx.x` and advances by
 * `decodeArrayParallel_threadsPerBlock()` until it passes `length`. Every thread of the block must
 * call this function because it contains a `__syncthreads()` barrier.
 *
 * @param d_in   source values
 * @param length number of values to copy; asserted to be at most
 *               `decodeArrayParallel_lengthPerBlock()`
 * @param d_out  destination buffer
 * @param nvalue in: capacity of `d_out` in values (asserted to be at least `length`);
 *               out: set to `length`
 * @return `d_in + length`
 */
__device__ uint32_t*
genie::compression::DeviceCopyCodec::decodeArrayParallel(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
{
    assert(length <= decodeArrayParallel_lengthPerBlock());
    assert(length <= nvalue); // not enough capacity in the decompressed array!

    // A single bounds-checked strided loop replaces the former "full strides + remainder" split.
    // That split computed `length - threadsPerBlock` in unsigned arithmetic and narrowed the
    // (possibly wrapped) result to int, and its unchecked first phase could touch positions
    // beyond `length` for a thread whose index is not below threadsPerBlock.
    // NOTE(review): presumably blockDim.x equals decodeArrayParallel_threadsPerBlock() - confirm at the launch site.
    const size_t stride = static_cast<size_t>(decodeArrayParallel_threadsPerBlock());
    for (size_t pos = threadIdx.x; pos < length; pos += stride)
        d_out[pos] = d_in[pos];
    __syncthreads();

    nvalue = length;
    return d_in + length;
}
88 
/**
 * Host-side encoder: writes the delta-encoded form of `in` to `out`.
 *
 * The input is first copied verbatim into `out`, then `DeviceDeltaHelper<uint32_t>::delta` is run
 * over `out` with an initial offset of 0.
 *
 * @param in     values to encode
 * @param length number of values in `in`
 * @param out    destination buffer; receives `length` values
 * @param nvalue out: set to `length` (the incoming value is not read)
 */
void
genie::compression::DeviceDeltaCodec::encodeArray(uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue)
{
    // Stage the raw values in the output buffer so the delta pass can work on `out` directly.
    const size_t byteCount = length * sizeof(*out);
    std::memcpy(out, in, byteCount);

    DeviceDeltaHelper<uint32_t>::delta(0, out, length);

    nvalue = length;
}
96 
/**
 * Host-side decoder: reconstructs the original values from the delta-encoded input.
 *
 * The encoded values are copied into `out` and the delta encoding applied by `encodeArray` is then
 * reverted on `out` with an initial offset of 0.
 *
 * @param in     delta-encoded values
 * @param length number of encoded values
 * @param out    destination buffer; receives `length` decoded values
 * @param nvalue out: set to `length` (the incoming value is not read)
 * @return `in + length`
 */
const uint32_t*
genie::compression::DeviceDeltaCodec::decodeArray(const uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue)
{
    std::memcpy(out, in, sizeof(uint32_t) * length);
    // Undo the delta pass of encodeArray (same initial offset of 0); a plain copy alone would
    // hand the still-encoded values back to the caller.
    DeviceDeltaHelper<uint32_t>::inverseDelta(0, out, length);
    nvalue = length;
    return in + length;
}
105 
/**
 * Device-side decoder executed by the calling thread: reconstructs the original values from the
 * delta-encoded input.
 *
 * @param d_in   delta-encoded values
 * @param length number of encoded values
 * @param d_out  destination buffer
 * @param nvalue in: capacity of `d_out` in values; out: always set to `length`
 * @return `d_in + length` after a successful decode, or `d_in` unchanged (nothing written) when
 *         `length` exceeds the capacity passed in `nvalue`
 */
__device__ const uint32_t*
genie::compression::DeviceDeltaCodec::decodeArraySequential(const uint32_t *d_in, const size_t length, uint32_t *d_out, size_t &nvalue)
{
    if (length > nvalue) {
        // We do not have enough capacity in the decompressed array!
        // Report the required size and signal the failure by not advancing the input pointer.
        nvalue = length;
        return d_in;
    }

    // size_t counter avoids the signed/unsigned comparison of an int counter with length.
    for (size_t i = 0; i < length; ++i)
        d_out[i] = d_in[i];

    // Revert the delta encoding on the copied values (initial offset 0, matching encodeArray);
    // without this step the output would still hold the encoded values.
    // NOTE(review): assumes inverseDeltaOnGPU may be run by one thread over the whole range - confirm in DeviceDeltaHelper.h.
    DeviceDeltaHelper<uint32_t>::inverseDeltaOnGPU(0, d_out, length);

    nvalue = length;
    return d_in + length;
}
120 
/**
 * Block-cooperative decoder: runs `genie::utility::d_scanInclusivePerBlockShared` from `d_in` into
 * `d_out`, with both buffers reinterpreted as arrays of `uint4`.
 *
 * Every thread of the block must call this function because it contains `__syncthreads()`
 * barriers; the block size is asserted to equal `GPUGENIE_SCAN_THREADBLOCK_SIZE`.
 *
 * @param d_in   delta-encoded values
 * @param length number of encoded values; asserted to be non-zero and at most
 *               `decodeArrayParallel_lengthPerBlock()`
 * @param d_out  destination buffer
 * @param nvalue in: capacity of `d_out` in values (asserted to be at least `length`);
 *               out: set to `length`
 * @return `d_in + length`
 */
__device__ uint32_t*
genie::compression::DeviceDeltaCodec::decodeArrayParallel(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
{
    // Capacity and supported size range
    assert(length <= nvalue); // not enough capacity in the decompressed array!
    assert(length > 0 && length <= decodeArrayParallel_lengthPerBlock());

    // Scan geometry: power-of-two ceiling of the value count, and the number of uint4 items needed
    // to cover `length` values (rounded up, so the last item may extend up to 3 values past `length`).
    const uint scanPow2Size = genie::utility::d_pow2ceil_32(length);
    const uint uint4Count = (length + 3) / 4;

    // Parallel model compatibility: the scan helper is used with this exact block size
    assert(blockDim.x == GPUGENIE_SCAN_THREADBLOCK_SIZE);

    // NOTE(review): the uint4 reinterpretation presumably needs 16-byte aligned buffers padded to a
    // multiple of 4 values - confirm against the callers.
    uint4 *scanDst = (uint4 *)d_out;
    uint4 *scanSrc = (uint4 *)d_in;

    __syncthreads();
    genie::utility::d_scanInclusivePerBlockShared(scanDst, scanSrc, uint4Count, scanPow2Size);
    __syncthreads();

    nvalue = length;
    return d_in + length;
}
140 
141 
__device__ uint32_t * decodeArrayParallel(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
Definition: DeviceCodecs.cu:39
__device__ void d_scanInclusivePerBlockShared(uint4 *d_Dst, uint4 *d_Src, uint activeThreads, uint pow2size)
Definition: scan.cu:158
const uint32_t * decodeArray(const uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue)
Definition: DeviceCodecs.cu:98
#define GPUGENIE_SCAN_THREADBLOCK_SIZE
Definition: scan.h:10
__device__ uint32_t * decodeArraySequential(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
Definition: DeviceCodecs.cu:25
__device__ uint32_t * decodeArrayParallel(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
static void delta(const T initoffset, T *data, const size_t size)
__device__ uint32_t * decodeArrayParallel(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
Definition: DeviceCodecs.cu:69
__device__ const uint32_t * decodeArraySequential(const uint32_t *d_in, const size_t length, uint32_t *d_out, size_t &nvalue)
static __device__ void inverseDeltaOnGPU(const T initoffset, T *d_data, const size_t size)
static void inverseDelta(const T initoffset, T *data, const size_t size)
__device__ uint d_pow2ceil_32(uint x)
Definition: scan.cu:279
void encodeArray(uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue)
Definition: DeviceCodecs.cu:90
__device__ uint32_t * decodeArraySequential(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
Definition: DeviceCodecs.cu:54