// Pulls in the codec template implementations, then names
// decodeArrayParallel specialised on DeviceVarintCodec with the parameter
// list (int, int, uint32_t*, size_t, uint32_t*, size_t, size_t*).
// NOTE(review): this looks like an explicit template instantiation, but the
// leading `template` keyword / qualifiers / return type are not visible in
// this excerpt -- confirm against the full file.
5 #include "DeviceCodecTemplatesImpl.hpp" 11 genie::compression::decodeArrayParallel<genie::compression::DeviceVarintCodec>(
12 int, int, uint32_t*, size_t, uint32_t*, size_t,
size_t*);
// Body of the word-oriented encoder (the function header is not visible in
// this excerpt; presumably encodeArray(in, length, out, nvalue) -- confirm).
// It views the uint32_t output buffer as bytes, delegates the actual encoding
// to encodeToByteArray, and finally reports the output size in 32-bit words
// through nvalue.
18 uint8_t *bout =
reinterpret_cast<uint8_t *
>(out);
// Remember the start of the output so the total size can be measured later.
19 const uint8_t *
const initbout =
reinterpret_cast<uint8_t *
>(out);
// nvalue is scaled from 32-bit words to bytes before being handed to the
// byte-level encoder, which receives it by reference.
20 size_t bytenvalue = nvalue *
sizeof(uint32_t);
21 encodeToByteArray(in, length, bout, bytenvalue);
// NOTE(review): original line 22 is missing from this excerpt. For the size
// computation below to be non-zero, bout has to be advanced past the encoded
// bytes before padding -- confirm that statement exists in the full file.
// Loop runs while needPaddingTo32Bits(bout) holds; the loop body (original
// lines 24-25) is not visible here.
23 while (needPaddingTo32Bits(bout)) {
// Bytes between the start of the buffer and the current write position.
26 const size_t storageinbytes = bout - initbout;
// The byte count must be a whole number of 32-bit words at this point.
27 assert((storageinbytes % 4) == 0);
// Report the output size in 32-bit words.
28 nvalue = storageinbytes / 4;
// Encodes `length` values from `in` into the byte buffer `bout` using a
// variable number of bytes per value (1 to 5, chosen by magnitude).
// In every branch the last byte written for a value is OR-ed with (1U << 7),
// i.e. bit 7 marks the final byte of a value; the preceding bytes come from
// extract7bits<i>(val).
// On return, nvalue holds the number of bytes produced (bout - initbout).
// NOTE(review): the return-type line and the pointer increments between the
// individual stores are not visible in this excerpt; without them each store
// would overwrite the previous one -- confirm against the full file.
33 genie::compression::DeviceVarintCodec::encodeToByteArray(uint32_t *in,
const size_t length, uint8_t *bout,
size_t &nvalue) {
// Start of the output, kept to compute the number of bytes written.
34 const uint8_t *
const initbout = bout;
35 for (
size_t k = 0; k < length; ++k) {
36 const uint32_t val = in[k];
// val < 2^7: a single byte carrying the value with bit 7 set.
38 if (val < (1U << 7)) {
39 *bout =
static_cast<uint8_t
>(val | (1U << 7));
41 }
else if (val < (1U << 14)) {
// val < 2^14: two stores, the second one flagged with bit 7.
42 *bout = extract7bits<0>(val);
44 *bout = extract7bitsmaskless<1>(val) | (1U << 7);
46 }
else if (val < (1U << 21)) {
// val < 2^21: three stores, the third one flagged with bit 7.
47 *bout = extract7bits<0>(val);
49 *bout = extract7bits<1>(val);
51 *bout = extract7bitsmaskless<2>(val) | (1U << 7);
53 }
else if (val < (1U << 28)) {
// val < 2^28: four stores, the fourth one flagged with bit 7.
54 *bout = extract7bits<0>(val);
56 *bout = extract7bits<1>(val);
58 *bout = extract7bits<2>(val);
60 *bout = extract7bitsmaskless<3>(val) | (1U << 7);
// Five stores, the fifth one flagged with bit 7. Presumably this is the
// fall-through `else` branch for val >= 2^28; its header (original line 62)
// is not visible in this excerpt.
63 *bout = extract7bits<0>(val);
65 *bout = extract7bits<1>(val);
67 *bout = extract7bits<2>(val);
69 *bout = extract7bits<3>(val);
71 *bout = extract7bitsmaskless<4>(val) | (1U << 7);
// Number of bytes written to the output buffer.
75 nvalue = bout - initbout;
// Forwards to the byte-level decoder: the uint32_t input is reinterpreted as
// a byte stream and its length is converted from 32-bit words to bytes.
// out and nvalue are passed through unchanged.
// NOTE(review): the enclosing function header and return statement are not
// visible in this excerpt (presumably decodeArray) -- confirm.
81 decodeFromByteArray((
const uint8_t *)in, length *
sizeof(uint32_t), out, nvalue);
// Decodes a variable-byte stream of `length` bytes starting at `inbyte` into
// 32-bit values written through `out`; on return nvalue holds the number of
// uint32_t values produced (out - initout).
// NOTE(review): the return type, the last parameter, the byte loads, the
// per-byte termination tests and the stores to `out` are not visible in this
// excerpt; the comments below describe only the visible statements.
88 genie::compression::DeviceVarintCodec::decodeFromByteArray(
const uint8_t *inbyte,
const size_t length, uint32_t *out,
// One-past-the-end of the input bytes.
95 const uint8_t *
const endbyte = inbyte + length;
// Start of the output, kept to count the decoded values.
96 const uint32_t *
const initout(out);
// First loop: runs only while more than 5 input bytes remain. The visible
// statements merge 7-bit groups into v at bit offsets 7, 14 and 21, and a
// 4-bit group (mask 0x0F) at offset 28, which completes a 32-bit value.
98 while (endbyte > inbyte + 5) {
112 v |= (c & 0x7F) << 7;
120 v |= (c & 0x7F) << 14;
128 v |= (c & 0x7F) << 21;
137 v |= (c & 0x0F) << 28;
// Second loop: handles the remaining bytes one at a time, checking the end
// of input before every read and accumulating 7 bits per byte at an offset
// that grows by 7 each iteration.
140 while (endbyte > inbyte) {
141 unsigned int shift = 0;
142 for (uint32_t v = 0; endbyte > inbyte; shift += 7) {
143 uint8_t c = *inbyte++;
// NOTE(review): (c & 127) is promoted to int before the shift; at shift == 28
// a group above 7 shifts into the sign bit -- consider casting to uint32_t.
144 v += ((c & 127) << shift);
// Number of decoded 32-bit values.
151 nvalue = out - initout;
// Parameter list of a decoder taking an input pointer, the compressed length,
// an output pointer and a by-reference count.
// NOTE(review): the function name, qualifiers and body are not visible in
// this excerpt (presumably decodeArraySequential; the d_ prefix suggests
// device pointers) -- confirm against the full file.
158 uint32_t *d_in,
size_t comprLength, uint32_t *d_out,
size_t &nvalue)
// Block-cooperative decoder (presumably decodeArrayParallel -- the name line
// is not visible in this excerpt). Each participating thread handles one
// 32-bit input word (index idxUnpack): it first counts how many values start
// in its word, then decodes exactly those values into d_out at the offset
// given by s_numIntsScanned.
// On return `capacity` is overwritten with the number of decoded values and
// the function returns the pointer just past the compressed input.
// NOTE(review): the declarations of idxUnpack, s_numInts and s_numIntsScanned,
// the scan step, any barriers, and the `break`/`return` statements are not
// visible here. s_numIntsScanned[i] is presumably the exclusive prefix sum of
// s_numInts -- confirm.
165 uint32_t *d_in,
size_t comprLength, uint32_t *d_out,
size_t &capacity)
// Index of this thread within its block.
167 int idx = threadIdx.x;
// An empty compressed input is not supported.
169 assert(comprLength > 0);
// Phase 1: per-word count of values that start in that word; the second
// assignment stores 0 (presumably the `else` branch for indices past the end
// of the input; the `else` line itself is not visible).
182 if (idxUnpack < comprLength)
183 s_numInts[idxUnpack] = numIntsStartingHere(d_in, idxUnpack, comprLength);
185 s_numInts[idxUnpack] = 0;
// comprLength divided by 4, rounded up.
190 uint comprLength4 = (comprLength + 3) / 4;
// Total number of values: scanned offset of the last word plus the count of
// values starting in the last word.
195 int decomprLength = s_numIntsScanned[comprLength-1] + s_numInts[comprLength-1];
// The caller-provided capacity must be large enough for the decoded output.
// NOTE(review): this compares a signed int with a size_t.
196 assert(decomprLength <= capacity);
// Threads whose index lies past the compressed input take this branch; its
// body is not visible in this excerpt.
205 if (idxUnpack >= comprLength)
// Phase 2: byte-wise view of the input; this thread starts at the first byte
// of its own word.
208 uint8_t* myBytes =
reinterpret_cast<uint8_t*
>(d_in);
209 int myBytesIdx = idxUnpack * 4;
210 uint8_t myCurrByte = myBytes[myBytesIdx++];
// Top 8 bits of the previous word (the byte stored immediately before this
// word on a little-endian layout); 0xFF for the first word so that it is
// treated as "previous value terminated".
211 uint8_t myPrevByte = idxUnpack > 0 ? (d_in[idxUnpack-1] >> 24) : 0xFF;
// Number of values this thread decodes and where its first value goes.
213 int myNumInts = (int)s_numInts[idxUnpack];
214 int myOutIdx = (int)s_numIntsScanned[idxUnpack];
// A 4-byte word can contain the start of at most 4 values.
216 assert(myNumInts <= 4);
217 assert(myOutIdx < decomprLength || myNumInts == 0);
// Skip bytes that continue a value started in an earlier word: as long as the
// previous byte lacks the terminator bit (bit 7), the current byte still
// belongs to that earlier value.
221 while (myNumInts && !(myPrevByte & 128))
223 myPrevByte = myCurrByte;
// NOTE(review): signed int compared with size_t in this and later asserts.
224 assert(myBytesIdx < comprLength * 4);
225 myCurrByte = myBytes[myBytesIdx++];
// Decode the values that start in this thread's word.
228 for (
int j = 0; j < myNumInts; j++)
230 uint32_t decoded = 0;
// Accumulate 7 payload bits per byte, least-significant group first.
231 for (
unsigned int shift = 0; ; shift += 7)
// NOTE(review): (myCurrByte & 127) is promoted to int before the shift; at
// shift == 28 a group above 7 shifts into the sign bit.
233 decoded += (myCurrByte & 127) << shift;
// End of the compressed input reached: store the value; it must be the last
// value assigned to this thread.
234 if (myBytesIdx == comprLength * 4)
236 d_out[myOutIdx + j] = decoded;
237 assert(j == myNumInts - 1);
// Terminator bit set: the value is complete, store it and fetch the next byte.
240 else if (myCurrByte & 128)
242 d_out[myOutIdx + j] = decoded;
244 assert(myBytesIdx < comprLength * 4);
245 myCurrByte = myBytes[myBytesIdx++];
// Otherwise the value continues in the next byte.
248 assert(myBytesIdx < comprLength * 4);
249 myCurrByte = myBytes[myBytesIdx++];
// Report the number of decoded values and return the end of the input.
254 capacity = decomprLength;
255 return d_in + comprLength;
// Returns how many encoded values begin inside the 32-bit word d_in[idxUnpack]
// of a compressed stream that is comprLength words long.
// NOTE(review): the return type, the condition guarding the increment inside
// the loop and the advance of nextBytePtr are not visible in this excerpt.
// Presumably a value starts at a byte whenever the preceding byte has the
// terminator bit (bit 7) set -- confirm against the full file.
260 genie::compression::DeviceVarintCodec::numIntsStartingHere(uint32_t *d_in,
int idxUnpack,
int comprLength)
// Byte-wise cursor positioned at the first byte of this word.
264 uint8_t* nextBytePtr =
reinterpret_cast<uint8_t*
>(d_in + idxUnpack);
// Top 8 bits of the previous word; 0xFF for the first word, so bit 7 is set.
265 uint8_t prevByte = idxUnpack > 0 ? (d_in[idxUnpack-1] >> 24) : 0xFF;
266 int numIntsStartingHere = 0;
// Examine the 4 bytes of this word.
268 for (
int i = 0; i < 4; i++)
271 numIntsStartingHere++;
273 prevByte = *nextBytePtr;
// Last word of the stream: if the last byte examined does not carry the
// terminator bit, one counted start is removed (presumably it corresponds to
// trailing padding rather than a real value -- confirm).
277 if (idxUnpack == comprLength - 1 && !(prevByte & 128))
278 numIntsStartingHere--;
279 return numIntsStartingHere;
__device__ void d_scanExclusivePerBlockShared(uint4 *d_Dst, uint4 *d_Src, uint activeThreads, uint pow2size)
const uint32_t * decodeArray(const uint32_t *in, const size_t, uint32_t *out, size_t &nvalue)
__device__ uint32_t * decodeArrayParallel(uint32_t *d_in, size_t, uint32_t *d_out, size_t &capacity)
void encodeArray(uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue)
__device__ uint d_pow2ceil_32(uint x)
#define GPUGENIE_CODEC_VARINT_MAX_UNCOMPR_LENGTH
__device__ uint32_t * decodeArraySequential(uint32_t *d_in, const size_t, uint32_t *d_out, size_t &nvalue)
#define GPUGENIE_CODEC_VARINT_THREADBLOCK_SIZE