9 #include "DeviceCodecTemplatesImpl.hpp" 17 genie::compression::decodeArrayParallel<DeviceCopyCodec>(int, int, uint32_t*, size_t, uint32_t*, size_t,
size_t*);
19 genie::compression::decodeArrayParallel<DeviceCopyMultiblockCodec>(int, int, uint32_t*, size_t, uint32_t*, size_t,
size_t*);
21 genie::compression::decodeArrayParallel<DeviceDeltaCodec>(int, int, uint32_t*, size_t, uint32_t*, size_t,
size_t*);
32 for (
int i = 0; i < (int)length; i++)
41 assert(length <= gridDim.x * blockDim.x);
42 assert(length <= nvalue);
44 int idx = blockIdx.x * blockDim.x + threadIdx.x;
46 d_out[idx] = d_in[idx];
61 for (
int i = 0; i < length; i++)
71 assert(length <= decodeArrayParallel_lengthPerBlock());
72 assert(length <= nvalue);
74 int idx = threadIdx.x;
75 int fullThreadBlockLimit = length - decodeArrayParallel_threadsPerBlock();
77 for (; i <= fullThreadBlockLimit; i += decodeArrayParallel_threadsPerBlock())
79 d_out[idx + i] = d_in[idx + i];
82 d_out[idx + i] = d_in[idx + i];
92 std::memcpy(out, in,
sizeof(uint32_t) * length);
100 std::memcpy(out, in,
sizeof(uint32_t) * length);
106 __device__
const uint32_t*
109 if (length > nvalue){
114 for (
int i = 0; i < length; i++)
118 return d_in + length;
124 assert(length <= nvalue);
125 assert(length > 0 && length <= decodeArrayParallel_lengthPerBlock());
127 uint arrayLength = (length + 3) / 4;
138 return d_in + length;
__device__ uint32_t * decodeArrayParallel(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
__device__ void d_scanInclusivePerBlockShared(uint4 *d_Dst, uint4 *d_Src, uint activeThreads, uint pow2size)
const uint32_t * decodeArray(const uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue)
#define GPUGENIE_SCAN_THREADBLOCK_SIZE
__device__ uint32_t * decodeArraySequential(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
__device__ uint32_t * decodeArrayParallel(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
static void delta(const T initoffset, T *data, const size_t size)
__device__ uint32_t * decodeArrayParallel(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)
__device__ const uint32_t * decodeArraySequential(const uint32_t *d_in, const size_t length, uint32_t *d_out, size_t &nvalue)
static __device__ void inverseDeltaOnGPU(const T initoffset, T *d_data, const size_t size)
static void inverseDelta(const T initoffset, T *data, const size_t size)
__device__ uint d_pow2ceil_32(uint x)
void encodeArray(uint32_t *in, const size_t length, uint32_t *out, size_t &nvalue)
__device__ uint32_t * decodeArraySequential(uint32_t *d_in, size_t length, uint32_t *d_out, size_t &nvalue)