GENIE
DeviceBitPackingHelpers.h
Go to the documentation of this file.
1 
6 #ifndef DEVICE_BIT_PACKING_HELPERS_H_
7 #define DEVICE_BIT_PACKING_HELPERS_H_
8 
9 #include <cstdint>
10 #include <stdexcept>
11 #include <vector>
12 
13 #include "DeviceDeltaHelper.h"
14 
15 namespace genie
16 {
17 namespace compression
18 {
19 
20 void __device__ __host__ __fastunpack0 (const uint32_t * in, uint32_t * out);
21 void __device__ __host__ __fastunpack1 (const uint32_t * in, uint32_t * out);
22 void __device__ __host__ __fastunpack2 (const uint32_t * in, uint32_t * out);
23 void __device__ __host__ __fastunpack3 (const uint32_t * in, uint32_t * out);
24 void __device__ __host__ __fastunpack4 (const uint32_t * in, uint32_t * out);
25 void __device__ __host__ __fastunpack5 (const uint32_t * in, uint32_t * out);
26 void __device__ __host__ __fastunpack6 (const uint32_t * in, uint32_t * out);
27 void __device__ __host__ __fastunpack7 (const uint32_t * in, uint32_t * out);
28 void __device__ __host__ __fastunpack8 (const uint32_t * in, uint32_t * out);
29 void __device__ __host__ __fastunpack9 (const uint32_t * in, uint32_t * out);
30 void __device__ __host__ __fastunpack10(const uint32_t * in, uint32_t * out);
31 void __device__ __host__ __fastunpack11(const uint32_t * in, uint32_t * out);
32 void __device__ __host__ __fastunpack12(const uint32_t * in, uint32_t * out);
33 void __device__ __host__ __fastunpack13(const uint32_t * in, uint32_t * out);
34 void __device__ __host__ __fastunpack14(const uint32_t * in, uint32_t * out);
35 void __device__ __host__ __fastunpack15(const uint32_t * in, uint32_t * out);
36 void __device__ __host__ __fastunpack16(const uint32_t * in, uint32_t * out);
37 void __device__ __host__ __fastunpack17(const uint32_t * in, uint32_t * out);
38 void __device__ __host__ __fastunpack18(const uint32_t * in, uint32_t * out);
39 void __device__ __host__ __fastunpack19(const uint32_t * in, uint32_t * out);
40 void __device__ __host__ __fastunpack20(const uint32_t * in, uint32_t * out);
41 void __device__ __host__ __fastunpack21(const uint32_t * in, uint32_t * out);
42 void __device__ __host__ __fastunpack22(const uint32_t * in, uint32_t * out);
43 void __device__ __host__ __fastunpack23(const uint32_t * in, uint32_t * out);
44 void __device__ __host__ __fastunpack24(const uint32_t * in, uint32_t * out);
45 void __device__ __host__ __fastunpack25(const uint32_t * in, uint32_t * out);
46 void __device__ __host__ __fastunpack26(const uint32_t * in, uint32_t * out);
47 void __device__ __host__ __fastunpack27(const uint32_t * in, uint32_t * out);
48 void __device__ __host__ __fastunpack28(const uint32_t * in, uint32_t * out);
49 void __device__ __host__ __fastunpack29(const uint32_t * in, uint32_t * out);
50 void __device__ __host__ __fastunpack30(const uint32_t * in, uint32_t * out);
51 void __device__ __host__ __fastunpack31(const uint32_t * in, uint32_t * out);
52 void __device__ __host__ __fastunpack32(const uint32_t * in, uint32_t * out);
53 
/*
 * Per-bit-width pack routines, one for every width from 0 to 32.
 * Declared without execution-space qualifiers (host only); the definitions
 * live outside this header. fastpack() selects one of them at run time from
 * its `bit` argument.
 */
void __fastpack0(const uint32_t *in, uint32_t *out);
void __fastpack1(const uint32_t *in, uint32_t *out);
void __fastpack2(const uint32_t *in, uint32_t *out);
void __fastpack3(const uint32_t *in, uint32_t *out);
void __fastpack4(const uint32_t *in, uint32_t *out);
void __fastpack5(const uint32_t *in, uint32_t *out);
void __fastpack6(const uint32_t *in, uint32_t *out);
void __fastpack7(const uint32_t *in, uint32_t *out);
void __fastpack8(const uint32_t *in, uint32_t *out);
void __fastpack9(const uint32_t *in, uint32_t *out);
void __fastpack10(const uint32_t *in, uint32_t *out);
void __fastpack11(const uint32_t *in, uint32_t *out);
void __fastpack12(const uint32_t *in, uint32_t *out);
void __fastpack13(const uint32_t *in, uint32_t *out);
void __fastpack14(const uint32_t *in, uint32_t *out);
void __fastpack15(const uint32_t *in, uint32_t *out);
void __fastpack16(const uint32_t *in, uint32_t *out);
void __fastpack17(const uint32_t *in, uint32_t *out);
void __fastpack18(const uint32_t *in, uint32_t *out);
void __fastpack19(const uint32_t *in, uint32_t *out);
void __fastpack20(const uint32_t *in, uint32_t *out);
void __fastpack21(const uint32_t *in, uint32_t *out);
void __fastpack22(const uint32_t *in, uint32_t *out);
void __fastpack23(const uint32_t *in, uint32_t *out);
void __fastpack24(const uint32_t *in, uint32_t *out);
void __fastpack25(const uint32_t *in, uint32_t *out);
void __fastpack26(const uint32_t *in, uint32_t *out);
void __fastpack27(const uint32_t *in, uint32_t *out);
void __fastpack28(const uint32_t *in, uint32_t *out);
void __fastpack29(const uint32_t *in, uint32_t *out);
void __fastpack30(const uint32_t *in, uint32_t *out);
void __fastpack31(const uint32_t *in, uint32_t *out);
void __fastpack32(const uint32_t *in, uint32_t *out);
87 
/*
 * Per-bit-width "without mask" pack routines, one for every width from 0
 * to 32. Declared without execution-space qualifiers (host only); the
 * definitions live outside this header. fastpackwithoutmask() selects one of
 * them at run time from its `bit` argument.
 */
void __fastpackwithoutmask0(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask1(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask2(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask3(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask4(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask5(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask6(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask7(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask8(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask9(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask10(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask11(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask12(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask13(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask14(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask15(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask16(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask17(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask18(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask19(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask20(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask21(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask22(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask23(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask24(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask25(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask26(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask27(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask28(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask29(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask30(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask31(const uint32_t *in, uint32_t *out);
void __fastpackwithoutmask32(const uint32_t *in, uint32_t *out);
121 
122 void __device__ __host__ __integratedfastunpack0(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
123 void __device__ __host__ __integratedfastunpack1(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
124 void __device__ __host__ __integratedfastunpack2(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
125 void __device__ __host__ __integratedfastunpack3(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
126 void __device__ __host__ __integratedfastunpack4(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
127 void __device__ __host__ __integratedfastunpack5(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
128 void __device__ __host__ __integratedfastunpack6(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
129 void __device__ __host__ __integratedfastunpack7(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
130 void __device__ __host__ __integratedfastunpack8(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
131 void __device__ __host__ __integratedfastunpack9(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
132 void __device__ __host__ __integratedfastunpack10(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
133 void __device__ __host__ __integratedfastunpack11(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
134 void __device__ __host__ __integratedfastunpack12(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
135 void __device__ __host__ __integratedfastunpack13(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
136 void __device__ __host__ __integratedfastunpack14(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
137 void __device__ __host__ __integratedfastunpack15(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
138 void __device__ __host__ __integratedfastunpack16(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
139 void __device__ __host__ __integratedfastunpack17(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
140 void __device__ __host__ __integratedfastunpack18(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
141 void __device__ __host__ __integratedfastunpack19(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
142 void __device__ __host__ __integratedfastunpack20(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
143 void __device__ __host__ __integratedfastunpack21(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
144 void __device__ __host__ __integratedfastunpack22(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
145 void __device__ __host__ __integratedfastunpack23(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
146 void __device__ __host__ __integratedfastunpack24(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
147 void __device__ __host__ __integratedfastunpack25(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
148 void __device__ __host__ __integratedfastunpack26(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
149 void __device__ __host__ __integratedfastunpack27(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
150 void __device__ __host__ __integratedfastunpack28(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
151 void __device__ __host__ __integratedfastunpack29(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
152 void __device__ __host__ __integratedfastunpack30(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
153 void __device__ __host__ __integratedfastunpack31(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
154 void __device__ __host__ __integratedfastunpack32(const uint32_t initoffset, const uint32_t * in, uint32_t * out);
155 
/*
 * Per-bit-width "integrated" pack routines, one for every width from 0
 * to 32. Declared without execution-space qualifiers (host only); the
 * definitions live outside this header. integratedfastpackwithoutmask()
 * selects one of them at run time from its `bit` argument.
 * NOTE(review): `initoffset` is presumably the value that precedes the block
 * and serves as the delta base -- confirm against the definitions.
 */
void __integratedfastpack0(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack1(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack2(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack3(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack4(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack5(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack6(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack7(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack8(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack9(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack10(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack11(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack12(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack13(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack14(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack15(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack16(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack17(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack18(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack19(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack20(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack21(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack22(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack23(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack24(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack25(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack26(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack27(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack28(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack29(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack30(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack31(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
void __integratedfastpack32(const uint32_t initoffset, const uint32_t *in, uint32_t *out);
189 
190 
/** Number of 32-bit integers the pack/unpack helpers handle per block. */
static const unsigned BlockSize = 32;
193 
194  __device__ __host__ static void inline
195  fastunpack(const uint32_t * in, uint32_t * out, const uint32_t bit)
196  {
197  // Could have used function pointers instead of switch.
198  // Switch calls do offer the compiler more opportunities for optimization in
199  // theory. In this case, it makes no difference with a good compiler.
200  switch (bit) {
201  case 0:
202  __fastunpack0(in, out);
203  break;
204  case 1:
205  __fastunpack1(in, out);
206  break;
207  case 2:
208  __fastunpack2(in, out);
209  break;
210  case 3:
211  __fastunpack3(in, out);
212  break;
213  case 4:
214  __fastunpack4(in, out);
215  break;
216  case 5:
217  __fastunpack5(in, out);
218  break;
219  case 6:
220  __fastunpack6(in, out);
221  break;
222  case 7:
223  __fastunpack7(in, out);
224  break;
225  case 8:
226  __fastunpack8(in, out);
227  break;
228  case 9:
229  __fastunpack9(in, out);
230  break;
231  case 10:
232  __fastunpack10(in, out);
233  break;
234  case 11:
235  __fastunpack11(in, out);
236  break;
237  case 12:
238  __fastunpack12(in, out);
239  break;
240  case 13:
241  __fastunpack13(in, out);
242  break;
243  case 14:
244  __fastunpack14(in, out);
245  break;
246  case 15:
247  __fastunpack15(in, out);
248  break;
249  case 16:
250  __fastunpack16(in, out);
251  break;
252  case 17:
253  __fastunpack17(in, out);
254  break;
255  case 18:
256  __fastunpack18(in, out);
257  break;
258  case 19:
259  __fastunpack19(in, out);
260  break;
261  case 20:
262  __fastunpack20(in, out);
263  break;
264  case 21:
265  __fastunpack21(in, out);
266  break;
267  case 22:
268  __fastunpack22(in, out);
269  break;
270  case 23:
271  __fastunpack23(in, out);
272  break;
273  case 24:
274  __fastunpack24(in, out);
275  break;
276  case 25:
277  __fastunpack25(in, out);
278  break;
279  case 26:
280  __fastunpack26(in, out);
281  break;
282  case 27:
283  __fastunpack27(in, out);
284  break;
285  case 28:
286  __fastunpack28(in, out);
287  break;
288  case 29:
289  __fastunpack29(in, out);
290  break;
291  case 30:
292  __fastunpack30(in, out);
293  break;
294  case 31:
295  __fastunpack31(in, out);
296  break;
297  case 32:
298  __fastunpack32(in, out);
299  break;
300  default:
301  break;
302  }
303  }
304 
305  static void inline
306  fastpack(const uint32_t * in, uint32_t * out, const uint32_t bit)
307  {
308  // Could have used function pointers instead of switch.
309  // Switch calls do offer the compiler more opportunities for optimization in
310  // theory. In this case, it makes no difference with a good compiler.
311  switch (bit) {
312  case 0:
313  __fastpack0(in, out);
314  break;
315  case 1:
316  __fastpack1(in, out);
317  break;
318  case 2:
319  __fastpack2(in, out);
320  break;
321  case 3:
322  __fastpack3(in, out);
323  break;
324  case 4:
325  __fastpack4(in, out);
326  break;
327  case 5:
328  __fastpack5(in, out);
329  break;
330  case 6:
331  __fastpack6(in, out);
332  break;
333  case 7:
334  __fastpack7(in, out);
335  break;
336  case 8:
337  __fastpack8(in, out);
338  break;
339  case 9:
340  __fastpack9(in, out);
341  break;
342  case 10:
343  __fastpack10(in, out);
344  break;
345  case 11:
346  __fastpack11(in, out);
347  break;
348  case 12:
349  __fastpack12(in, out);
350  break;
351  case 13:
352  __fastpack13(in, out);
353  break;
354  case 14:
355  __fastpack14(in, out);
356  break;
357  case 15:
358  __fastpack15(in, out);
359  break;
360  case 16:
361  __fastpack16(in, out);
362  break;
363  case 17:
364  __fastpack17(in, out);
365  break;
366  case 18:
367  __fastpack18(in, out);
368  break;
369  case 19:
370  __fastpack19(in, out);
371  break;
372  case 20:
373  __fastpack20(in, out);
374  break;
375  case 21:
376  __fastpack21(in, out);
377  break;
378  case 22:
379  __fastpack22(in, out);
380  break;
381  case 23:
382  __fastpack23(in, out);
383  break;
384  case 24:
385  __fastpack24(in, out);
386  break;
387  case 25:
388  __fastpack25(in, out);
389  break;
390  case 26:
391  __fastpack26(in, out);
392  break;
393  case 27:
394  __fastpack27(in, out);
395  break;
396  case 28:
397  __fastpack28(in, out);
398  break;
399  case 29:
400  __fastpack29(in, out);
401  break;
402  case 30:
403  __fastpack30(in, out);
404  break;
405  case 31:
406  __fastpack31(in, out);
407  break;
408  case 32:
409  __fastpack32(in, out);
410  break;
411  default:
412  break;
413  }
414  }
415 
416  /*assumes that integers fit in the prescribed number of bits*/
417  static void inline
418  fastpackwithoutmask(const uint32_t * in, uint32_t * out, const uint32_t bit) {
419  // Could have used function pointers instead of switch.
420  // Switch calls do offer the compiler more opportunities for optimization in
421  // theory. In this case, it makes no difference with a good compiler.
422  switch (bit) {
423  case 0:
424  __fastpackwithoutmask0(in, out);
425  break;
426  case 1:
427  __fastpackwithoutmask1(in, out);
428  break;
429  case 2:
430  __fastpackwithoutmask2(in, out);
431  break;
432  case 3:
433  __fastpackwithoutmask3(in, out);
434  break;
435  case 4:
436  __fastpackwithoutmask4(in, out);
437  break;
438  case 5:
439  __fastpackwithoutmask5(in, out);
440  break;
441  case 6:
442  __fastpackwithoutmask6(in, out);
443  break;
444  case 7:
445  __fastpackwithoutmask7(in, out);
446  break;
447  case 8:
448  __fastpackwithoutmask8(in, out);
449  break;
450  case 9:
451  __fastpackwithoutmask9(in, out);
452  break;
453  case 10:
454  __fastpackwithoutmask10(in, out);
455  break;
456  case 11:
457  __fastpackwithoutmask11(in, out);
458  break;
459  case 12:
460  __fastpackwithoutmask12(in, out);
461  break;
462  case 13:
463  __fastpackwithoutmask13(in, out);
464  break;
465  case 14:
466  __fastpackwithoutmask14(in, out);
467  break;
468  case 15:
469  __fastpackwithoutmask15(in, out);
470  break;
471  case 16:
472  __fastpackwithoutmask16(in, out);
473  break;
474  case 17:
475  __fastpackwithoutmask17(in, out);
476  break;
477  case 18:
478  __fastpackwithoutmask18(in, out);
479  break;
480  case 19:
481  __fastpackwithoutmask19(in, out);
482  break;
483  case 20:
484  __fastpackwithoutmask20(in, out);
485  break;
486  case 21:
487  __fastpackwithoutmask21(in, out);
488  break;
489  case 22:
490  __fastpackwithoutmask22(in, out);
491  break;
492  case 23:
493  __fastpackwithoutmask23(in, out);
494  break;
495  case 24:
496  __fastpackwithoutmask24(in, out);
497  break;
498  case 25:
499  __fastpackwithoutmask25(in, out);
500  break;
501  case 26:
502  __fastpackwithoutmask26(in, out);
503  break;
504  case 27:
505  __fastpackwithoutmask27(in, out);
506  break;
507  case 28:
508  __fastpackwithoutmask28(in, out);
509  break;
510  case 29:
511  __fastpackwithoutmask29(in, out);
512  break;
513  case 30:
514  __fastpackwithoutmask30(in, out);
515  break;
516  case 31:
517  __fastpackwithoutmask31(in, out);
518  break;
519  case 32:
520  __fastpackwithoutmask32(in, out);
521  break;
522  default:
523  break;
524  }
525  }
526 
527  __device__ __host__ static void inline
528  integratedfastunpack(const uint32_t initoffset, const uint32_t * in, uint32_t * out,
529  const uint32_t bit)
530  {
531  // Could have used function pointers instead of switch.
532  // Switch calls do offer the compiler more opportunities for optimization in
533  // theory. In this case, it makes no difference with a good compiler.
534  switch (bit) {
535  case 0:
536  __integratedfastunpack0(initoffset, in, out);
537  break;
538  case 1:
539  __integratedfastunpack1(initoffset, in, out);
540  break;
541  case 2:
542  __integratedfastunpack2(initoffset, in, out);
543  break;
544  case 3:
545  __integratedfastunpack3(initoffset, in, out);
546  break;
547  case 4:
548  __integratedfastunpack4(initoffset, in, out);
549  break;
550  case 5:
551  __integratedfastunpack5(initoffset, in, out);
552  break;
553  case 6:
554  __integratedfastunpack6(initoffset, in, out);
555  break;
556  case 7:
557  __integratedfastunpack7(initoffset, in, out);
558  break;
559  case 8:
560  __integratedfastunpack8(initoffset, in, out);
561  break;
562  case 9:
563  __integratedfastunpack9(initoffset, in, out);
564  break;
565  case 10:
566  __integratedfastunpack10(initoffset, in, out);
567  break;
568  case 11:
569  __integratedfastunpack11(initoffset, in, out);
570  break;
571  case 12:
572  __integratedfastunpack12(initoffset, in, out);
573  break;
574  case 13:
575  __integratedfastunpack13(initoffset, in, out);
576  break;
577  case 14:
578  __integratedfastunpack14(initoffset, in, out);
579  break;
580  case 15:
581  __integratedfastunpack15(initoffset, in, out);
582  break;
583  case 16:
584  __integratedfastunpack16(initoffset, in, out);
585  break;
586  case 17:
587  __integratedfastunpack17(initoffset, in, out);
588  break;
589  case 18:
590  __integratedfastunpack18(initoffset, in, out);
591  break;
592  case 19:
593  __integratedfastunpack19(initoffset, in, out);
594  break;
595  case 20:
596  __integratedfastunpack20(initoffset, in, out);
597  break;
598  case 21:
599  __integratedfastunpack21(initoffset, in, out);
600  break;
601  case 22:
602  __integratedfastunpack22(initoffset, in, out);
603  break;
604  case 23:
605  __integratedfastunpack23(initoffset, in, out);
606  break;
607  case 24:
608  __integratedfastunpack24(initoffset, in, out);
609  break;
610  case 25:
611  __integratedfastunpack25(initoffset, in, out);
612  break;
613  case 26:
614  __integratedfastunpack26(initoffset, in, out);
615  break;
616  case 27:
617  __integratedfastunpack27(initoffset, in, out);
618  break;
619  case 28:
620  __integratedfastunpack28(initoffset, in, out);
621  break;
622  case 29:
623  __integratedfastunpack29(initoffset, in, out);
624  break;
625  case 30:
626  __integratedfastunpack30(initoffset, in, out);
627  break;
628  case 31:
629  __integratedfastunpack31(initoffset, in, out);
630  break;
631  case 32:
632  __integratedfastunpack32(initoffset, in, out);
633  break;
634  default:
635  break;
636  }
637  }
638 
639  /*assumes that integers fit in the prescribed number of bits*/
640  static void inline
641  integratedfastpackwithoutmask(const uint32_t initoffset, const uint32_t * in,
642  uint32_t * out, const uint32_t bit)
643  {
644  // Could have used function pointers instead of switch.
645  // Switch calls do offer the compiler more opportunities for optimization in
646  // theory. In this case, it makes no difference with a good compiler.
647  switch (bit) {
648  case 0:
649  __integratedfastpack0(initoffset, in, out);
650  break;
651  case 1:
652  __integratedfastpack1(initoffset, in, out);
653  break;
654  case 2:
655  __integratedfastpack2(initoffset, in, out);
656  break;
657  case 3:
658  __integratedfastpack3(initoffset, in, out);
659  break;
660  case 4:
661  __integratedfastpack4(initoffset, in, out);
662  break;
663  case 5:
664  __integratedfastpack5(initoffset, in, out);
665  break;
666  case 6:
667  __integratedfastpack6(initoffset, in, out);
668  break;
669  case 7:
670  __integratedfastpack7(initoffset, in, out);
671  break;
672  case 8:
673  __integratedfastpack8(initoffset, in, out);
674  break;
675  case 9:
676  __integratedfastpack9(initoffset, in, out);
677  break;
678  case 10:
679  __integratedfastpack10(initoffset, in, out);
680  break;
681  case 11:
682  __integratedfastpack11(initoffset, in, out);
683  break;
684  case 12:
685  __integratedfastpack12(initoffset, in, out);
686  break;
687  case 13:
688  __integratedfastpack13(initoffset, in, out);
689  break;
690  case 14:
691  __integratedfastpack14(initoffset, in, out);
692  break;
693  case 15:
694  __integratedfastpack15(initoffset, in, out);
695  break;
696  case 16:
697  __integratedfastpack16(initoffset, in, out);
698  break;
699  case 17:
700  __integratedfastpack17(initoffset, in, out);
701  break;
702  case 18:
703  __integratedfastpack18(initoffset, in, out);
704  break;
705  case 19:
706  __integratedfastpack19(initoffset, in, out);
707  break;
708  case 20:
709  __integratedfastpack20(initoffset, in, out);
710  break;
711  case 21:
712  __integratedfastpack21(initoffset, in, out);
713  break;
714  case 22:
715  __integratedfastpack22(initoffset, in, out);
716  break;
717  case 23:
718  __integratedfastpack23(initoffset, in, out);
719  break;
720  case 24:
721  __integratedfastpack24(initoffset, in, out);
722  break;
723  case 25:
724  __integratedfastpack25(initoffset, in, out);
725  break;
726  case 26:
727  __integratedfastpack26(initoffset, in, out);
728  break;
729  case 27:
730  __integratedfastpack27(initoffset, in, out);
731  break;
732  case 28:
733  __integratedfastpack28(initoffset, in, out);
734  break;
735  case 29:
736  __integratedfastpack29(initoffset, in, out);
737  break;
738  case 30:
739  __integratedfastpack30(initoffset, in, out);
740  break;
741  case 31:
742  __integratedfastpack31(initoffset, in, out);
743  break;
744  case 32:
745  __integratedfastpack32(initoffset, in, out);
746  break;
747  default:
748  break;
749  }
750  }
751 
/**
 * Replaces each element with its difference from the preceding element,
 * in place.
 *
 * @param initoffset value subtracted from the first element
 * @param data       array of `size` elements, overwritten with the differences
 * @param size       number of elements; nothing happens when it is zero
 */
template <class T>
static void delta(const T initoffset, T *data, const size_t size) {
    if (size == 0) {
        return; // empty input: nothing to do
    }
    // Walk from the back so that each element still sees the original value
    // of its predecessor.
    T *cursor = data + (size - 1);
    while (cursor != data) {
        *cursor -= *(cursor - 1);
        --cursor;
    }
    *data -= initoffset;
}
762 
/**
 * Compile-time-sized variant: replaces each of the `size` elements with its
 * difference from the preceding element, in place.
 *
 * @tparam size      number of elements in `data`; nothing happens when zero
 * @param initoffset value subtracted from the first element
 * @param data       array of `size` elements, overwritten with the differences
 */
template <size_t size, class T>
static void delta(const T initoffset, T *data) {
    if (size == 0) {
        return; // empty input: nothing to do
    }
    // Walk from the back so that each element still sees the original value
    // of its predecessor.
    size_t idx = size;
    while (--idx != 0) {
        data[idx] -= data[idx - 1];
    }
    data[0] -= initoffset;
}
773 
/**
 * Undoes delta(): turns an array of differences back into running sums,
 * in place.
 *
 * @param initoffset value added to the first element
 * @param data       array of `size` differences, overwritten with the sums
 * @param size       number of elements; nothing happens when it is zero
 */
template <class T>
static void inverseDelta(const T initoffset, T *data, const size_t size) {
    if (size == 0) {
        return; // empty input: nothing to do
    }
    data[0] += initoffset;

    const size_t kUnroll = 4;
    // Largest multiple of kUnroll not exceeding size (0 when size < kUnroll).
    const size_t unrollable = (size / kUnroll) * kUnroll;
    size_t pos = 1;
    if (unrollable >= kUnroll) {
        // Manually unrolled part: the running sum is carried in a local
        // instead of being re-read from data[pos - 1] each step.
        T running = data[0];
        const size_t stop = unrollable - kUnroll;
        while (pos < stop) {
            data[pos] += running;
            running = data[pos];
            data[pos + 1] += running;
            running = data[pos + 1];
            data[pos + 2] += running;
            running = data[pos + 2];
            data[pos + 3] += running;
            running = data[pos + 3];
            pos += kUnroll;
        }
    }
    // Remaining elements, one at a time.
    while (pos != size) {
        data[pos] += data[pos - 1];
        ++pos;
    }
}
/**
 * Compile-time-sized variant: undoes delta<size>() by turning `size`
 * differences back into running sums, in place.
 *
 * @tparam size      number of elements in `data`; nothing happens when zero
 * @param initoffset value added to the first element
 * @param data       array of `size` differences, overwritten with the sums
 */
template <size_t size, class T>
static void inverseDelta(const T initoffset, T *data) {
    if (size == 0) {
        return; // empty input: nothing to do
    }
    data[0] += initoffset;

    const size_t kUnroll = 4;
    // Largest multiple of kUnroll not exceeding size (0 when size < kUnroll).
    const size_t unrollable = (size / kUnroll) * kUnroll;
    size_t pos = 1;
    if (unrollable >= kUnroll) {
        // Manually unrolled part: the running sum is carried in a local
        // instead of being re-read from data[pos - 1] each step.
        T running = data[0];
        const size_t stop = unrollable - kUnroll;
        while (pos < stop) {
            data[pos] += running;
            running = data[pos];
            data[pos + 1] += running;
            running = data[pos + 1];
            data[pos + 2] += running;
            running = data[pos + 2];
            data[pos + 3] += running;
            running = data[pos + 3];
            pos += kUnroll;
        }
    }
    // Remaining elements, one at a time.
    while (pos != size) {
        data[pos] += data[pos - 1];
        ++pos;
    }
}
818 
819  static void inline ipackwithoutmask(const uint32_t *in, const size_t Qty,
820  uint32_t *out, const uint32_t bit) {
821  if (Qty % BlockSize) {
822  throw std::logic_error("Incorrect # of entries.");
823  }
824  uint32_t initoffset = 0;
825 
826  for (size_t k = 0; k < Qty / BlockSize; ++k) {
827  integratedfastpackwithoutmask(initoffset, in + k * BlockSize,
828  out + k * bit, bit);
829  initoffset = *(in + k * BlockSize + BlockSize - 1);
830  }
831  }
832 
833  static void inline pack(uint32_t *in, const size_t Qty, uint32_t *out,
834  const uint32_t bit) {
835  if (Qty % BlockSize) {
836  throw std::logic_error("Incorrect # of entries.");
837  }
838  uint32_t initoffset = 0;
839 
840  for (size_t k = 0; k < Qty / BlockSize; ++k) {
841  const uint32_t nextoffset = *(in + k * BlockSize + BlockSize - 1);
842  if (bit < 32)
843  delta<BlockSize,uint32_t>(initoffset, in + k * BlockSize);
844  fastpack(in + k * BlockSize, out + k * bit, bit);
845  initoffset = nextoffset;
846  }
847  }
848 
849  static void inline packWithoutDelta(uint32_t *in, const size_t Qty,
850  uint32_t *out, const uint32_t bit) {
851  for (size_t k = 0; k < Qty / BlockSize; ++k) {
852  fastpack(in + k * BlockSize, out + k * bit, bit);
853  }
854  }
855 
856  static void inline unpack(const uint32_t *in, const size_t Qty, uint32_t *out,
857  const uint32_t bit) {
858  if (Qty % BlockSize) {
859  throw std::logic_error("Incorrect # of entries.");
860  }
861  uint32_t initoffset = 0;
862 
863  for (size_t k = 0; k < Qty / BlockSize; ++k) {
864  fastunpack(in + k * bit, out + k * BlockSize, bit);
865  if (bit < 32)
866  inverseDelta<BlockSize,uint32_t>(initoffset, out + k * BlockSize);
867  initoffset = *(out + k * BlockSize + BlockSize - 1);
868  }
869  }
870 
871  static void inline unpackWithoutDelta(const uint32_t *in, const size_t Qty,
872  uint32_t *out, const uint32_t bit) {
873  for (size_t k = 0; k < Qty / BlockSize; ++k) {
874  fastunpack(in + k * bit, out + k * BlockSize, bit);
875  }
876  }
877 
878  static void inline packwithoutmask(uint32_t *in, const size_t Qty,
879  uint32_t *out, const uint32_t bit) {
880  if (Qty % BlockSize) {
881  throw std::logic_error("Incorrect # of entries.");
882  }
883  uint32_t initoffset = 0;
884 
885  for (size_t k = 0; k < Qty / BlockSize; ++k) {
886  const uint32_t nextoffset = *(in + k * BlockSize + BlockSize - 1);
887  if (bit < 32)
888  delta<BlockSize,uint32_t>(initoffset, in + k * BlockSize);
889  fastpackwithoutmask(in + k * BlockSize, out + k * bit, bit);
890  initoffset = nextoffset;
891  }
892  }
893 
894  static void inline packwithoutmaskWithoutDelta(uint32_t *in, const size_t Qty,
895  uint32_t *out,
896  const uint32_t bit) {
897  for (size_t k = 0; k < Qty / BlockSize; ++k) {
898  fastpackwithoutmask(in + k * BlockSize, out + k * bit, bit);
899  }
900  }
901 
902  static void inline iunpack(const uint32_t *in, const size_t Qty,
903  uint32_t *out, const uint32_t bit) {
904  if (Qty % BlockSize) {
905  throw std::logic_error("Incorrect # of entries.");
906  }
907 
908  uint32_t initoffset = 0;
909  for (size_t k = 0; k < Qty / BlockSize; ++k) {
910  integratedfastunpack(initoffset, in + k * bit, out + k * BlockSize, bit);
911  initoffset = *(out + k * BlockSize + BlockSize - 1);
912  }
913  }
914 
915  static void CheckMaxDiff(const std::vector<uint32_t> &refdata, unsigned bit) {
916  for (size_t i = 1; i < refdata.size(); ++i) {
917  if (gccbits(refdata[i] - refdata[i - 1]) > bit)
918  throw std::runtime_error("bug");
919  }
920  }
921 
/**
 * Returns the number of bits needed to represent `v`: the position of its
 * highest set bit counted from 1, or 0 when `v` is zero.
 *
 * Relies on the GCC/Clang intrinsic __builtin_clz.
 */
static inline uint32_t gccbits(const uint32_t v) {
    if (v == 0) {
        return 0; // handled separately: __builtin_clz(0) is undefined
    }
    return 32 - __builtin_clz(v);
}
925 };
926 
927 } // namespace compression
928 } // namespace genie
929 
930 #endif
void __device__ __host__ __integratedfastunpack28(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack2(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
static void delta(const T initoffset, T *data)
void __fastpack29(const uint32_t *in, uint32_t *out)
void __integratedfastpack14(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack20(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack12(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack5(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack32(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack14(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask3(const uint32_t *in, uint32_t *out)
void __fastpack20(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack31(const uint32_t *in, uint32_t *out)
__device__ static __host__ void integratedfastunpack(const uint32_t initoffset, const uint32_t *in, uint32_t *out, const uint32_t bit)
void __integratedfastpack1(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
__device__ static __host__ void fastunpack(const uint32_t *in, uint32_t *out, const uint32_t bit)
void __fastpack14(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack25(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack32(const uint32_t, const uint32_t *in, uint32_t *out)
static void ipackwithoutmask(const uint32_t *in, const size_t Qty, uint32_t *out, const uint32_t bit)
void __device__ __host__ __fastunpack15(const uint32_t *in, uint32_t *out)
This is the top-level namespace of the project.
void __device__ __host__ __fastunpack23(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack18(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack4(const uint32_t *in, uint32_t *out)
void __integratedfastpack10(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
static void delta(const T initoffset, T *data, const size_t size)
void __integratedfastpack18(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack17(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack7(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask6(const uint32_t *in, uint32_t *out)
void __fastpack7(const uint32_t *in, uint32_t *out)
static void packWithoutDelta(uint32_t *in, const size_t Qty, uint32_t *out, const uint32_t bit)
void __device__ __host__ __fastunpack20(const uint32_t *in, uint32_t *out)
void __fastpack22(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask1(const uint32_t *in, uint32_t *out)
void __integratedfastpack23(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask16(const uint32_t *in, uint32_t *out)
void __integratedfastpack28(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
static void packwithoutmaskWithoutDelta(uint32_t *in, const size_t Qty, uint32_t *out, const uint32_t bit)
void __device__ __host__ __integratedfastunpack4(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack27(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
static void packwithoutmask(uint32_t *in, const size_t Qty, uint32_t *out, const uint32_t bit)
void __fastpackwithoutmask23(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask24(const uint32_t *in, uint32_t *out)
void __fastpack21(const uint32_t *in, uint32_t *out)
void __integratedfastpack19(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask22(const uint32_t *in, uint32_t *out)
void __fastpack30(const uint32_t *in, uint32_t *out)
void __fastpack5(const uint32_t *in, uint32_t *out)
void __fastpack9(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack8(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpack19(const uint32_t *in, uint32_t *out)
void __integratedfastpack15(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack1(const uint32_t *in, uint32_t *out)
void __fastpack23(const uint32_t *in, uint32_t *out)
static void inverseDelta(const T initoffset, T *data)
static void fastpack(const uint32_t *in, uint32_t *out, const uint32_t bit)
void __device__ __host__ __fastunpack12(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack26(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __integratedfastpack27(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack21(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack19(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __integratedfastpack12(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask11(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack25(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask18(const uint32_t *in, uint32_t *out)
void __fastpack26(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack26(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask9(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack16(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack30(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpack15(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack17(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
static void fastpackwithoutmask(const uint32_t *in, uint32_t *out, const uint32_t bit)
void __device__ __host__ __fastunpack24(const uint32_t *in, uint32_t *out)
void __fastpack32(const uint32_t *in, uint32_t *out)
static void unpackWithoutDelta(const uint32_t *in, const size_t Qty, uint32_t *out, const uint32_t bit)
void __device__ __host__ __integratedfastunpack11(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask31(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack27(const uint32_t *in, uint32_t *out)
void __integratedfastpack24(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack3(const uint32_t *in, uint32_t *out)
void __integratedfastpack5(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack13(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack15(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack13(const uint32_t *in, uint32_t *out)
void __integratedfastpack22(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack2(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack11(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack18(const uint32_t *in, uint32_t *out)
static void iunpack(const uint32_t *in, const size_t Qty, uint32_t *out, const uint32_t bit)
void __fastpack10(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask4(const uint32_t *in, uint32_t *out)
void __fastpack2(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack29(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask19(const uint32_t *in, uint32_t *out)
void __fastpack4(const uint32_t *in, uint32_t *out)
static void pack(uint32_t *in, const size_t Qty, uint32_t *out, const uint32_t bit)
void __fastpackwithoutmask8(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack14(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpack11(const uint32_t *in, uint32_t *out)
void __integratedfastpack11(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpack31(const uint32_t *in, uint32_t *out)
void __integratedfastpack20(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask28(const uint32_t *in, uint32_t *out)
void __integratedfastpack29(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpack8(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask13(const uint32_t *in, uint32_t *out)
void __fastpack16(const uint32_t *in, uint32_t *out)
void __fastpack1(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack22(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __integratedfastpack30(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
static void unpack(const uint32_t *in, const size_t Qty, uint32_t *out, const uint32_t bit)
void __fastpackwithoutmask12(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack31(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack3(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack5(const uint32_t *in, uint32_t *out)
void __integratedfastpack25(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpack17(const uint32_t *in, uint32_t *out)
void __integratedfastpack32(const uint32_t, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask2(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask14(const uint32_t *in, uint32_t *out)
void __fastpack12(const uint32_t *in, uint32_t *out)
static void inverseDelta(const T initoffset, T *data, const size_t size)
void __fastpack18(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask0(const uint32_t *, uint32_t *)
void __device__ __host__ __integratedfastunpack24(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack23(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask20(const uint32_t *in, uint32_t *out)
void __integratedfastpack7(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpack3(const uint32_t *in, uint32_t *out)
void __fastpack13(const uint32_t *in, uint32_t *out)
void __integratedfastpack0(const uint32_t, const uint32_t *, uint32_t *)
void __device__ __host__ __fastunpack22(const uint32_t *in, uint32_t *out)
void __integratedfastpack17(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __integratedfastpack21(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask15(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask7(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask29(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack10(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask32(const uint32_t *in, uint32_t *out)
void __integratedfastpack8(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack0(const uint32_t *, uint32_t *out)
void __fastpackwithoutmask30(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack1(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __integratedfastpack3(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack9(const uint32_t *in, uint32_t *out)
void __fastpack6(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask5(const uint32_t *in, uint32_t *out)
void __integratedfastpack31(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __integratedfastpack26(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask26(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack6(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack19(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask27(const uint32_t *in, uint32_t *out)
void __integratedfastpack9(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask21(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack7(const uint32_t *in, uint32_t *out)
static void CheckMaxDiff(const std::vector< uint32_t > &refdata, unsigned bit)
void __device__ __host__ __fastunpack29(const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack8(const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask10(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack16(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack28(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack21(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
static void integratedfastpackwithoutmask(const uint32_t initoffset, const uint32_t *in, uint32_t *out, const uint32_t bit)
void __device__ __host__ __integratedfastunpack9(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask17(const uint32_t *in, uint32_t *out)
void __integratedfastpack6(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpack25(const uint32_t *in, uint32_t *out)
void __device__ __host__ __integratedfastunpack6(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpack24(const uint32_t *in, uint32_t *out)
void __integratedfastpack4(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __device__ __host__ __fastunpack30(const uint32_t *in, uint32_t *out)
void __host__ __device__ __integratedfastunpack0(const uint32_t initoffset, const uint32_t *__restrict__, uint32_t *__restrict__ out)
void __fastpack0(const uint32_t *, uint32_t *)
void __device__ __host__ __integratedfastunpack10(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __integratedfastpack16(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpackwithoutmask25(const uint32_t *in, uint32_t *out)
void __integratedfastpack13(const uint32_t initoffset, const uint32_t *in, uint32_t *out)
void __fastpack28(const uint32_t *in, uint32_t *out)
void __fastpack27(const uint32_t *in, uint32_t *out)
void __integratedfastpack2(const uint32_t initoffset, const uint32_t *in, uint32_t *out)