GENIE
inv_compr_table.cu
Go to the documentation of this file.
1 #include <fstream>
2 #include <iostream>
3 #include <map>
4 #include <memory>
5 
6 #include <genie/configure.h>
9 #include <genie/utility/Logger.h>
10 #include <genie/utility/Timing.h>
15 
16 #include "inv_compr_table.h"
17 
18 using namespace genie::compression;
19 using namespace genie::utility;
20 
21 BOOST_CLASS_EXPORT_IMPLEMENT(genie::table::inv_compr_table)
22 
23 #ifndef GENIE_COMPR
25 std::vector<int>* genie::table::inv_compr_table::inv() {return nullptr;}
26 std::vector<int>* genie::table::inv_compr_table::inv_pos() {return nullptr;}
27 void genie::table::inv_compr_table::build(size_t max_length, bool use_load_balance) {}
28 #else
29 
30 void
31 genie::table::inv_compr_table::build(size_t max_length, bool use_load_balance)
32 {
33  Logger::log(Logger::DEBUG, "Bulding uncompressed inv_table...");
34  inv_table::build(max_length, use_load_balance);
35 
36 
37  Logger::log(Logger::DEBUG, "Bulding compressed inv_table...");
38 
39  std::vector<int> &inv = *(inv_table::inv());
40  std::vector<int> &invPos = *(inv_table::inv_pos());
41  std::vector<uint32_t> &compressedInv = m_comprInv;
42  std::vector<int> &compressedInvPos = m_comprInvPos;
43 
44  // make uint32_t copy of uncompressed inv array
45  // codecs are expecting unsigned integer arrays, but inv_table uses int by default
46  std::vector<uint32_t> inv_u32(inv.begin(), inv.end());
47 
48  uint64_t compressionStartTime = getTime();
49 
50  // Retrieve coded instance
51  std::shared_ptr<DeviceIntegerCODEC> codec = DeviceCodecFactory::getCodec(m_compression);
52  if(!codec.get()) {
53  Logger::log(Logger::ALERT, "No matching function for %s compression!",
54  DeviceCodecFactory::getCompressionName(m_compression).c_str());
55  throw std::logic_error("No compression codec available!");
56  }
57  // Check if codec will be able to decompress an inverted list of any length
58  assert(codec->decodeArrayParallel_lengthPerBlock() >= (int)max_length);
59 
60  compressedInv.resize(inv.size()*8);
61  compressedInvPos.clear();
62  compressedInvPos.reserve(invPos.size());
63  compressedInvPos.push_back(0);
64 
65  int compressedInvSize = 0;
66  int64_t compressedInvCapacity = compressedInv.size();
67  int badCompressedLists = 0;
68 
69  uint32_t *out = compressedInv.data();
70  for (int pos = 0; pos < (int)invPos.size()-1; pos++)
71  {
72  int invStart = invPos[pos];
73  int invEnd = invPos[pos+1];
74  assert(invEnd - invStart > 0 && invEnd - invStart <= (int)max_length);
75 
76  // Check if we run out of capacity
77  assert(compressedInvCapacity > 0);
78  // We cannot have more capacity then there is free space in the output vector compressedInv, plus at the same
79  // time we cannot have negative compression overflow our max_length constraint on inverted list
80  size_t nvalue = std::min((size_t)compressedInvCapacity, max_length);
81 
82  uint32_t * data = inv_u32.data() + invStart;
83  codec->encodeArray(data, invEnd - invStart, out, nvalue);
84 
85  // Check if the compressed length (nvalue) from encodeArray(...) does not exceed the max_length constraint
86  // of the compressed list
87  assert(nvalue > 0 && nvalue <= max_length);
88 
89  out += nvalue; // shift compression output pointer
90  compressedInvCapacity -= nvalue;
91  compressedInvSize += nvalue;
92 
93  compressedInvPos.push_back(compressedInvSize);
94 
95  if ((int)nvalue >= invEnd - invStart)
96  badCompressedLists++;
97  }
98 
99  for (size_t i = 1; i < compressedInvPos.size(); i++){
100  assert(compressedInvPos[i] > compressedInvPos[i-1]); // Check if there was no int overflow in compressedInvPos
101  }
102 
103  compressedInv.resize(compressedInvSize); // shrink to used space only
104  compressedInv.shrink_to_fit();
105  assert(compressedInvSize == compressedInvPos.back());
106 
107  uint64_t compressionEndTime = getTime();
108  double compressionRatio = 32.0 * static_cast<double>(compressedInv.size()) / static_cast<double>(inv.size());
109 
110  Logger::log(Logger::DEBUG, "Done bulding compressed inv_compr_table in time %f",
111  getInterval(compressionStartTime, compressionEndTime));
112 
113  Logger::log(Logger::INFO, "Compression %s, codec: %s, compression ratio: %f",
114  DeviceCodecFactory::getCompressionName(m_compression).c_str(), codec->name().c_str(), compressionRatio);
115 
116  if (compressionRatio > 16.0 || badCompressedLists)
117  Logger::log(Logger::ALERT, "Bad compression! Bad compressed lists: %d / %d, compression ratio: %f",
118  badCompressedLists, compressedInvPos.size()-1, compressionRatio);
119 }
120 
121 
// NOTE(review): the declarator line of this definition is missing from this
// listing; presumably this is the destructor — confirm.
// The body only calls clear_gpu_mem().
123 {
124  clear_gpu_mem();
125 }
126 
127 
// NOTE(review): the declarator line is missing from this listing; presumably
// getCompression() — confirm.
// Returns the stored compression type, m_compression.
130 {
131  return m_compression;
132 }
133 
// NOTE(review): the line carrying the function name is missing from this
// listing; the name used here for reference is a guess — confirm.
// Returns 32 * m_comprInv.size() / uncompressedInv()->size() as a double.
// If the table is not in the `builded` state, an ALERT is logged and -1 is
// returned instead.
// NOTE(review): the two emptiness checks are asserts only; with asserts
// compiled out an empty uncompressed table would divide by zero.
134 double
136 {
137  if (this->build_status() != builded)
138  {
139  Logger::log(Logger::ALERT, "Unknown compression ratio: table is not built!");
140  return -1;
141  }
142  assert(m_comprInv.size());
143  assert(uncompressedInv()->size());
144  return 32.0 * static_cast<double>(m_comprInv.size()) / static_cast<double>(uncompressedInv()->size());
145 }
146 
// NOTE(review): the declarator line is missing from this listing; presumably
// setCompression(compression) — confirm.
// Stores `compression` in m_compression. If the table is already in the
// `builded` state the request is rejected: an ALERT is logged and
// m_compression is left unchanged.
147 void
149 {
150  if (this->build_status() == builded)
151  {
152  Logger::log(Logger::ALERT, "ERROR: Attempting to change compression type on already built table!");
153  return;
154  }
155  m_compression = compression;
156 }
157 
// NOTE(review): the declarator line is missing from this listing; presumably
// getUncompressedPostingListMaxLength() — confirm.
// Returns m_uncompressedInvListsMaxLength.
158 size_t
160 {
161  return m_uncompressedInvListsMaxLength;
162 }
163 
// NOTE(review): the declarator line is missing from this listing; presumably
// setUncompressedPostingListMaxLength(length) — confirm.
// Stores `length` in m_uncompressedInvListsMaxLength.
164 void
166 {
167  this->m_uncompressedInvListsMaxLength = length;
168 }
169 
// NOTE(review): the declarator line is missing from this listing; presumably
// the inv() override — confirm.
// Returns the address of m_comprInv (a std::vector<uint32_t>) reinterpreted
// as std::vector<int>*.
// NOTE(review): the two vector types are unrelated, so callers should
// presumably treat the result as an opaque handle — verify how it is used.
170 std::vector<int>*
172 {
173  return reinterpret_cast<std::vector<int>*>(&m_comprInv);
174 }
175 
// NOTE(review): the declarator line is missing from this listing; presumably
// compressedInv() — confirm.
// Returns a pointer to m_comprInv.
176 std::vector<uint32_t>*
178 {
179  return &m_comprInv;
180 }
181 
// NOTE(review): the declarator line is missing from this listing; presumably
// uncompressedInv() — confirm.
// Returns whatever inv_table::inv() returns.
182 std::vector<int>*
184 {
185  return inv_table::inv();
186 }
187 
// NOTE(review): the declarator line is missing from this listing; presumably
// the inv_pos() override — confirm.
// Returns a pointer to m_comprInvPos.
188 std::vector<int>*
190 {
191  return &m_comprInvPos;
192 }
// NOTE(review): the declarator line is missing from this listing; presumably
// compressedInvPos() — confirm.
// Returns a pointer to m_comprInvPos.
193 std::vector<int>*
195 {
196  return &m_comprInvPos;
197 }
198 
// NOTE(review): the declarator line is missing from this listing; presumably
// uncompressedInvPos() — confirm.
// Returns whatever inv_table::inv_pos() returns.
// (The doubled semicolon below is a harmless empty statement.)
199 std::vector<int>*
201 {
202  return inv_table::inv_pos();;
203 }
204 
// NOTE(review): the declarator line is missing from this listing; presumably
// deviceCompressedInv() — confirm.
// Returns the pointer stored in m_d_compr_inv_p (the pointer that the copy
// routine in this file passes to cudaMalloc/cudaMemcpy).
205 uint32_t*
207 {
208  return m_d_compr_inv_p;
209 }
210 
212 {
213  try{
214  if(m_d_compr_inv_p == NULL)
215  cudaCheckErrors(cudaMalloc(&m_d_compr_inv_p, sizeof(uint32_t) * m_comprInv.size()));
216  cudaCheckErrors(cudaMemcpy(m_d_compr_inv_p, &m_comprInv[0], sizeof(uint32_t) * m_comprInv.size(),
217  cudaMemcpyHostToDevice));
218  } catch(std::bad_alloc &e){
219  throw(genie::exception::gpu_bad_alloc(e.what()));
220  }
221 
222  return true;
223 }
224 
// NOTE(review): the declarator line is missing from this listing; presumably
// clear() — confirm.
// Calls inv_table::clear(), then clears the object returned by ck() and
// empties m_comprInv and m_comprInvPos.
// The device pointer m_d_compr_inv_p is not touched here.
226 {
227  inv_table::clear();
228 
229  ck()->clear();
230  m_comprInv.clear();
231  m_comprInvPos.clear();
232 }
233 
235 {
236  if (m_d_compr_inv_p == NULL)
237  return;
238 
239  std::cout << "cudaFreeTime: " ;
240  u64 t1 = getTime();
241  cudaCheckErrors(cudaFree(m_d_compr_inv_p));
242  u64 t2 = getTime();
243  std::cout << getInterval(t1, t2) << " ms."<< std::endl;
244 
245 }
246 
247 #endif
std::vector< int > * compressedInvPos()
void setCompression(genie::compression::COMPRESSION_TYPE compression)
unsigned long long getTime()
Get system time.
Definition: Timing.cc:22
virtual std::vector< int > * inv()
std::vector< uint32_t > * compressedInv()
genie::compression::COMPRESSION_TYPE getCompression() const
virtual void build(size_t max_length, bool use_load_balance)
Build the inv_table.
static std::shared_ptr< DeviceIntegerCODEC > getCodec(COMPRESSION_TYPE type)
virtual std::vector< int > * inv_pos()
std::vector< int > * uncompressedInvPos()
bool cpy_data_to_gpu()
Copy vector _inv to gpu memory which is referenced by d_inv_p.
size_t getUncompressedPostingListMaxLength() const
Record run-time information.
void clear_gpu_mem()
clear the corresponding gpu memory referenced by d_inv_p
unsigned long long u64
A type definition for a 64-bit unsigned integer.
Definition: match_common.h:19
double getInterval(unsigned long long start, unsigned long long stop)
Calculate time interval from start to end.
Definition: Timing.cc:36
Functions about getting system time.
std::vector< int > * uncompressedInv()
void setUncompressedPostingListMaxLength(size_t length)
Defines the class inv_compr_table.
uint32_t * deviceCompressedInv() const
static std::string getCompressionName(COMPRESSION_TYPE type)
#define cudaCheckErrors(err)
The wrapper function to validate CUDA calls.
Definition: cuda_macros.h:23