GENIE
inv_table.cu
Go to the documentation of this file.
1 
7 #include <stdio.h>
8 #include <fstream>
9 #include <exception>
10 #include <iostream>
11 #include <utility>
12 #include <boost/archive/binary_iarchive.hpp>
13 #include <boost/archive/binary_oarchive.hpp>
14 #include <boost/serialization/serialization.hpp>
15 #include <boost/serialization/unordered_map.hpp>
16 
18 #include <genie/utility/Logger.h>
19 #include <genie/utility/Timing.h>
21 
22 #include "inv_table.h"
23 
24 using namespace std;
25 using namespace genie::utility;
26 
27 //int* inv_table::d_inv_p = NULL;
29 
31 {
32  try
33  {
34  if(d_inv_p == NULL)
35  cudaCheckErrors(cudaMalloc(&d_inv_p, sizeof(int) * _inv.size()));
36  cudaCheckErrors(cudaMemcpy(d_inv_p, &_inv[0], sizeof(int) * _inv.size(), cudaMemcpyHostToDevice));
37  is_stored_in_gpu = true;
38  }
39  catch(std::bad_alloc &e)
40  {
41  throw(genie::exception::gpu_bad_alloc(e.what()));
42  }
43 
44  return true;
45 }
46 
48 {
49  _build_status = not_builded;
50  _inv_lists.clear();
51  _ck.clear();
52  _inv.clear();
53 }
54 
56 {
57  if(d_inv_p != NULL)
58  {
59 
60  cout << "Program end ---- cudaFreeTime: " ;
61  u64 t1 = getTime();
62  cudaCheckErrors(cudaFree(d_inv_p));
63  d_inv_p = NULL;
64 
65  u64 t2 = getTime();
66  cout << getInterval(t1, t2) << " ms."<< endl;
67  }
68 }
70 {
71  if (d_inv_p == NULL)
72  return;
73 
74  cout << "cudaFreeTime: " ;
75  u64 t1 = getTime();
76  cudaCheckErrors(cudaFree(d_inv_p));
77  u64 t2 = getTime();
78  cout << getInterval(t1, t2) << " ms."<< endl;
79 }
80 
82 {
83  return _size == -1;
84 }
85 
87 {
88  return _dim_size;
89  //return _inv_lists.size();
90 }
91 
93 {
94  return _size <= -1 ? 0 : _size;
95 }
96 
98 {
99  return _shifter;
100 }
101 
102 unordered_map<int, int>* genie::table::inv_table::get_distinct_map(int dim)
103 {
104  return &_distinct_map[dim];
105 }
106 
108 {
109  if (_size == -1 || _size == inv.size())
110  {
111  _build_status = not_builded;
112  _size = inv.size();
113  _inv_lists.push_back(inv);
114 
115  _dim_size = _inv_lists.size();
116 
117  //get size for every posting list
118 
119  vector<int> line;
120  for(int i = 0 ; i < inv.value_range() ; ++i)
121  line.push_back(inv.index(i+inv.min())->size());
122  posting_list_size.push_back(line);
123 
124  //get upperbound and lowerbound for every inv_list
125  inv_list_upperbound.push_back(inv.max());
126  inv_list_lowerbound.push_back(inv.min());
127  }
128 }
129 
131 {
132  // As for now, we built all data sequence together. Therefore, the _distanct_map is always of size 1
133  if(_size == -1)
134  _size = 0;
135  _build_status = not_builded;
136  _size += inv.size();
137  _inv_lists.push_back(inv);
138  _dim_size = _inv_lists.size();
139 
140  //get size for every posting list
141 
142  vector<int> line;
143  for(int i = 0 ; i < inv.value_range() ; ++i)
144  line.push_back(inv.index(i+inv.min())->size());
145  posting_list_size.push_back(line);
146 
147  //get upperbound and lowerbound for every inv_list
148  inv_list_upperbound.push_back(inv.max());
149  inv_list_lowerbound.push_back(inv.min());
150  //distinct_value.push_back(inv.distinct_value_sequence);
151  _distinct_map.push_back(inv._distinct);
152 
153 }
154 
156 {
157  if (inv != NULL)
158  {
159  append(*inv);
160  }
161 }
162 
163 int
165 {
166  if((unsigned int)attr_index<posting_list_size.size() && value>=inv_list_lowerbound[attr_index] && value<=inv_list_upperbound[attr_index])
167  return posting_list_size[attr_index][value-inv_list_lowerbound[attr_index]];
168  else
169  return 0;
170 }
171 
172 bool
173 genie::table::inv_table::list_contain(int attr_index, int value)
174 {
175  if(value <= inv_list_upperbound[attr_index] && value >= inv_list_lowerbound[attr_index])
176  return true;
177  else
178  return false;
179 }
180 
181 
182 
183 int
185 {
186  if((unsigned int)attr_index < inv_list_upperbound.size())
187  return inv_list_upperbound[attr_index];
188  else
189  return -1;
190 }
191 
192 int
194 {
195  if((unsigned int)attr_index < inv_list_lowerbound.size())
196  return inv_list_lowerbound[attr_index];
197  else
198  return -1;
199 }
200 
201 unsigned int
203 {
204  return shift_bits_subsequence;
205 }
206 
207 void
209 {
210  table_index = attr_index;
211 }
212 void
214 {
215  total_num_of_table = num;
216 }
217 
218 int
220 {
221  return table_index;
222 }
223 
224 int
226 {
227  return total_num_of_table;
228 }
229 
231 {
232  return _build_status;
233 }
234 
235 std::vector<genie::table::inv_list>*
237 {
238  return &_inv_lists;
239 }
240 
241 vector<int>*
243 {
244  return &_ck;
245 }
246 
247 vector<int>*
249 {
250  return &_inv;
251 }
252 
253 
254 unordered_map<size_t, int>*
256 {
257  return &_inv_index_map;
258 }
259 
260 vector<int>*
262 {
263  return &_inv_pos;
264 }
265 
266 
267 void
268 genie::table::inv_table::build(size_t max_length, bool use_load_balance)
269 {
270  u64 table_start = getTime();
271  vector<int> _inv_index;
272  _ck.clear();
273  _inv.clear();
274  _inv_pos.clear();
275  if(!use_load_balance)
276  {
277  max_length = (size_t)0 - (size_t)1;
278  }
279  unsigned int last;
280  int key, dim, value;
281  for (unsigned int i = 0; i < _inv_lists.size(); i++)
282  {
283  dim = i << _shifter;
284  for (value = _inv_lists[i].min(); value <= _inv_lists[i].max(); value++)
285  {
286  key = dim + value - _inv_lists[i].min();
287 
288  vector<int>* _index;
289 
290  _index = _inv_lists[i].index(value);
291 
292  vector<int> index;
293  index.clear();
294  if(_index != NULL)
295  index = *_index;
296  if(_inv_lists.size() <= 1)//used int subsequence search
297  shift_bits_subsequence = _inv_lists[i]._shift_bits_subsequence();
298 
299  if (_ck.size() <= (unsigned int) key)
300  {
301  last = _ck.size();
302  _ck.resize(key + 1);
303  _inv_index.resize(key + 1);
304  for (; last < _ck.size(); ++last)
305  {
306  _ck[last] = _inv.size();
307  _inv_index[last] = _inv_pos.size();
308  }
309  }
310  for (unsigned int j = 0; j < index.size(); ++j)
311  {
312  if (j % max_length == 0)
313  {
314  _inv_pos.push_back(_inv.size());
315  }
316  _inv.push_back(index[j]);
317  _ck[key] = _inv.size();
318  }
319 
320  }
321 
322  }
323  _inv_index.push_back(_inv_pos.size());
324  _inv_pos.push_back(_inv.size());
325 
326  /* unordered map impl of inv_index */
327  _inv_index_map.clear();
328  for (size_t i = 0; i < _inv_lists.size(); ++i)
329  {
330  dim = i << _shifter;
331  for (int j = _inv_lists[i].min(); j <= _inv_lists[i].max() + 1; ++j)
332  {
333  key = dim + j - _inv_lists[i].min();
334  size_t unsigned_key = static_cast<size_t>(key);
335  _inv_index_map.insert(make_pair(unsigned_key, _inv_index.at(unsigned_key)));
336  }
337  }
338 
339  max_inv_size = (int)_inv.size() > max_inv_size?(int)_inv.size():max_inv_size;
340 
341  _build_status = builded;
342  u64 table_end = getTime();
343  cout<<"build table time = "<<getInterval(table_start, table_end)<<"ms."<<endl;
344  //Logger::log(Logger::DEBUG, "inv_index size %d:", _inv_index.size());
345  //Logger::log(Logger::DEBUG, "inv_pos size %d:", _inv_pos.size());
346  //Logger::log(Logger::DEBUG, "inv size %d:", _inv.size());
347  Logger::log(Logger::INFO, "inv_index size %d:", _inv_index.size());
348  Logger::log(Logger::INFO, "inv_pos size %d:", _inv_pos.size());
349  Logger::log(Logger::INFO, "inv size %d:", _inv.size());
350 }
351 
352 void
354 {
355  min_value_sequence = min_value;
356 }
357 
358 int
360 {
361  return min_value_sequence;
362 }
363 
364 void
366 {
367  max_value_sequence = max_value;
368 }
369 
370 int
372 {
373  return max_value_sequence;
374 }
375 
376 
377 void
379 {
380  gram_length_sequence = gram_length;
381 
382 }
383 
384 int
386 {
387  return gram_length_sequence;
388 }
int size()
Return the number of instances.
Definition: inv_list.cc:47
int get_gram_length_sequence()
Get the gram length.
Definition: inv_table.cu:385
bool cpy_data_to_gpu()
Copy vector _inv to gpu memory which is referenced by d_inv_p.
Definition: inv_table.cu:30
std::unordered_map< size_t, int > * inv_index_map()
Definition: inv_table.cu:255
void set_gram_length_sequence(int gram_length)
Set length of each gram.
Definition: inv_table.cu:378
virtual std::vector< int > * inv_pos()
Definition: inv_table.cu:261
void clear_gpu_mem()
clear the corresponding gpu memory referenced by d_inv_p
Definition: inv_table.cu:69
static int max_inv_size
Definition: inv_table.h:61
virtual ~inv_table()
The Destructor of the inv_table. It will also clear the related gpu memory.
Definition: inv_table.cu:55
int get_upperbound_of_list(int attr_index)
Definition: inv_table.cu:184
unsigned long long getTime()
Get system time.
Definition: Timing.cc:22
status
This enum var defines two statuses for a inv_table object, which is either builded or not_builded...
Definition: inv_table.h:48
int get_total_num_of_table() const
return the total_num_of_table.
Definition: inv_table.cu:225
std::unordered_map< int, int > _distinct
Definition: inv_list.h:29
std::unordered_map< int, int > * get_distinct_map(int dim)
Definition: inv_table.cu:102
int get_table_index() const
return the index of this inv_table.
Definition: inv_table.cu:219
virtual std::vector< int > * ck()
Definition: inv_table.cu:242
void append(inv_list &inv)
Append an inv_list to the inv_table.
Definition: inv_table.cu:107
int get_min_value_sequence()
Get the min value for sequences&#39; elements in this inv_table.
Definition: inv_table.cu:359
virtual void build(size_t max_length, bool use_load_balance)
Build the inv_table.
Definition: inv_table.cu:268
void clear()
Clear the inv_table.
Definition: inv_table.cu:47
int max()
Return the max value of the inverted vector.
Definition: inv_list.cc:42
int get_lowerbound_of_list(int attr_index)
Definition: inv_table.cu:193
unsigned int _shift_bits_subsequence()
Definition: inv_table.cu:202
define class inv_table
int get_posting_list_size(int attr_index, int value)
Definition: inv_table.cu:164
virtual std::vector< int > * inv()
Definition: inv_table.cu:248
Record run-time information.
unsigned long long u64
A type definition for a 64-bit unsigned integer.
Definition: match_common.h:19
void set_table_index(int attr_index)
Set the table_index to &#39;index&#39;.
Definition: inv_table.cu:208
double getInterval(unsigned long long start, unsigned long long stop)
Calculate time interval from start to end.
Definition: Timing.cc:36
Functions about getting system time.
void set_min_value_sequence(int min_value)
Used in sequence search. To set the min_value for all sequences&#39; element.
Definition: inv_table.cu:353
int value_range()
Return the number of different values in the attribute.
Definition: inv_list.cc:389
void set_total_num_of_table(int num)
Set the total_num_of_table to &#39;num&#39;.
Definition: inv_table.cu:213
int min()
Return the min value of the inverted vector.
Definition: inv_list.cc:37
std::vector< int > * index(int value)
The indexes of the value.
Definition: inv_list.cc:381
This class manages one inverted list.
Definition: inv_list.h:23
bool empty()
Check whether the inv_table is empty.
Definition: inv_table.cu:81
int get_max_value_sequence()
Get the max value.
Definition: inv_table.cu:371
bool list_contain(int attr_index, int value)
Test whether a value is possible for an specific attribute.
Definition: inv_table.cu:173
std::vector< inv_list > * inv_lists()
Definition: inv_table.cu:236
#define cudaCheckErrors(err)
The wrapper function to validate CUDA calls.
Definition: cuda_macros.h:23
void set_max_value_sequence(int max_value)
Set the max value for all sequence. Compare to set_min_value_sequence()
Definition: inv_table.cu:365
void append_sequence(inv_list &inv)
append inv_list for sequence search
Definition: inv_table.cu:130