12 #include <boost/archive/binary_iarchive.hpp> 13 #include <boost/archive/binary_oarchive.hpp> 14 #include <boost/serialization/serialization.hpp> 15 #include <boost/serialization/unordered_map.hpp> 36 cudaCheckErrors(cudaMemcpy(d_inv_p, &_inv[0],
sizeof(
int) * _inv.size(), cudaMemcpyHostToDevice));
37 is_stored_in_gpu =
true;
39 catch(std::bad_alloc &e)
49 _build_status = not_builded;
60 cout <<
"Program end ---- cudaFreeTime: " ;
74 cout <<
"cudaFreeTime: " ;
94 return _size <= -1 ? 0 : _size;
104 return &_distinct_map[dim];
109 if (_size == -1 || _size == inv.
size())
111 _build_status = not_builded;
113 _inv_lists.push_back(inv);
115 _dim_size = _inv_lists.size();
121 line.push_back(inv.
index(i+inv.
min())->size());
122 posting_list_size.push_back(line);
125 inv_list_upperbound.push_back(inv.
max());
126 inv_list_lowerbound.push_back(inv.
min());
135 _build_status = not_builded;
137 _inv_lists.push_back(inv);
138 _dim_size = _inv_lists.size();
144 line.push_back(inv.
index(i+inv.
min())->size());
145 posting_list_size.push_back(line);
148 inv_list_upperbound.push_back(inv.
max());
149 inv_list_lowerbound.push_back(inv.
min());
166 if((
unsigned int)attr_index<posting_list_size.size() && value>=inv_list_lowerbound[attr_index] && value<=inv_list_upperbound[attr_index])
167 return posting_list_size[attr_index][value-inv_list_lowerbound[attr_index]];
175 if(value <= inv_list_upperbound[attr_index] && value >= inv_list_lowerbound[attr_index])
186 if((
unsigned int)attr_index < inv_list_upperbound.size())
187 return inv_list_upperbound[attr_index];
195 if((
unsigned int)attr_index < inv_list_lowerbound.size())
196 return inv_list_lowerbound[attr_index];
204 return shift_bits_subsequence;
210 table_index = attr_index;
215 total_num_of_table = num;
227 return total_num_of_table;
232 return _build_status;
235 std::vector<genie::table::inv_list>*
254 unordered_map<size_t, int>*
257 return &_inv_index_map;
271 vector<int> _inv_index;
275 if(!use_load_balance)
277 max_length = (size_t)0 - (
size_t)1;
281 for (
unsigned int i = 0; i < _inv_lists.size(); i++)
284 for (value = _inv_lists[i].min(); value <= _inv_lists[i].max(); value++)
286 key = dim + value - _inv_lists[i].min();
290 _index = _inv_lists[i].index(value);
296 if(_inv_lists.size() <= 1)
297 shift_bits_subsequence = _inv_lists[i]._shift_bits_subsequence();
299 if (_ck.size() <= (
unsigned int) key)
303 _inv_index.resize(key + 1);
304 for (; last < _ck.size(); ++last)
306 _ck[last] = _inv.size();
307 _inv_index[last] = _inv_pos.size();
310 for (
unsigned int j = 0; j < index.size(); ++j)
312 if (j % max_length == 0)
314 _inv_pos.push_back(_inv.size());
316 _inv.push_back(index[j]);
317 _ck[key] = _inv.size();
323 _inv_index.push_back(_inv_pos.size());
324 _inv_pos.push_back(_inv.size());
327 _inv_index_map.clear();
328 for (
size_t i = 0; i < _inv_lists.size(); ++i)
331 for (
int j = _inv_lists[i].min(); j <= _inv_lists[i].max() + 1; ++j)
333 key = dim + j - _inv_lists[i].min();
334 size_t unsigned_key =
static_cast<size_t>(key);
335 _inv_index_map.insert(make_pair(unsigned_key, _inv_index.at(unsigned_key)));
339 max_inv_size = (int)_inv.size() > max_inv_size?(int)_inv.size():max_inv_size;
341 _build_status = builded;
343 cout<<
"build table time = "<<
getInterval(table_start, table_end)<<
"ms."<<endl;
347 Logger::log(Logger::INFO,
"inv_index size %d:", _inv_index.size());
348 Logger::log(Logger::INFO,
"inv_pos size %d:", _inv_pos.size());
349 Logger::log(Logger::INFO,
"inv size %d:", _inv.size());
355 min_value_sequence = min_value;
361 return min_value_sequence;
367 max_value_sequence = max_value;
373 return max_value_sequence;
380 gram_length_sequence = gram_length;
387 return gram_length_sequence;
int size()
Return the number of instances.
int get_gram_length_sequence()
Get the gram length.
bool cpy_data_to_gpu()
Copy the vector _inv to the GPU memory referenced by d_inv_p.
std::unordered_map< size_t, int > * inv_index_map()
void set_gram_length_sequence(int gram_length)
Set length of each gram.
virtual std::vector< int > * inv_pos()
void clear_gpu_mem()
Clear the corresponding GPU memory referenced by d_inv_p.
virtual ~inv_table()
The Destructor of the inv_table. It will also clear the related gpu memory.
int get_upperbound_of_list(int attr_index)
unsigned long long getTime()
Get system time.
status
This enum var defines two statuses for an inv_table object, which is either builded or not_builded...
int get_total_num_of_table() const
Return the total_num_of_table.
std::unordered_map< int, int > _distinct
std::unordered_map< int, int > * get_distinct_map(int dim)
int get_table_index() const
Return the index of this inv_table.
virtual std::vector< int > * ck()
void append(inv_list &inv)
Append an inv_list to the inv_table.
int get_min_value_sequence()
Get the min value for sequences' elements in this inv_table.
virtual void build(size_t max_length, bool use_load_balance)
Build the inv_table.
void clear()
Clear the inv_table.
int max()
Return the max value of the inverted vector.
int get_lowerbound_of_list(int attr_index)
unsigned int _shift_bits_subsequence()
int get_posting_list_size(int attr_index, int value)
virtual std::vector< int > * inv()
Record run-time information.
unsigned long long u64
A type definition for a 64-bit unsigned integer.
void set_table_index(int attr_index)
Set the table_index to 'attr_index'.
double getInterval(unsigned long long start, unsigned long long stop)
Calculate time interval from start to end.
Functions about getting system time.
void set_min_value_sequence(int min_value)
Used in sequence search. Sets the min value for the elements of all sequences.
int value_range()
Return the number of different values in the attribute.
void set_total_num_of_table(int num)
Set the total_num_of_table to 'num'.
int min()
Return the min value of the inverted vector.
std::vector< int > * index(int value)
The indexes of the value.
This class manages one inverted list.
bool empty()
Check whether the inv_table is empty.
int get_max_value_sequence()
Get the max value.
bool list_contain(int attr_index, int value)
Test whether a value is possible for a specific attribute.
std::vector< inv_list > * inv_lists()
#define cudaCheckErrors(err)
The wrapper function to validate CUDA calls.
void set_max_value_sequence(int max_value)
Set the max value for all sequences. Compare with set_min_value_sequence().
void append_sequence(inv_list &inv)
Append an inv_list for sequence search.