GENIE
inv_list.cc
Go to the documentation of this file.
1 
5 #include "inv_list.h"
6 
7 #include <cstdlib>
8 #include <iostream>
9 
10 using namespace std;
11 
12 int cv(string& s)
13 {
14  return atoi(s.c_str());
15 }
16 
17 genie::table::inv_list::inv_list(vector<int>& vin)
18 {
19  invert(vin);
20 }
21 
22 genie::table::inv_list::inv_list(vector<int>* vin)
23 {
24  invert(vin);
25 }
26 
27 genie::table::inv_list::inv_list(vector<string>& vin)
28 {
29  invert(vin);
30 }
31 
32 genie::table::inv_list::inv_list(vector<string>* vin)
33 {
34  invert(vin);
35 }
36 
38 {
39  return _bound.first;
40 }
41 
43 {
44  return _bound.second;
45 }
46 
48 {
49  return _size;
50 }
51 void genie::table::inv_list::invert_bijectMap(vector<vector<int> > & vin)
52 {
53  _size = vin.size();
54  if (vin.empty())
55  return;
56 
57  _bound.first = vin[0][0], _bound.second = vin[0][0], _inv.clear();
58 
59  unsigned int i, j;
60  for (i = 0; i < vin.size(); i++)
61  {
62  for (j = 0; j < vin[i].size(); ++j)
63  {
64  if (_bound.first > vin[i][j])
65  _bound.first = vin[i][j];
66  if (_bound.second < vin[i][j])
67  _bound.second = vin[i][j];
68  }
69  }
70  unsigned int gap = _bound.second - _bound.first + 1;
71  _inv.resize(gap);
72  for (i = 0; i < gap; i++)
73  _inv[i].clear();
74 
75  for (i = 0; i < vin.size(); i++)
76  {
77  for (j = 0; j < vin[i].size(); ++j)
78  {
79  _inv[vin[i][j] - _bound.first].push_back(i);
80  }
81  }
82  shift_bits_subsequence = 0;
83  return;
84 }
85 
86 
87 void genie::table::inv_list::invert_sequence(vector<vector<int> > & vin, int shift_bits)
88  //, vector<int> & respective_id)
89 {
90  _size = vin.size();
91  if (vin.empty())
92  {
93  _bound.first = 0;
94  _bound.second = 0;
95 
96  return;
97  }
98 
99  unsigned int i, j;
100  _bound.first = 0, _bound.second = 0, _inv.clear();
101 
102  vector<vector<int> > vin_after_shift;
103  //shift_bits = 6;
104  unordered_map<int, int> _map;
105  for(i = 0; i < vin.size(); ++i)
106  {
107  vector<int> line;
108  for(j = 0; j < vin[i].size(); ++j)
109  {
110  int temp_value;
111  unordered_map<int, int>::iterator result = _map.find(vin[i][j]);
112  if(result == _map.end())
113  {
114  temp_value = vin[i][j]<<shift_bits;
115  _map.insert({vin[i][j], 0});
116  }
117  else
118  {
119  if(result->second < 63)
120  result->second += 1;
121  temp_value = (result->first<<shift_bits) + result->second;
122  }
123 
124  unordered_map<int, int>::iterator it = _distinct.find(temp_value);
125  if(it != _distinct.end())
126  temp_value = it->second;
127  else
128  {
129  _distinct.insert({temp_value, _distinct.size()});
130  //distinct_value_sequence.push_back(temp_value);
131  temp_value = _distinct.size() - 1;
132  }
133 
134  line.push_back(temp_value);
135  }
136  vin_after_shift.push_back(line);
137  _map.clear();
138  }
139  _bound.second = _distinct.size() - 1;
140 
141  unsigned int gap = _bound.second - _bound.first + 1;
142  //cout<<"Gap = "<<gap<<endl;
143  _inv.resize(gap);
144  for (i = 0; i < gap; i++)
145  _inv[i].clear();
146 
147  for (i = 0; i < vin_after_shift.size(); ++i)
148  {
149  for (j = 0; j < vin_after_shift[i].size(); ++j)
150  {
151  _inv[vin_after_shift[i][j] - _bound.first].push_back(i);
152  }
153  }
154 
155  shift_bits_subsequence = 0;
156  return;
157 }
158 
159 
160 void genie::table::inv_list::invert_bijectMap(int *data, unsigned int item_num,
161  unsigned int *index, unsigned int row_num)
162 {
163  _size = row_num;
164  if (item_num == 0)
165  return;
166  unsigned int i, j;
167  for (i = 0; i < item_num; ++i)
168  {
169  if (_bound.first > data[i])
170  _bound.first = data[i];
171  if (_bound.second < data[i])
172  _bound.second = data[i];
173  }
174  unsigned int gap = _bound.second - _bound.first + 1;
175  _inv.resize(gap);
176  for (i = 0; i < gap; ++i)
177  _inv[i].clear();
178  for (i = 0; i < row_num - 1; ++i)
179  for (j = index[i] - index[0]; j < index[i + 1] - index[0]; ++j)
180  _inv[data[j] - _bound.first].push_back(i);
181 
182  for (i = index[row_num - 1]; i < item_num; ++i)
183  _inv[data[i] - _bound.first].push_back(row_num - 1);
184 
185  shift_bits_subsequence = 0;
186  return;
187 }
188 
189 void genie::table::inv_list::invert_subsequence(vector<vector<int> > & vin)
190 {
191  _size = vin.size();
192  if (vin.empty())
193  return;
194 
195  _bound.first = vin[0][0], _bound.second = vin[0][0], _inv.clear();
196 
197  unsigned int i, j, max_offset_subsequence=0;
198  for (i = 0; i < vin.size(); i++)
199  {
200  if(vin[i].size() > max_offset_subsequence)
201  max_offset_subsequence = vin[i].size();
202  for (j = 0; j < vin[i].size(); ++j)
203  {
204  if (_bound.first > vin[i][j])
205  _bound.first = vin[i][j];
206  if (_bound.second < vin[i][j])
207  _bound.second = vin[i][j];
208  }
209  }
210  unsigned int gap = _bound.second - _bound.first + 1;
211  _inv.resize(gap);
212 
213  shift_bits_subsequence = 0;
214  for(i = 1 ; i < max_offset_subsequence ; i *= 2)
215  shift_bits_subsequence++;
216 
217  for (i = 0; i < gap; i++)
218  _inv[i].clear();
219 
220  unsigned int rowID_offset;
221  for (i = 0; i < vin.size(); i++)
222  {
223  for (j = 0; j < vin[i].size(); ++j)
224  {
225  rowID_offset = (i<<shift_bits_subsequence) + j;
226  _inv[vin[i][j] - _bound.first].push_back(rowID_offset);
227  }
228  }
229  return;
230 }
231 
232 void genie::table::inv_list::invert_subsequence(int *data, unsigned int item_num, unsigned int * index, unsigned int row_num)
233 {
234  _size = row_num;
235  if (item_num == 0)
236  return;
237  unsigned int i, j, max_offset_subsequence=0;
238  unsigned int length;
239  _bound.first = data[0], _bound.second = data[0], _inv.clear();
240  for( i = 1; i < row_num ;++i )
241  {
242  length = index[i] - index[i-1];
243  if(length > max_offset_subsequence)
244  max_offset_subsequence = length;
245  }
246  length = item_num - index[row_num - 1];
247  if(length > max_offset_subsequence)
248  max_offset_subsequence = length;
249 
250  for (i = 0; i < item_num; ++i)
251  {
252  if (_bound.first > data[i])
253  _bound.first = data[i];
254  if (_bound.second < data[i])
255  _bound.second = data[i];
256  }
257  unsigned int gap = _bound.second - _bound.first + 1;
258  _inv.resize(gap);
259 
260  shift_bits_subsequence = 0;
261  for(i = 1 ; i < max_offset_subsequence ; i *= 2)
262  shift_bits_subsequence++;
263 
264  for (i = 0; i < gap; ++i)
265  _inv[i].clear();
266 
267  unsigned int rowID_offset;
268  int offset;
269  for (i = 0; i < row_num - 1; ++i)
270  {
271  offset = 0;
272  for (j = index[i] - index[0]; j < index[i + 1] - index[0]; ++j)
273  {
274  rowID_offset = (i<<shift_bits_subsequence) + offset;
275  _inv[data[j] - _bound.first].push_back(rowID_offset);
276  offset++;
277  }
278  }
279  offset = 0;
280  int rowID = (row_num - 1)<<shift_bits_subsequence;
281  for (i = index[row_num - 1] - index[0]; i < item_num; ++i)
282  {
283  rowID_offset = rowID + offset;
284  _inv[data[i] - _bound.first].push_back(rowID_offset);
285  offset++;
286  }
287  return;
288 }
289 
291 {
292  return shift_bits_subsequence;
293 }
294 
295 void genie::table::inv_list::invert(vector<int>& vin)
296 {
297  _size = vin.size();
298  if (vin.empty())
299  return;
300 
301  _bound.first = vin[0], _bound.second = vin[0], _inv.clear();
302 
303  unsigned int i;
304  for (i = 0; i < vin.size(); i++)
305  {
306  if (_bound.first > vin[i])
307  _bound.first = vin[i];
308  if (_bound.second < vin[i])
309  _bound.second = vin[i];
310  }
311 
312  unsigned int gap = _bound.second - _bound.first + 1;
313  _inv.resize(gap);
314  for (i = 0; i < gap; i++)
315  _inv[i].clear();
316 
317  for (i = 0; i < vin.size(); i++)
318  _inv[vin[i] - _bound.first].push_back(i);
319  return;
320 }
321 
322 void genie::table::inv_list::invert(vector<int>* vin)
323 {
324  invert(*vin);
325 }
326 
327 void genie::table::inv_list::invert(vector<string>& vin)
328 {
329  invert(vin, &cv);
330 }
331 
332 void genie::table::inv_list::invert(vector<string>* vin)
333 {
334  invert(*vin);
335 }
336 
337 void genie::table::inv_list::invert(vector<string>& vin,
338  int (*stoi)(string&))
339 {
340  _size = vin.size();
341  if (vin.empty())
342  return;
343 
344  _bound.first = stoi(vin[0]);
345  _bound.second = stoi(vin[0]);
346  _inv.clear();
347 
348  unsigned int i;
349  for (i = 0; i < vin.size(); i++)
350  {
351  if (_bound.first > stoi(vin[i]))
352  _bound.first = stoi(vin[i]);
353  if (_bound.second < stoi(vin[i]))
354  _bound.second = stoi(vin[i]);
355  }
356 
357  unsigned int gap = _bound.second - _bound.first + 1;
358  _inv.resize(gap);
359  for (i = 0; i < gap; i++)
360  _inv[i].clear();
361 
362  for (i = 0; i < vin.size(); i++)
363  _inv[stoi(vin[i]) - _bound.first].push_back(i);
364  return;
365 }
366 
367 void genie::table::inv_list::invert(vector<string>* vin,
368  int (*stoi)(string&))
369 {
370  invert(*vin, stoi);
371 }
372 
374 {
375  if (value > _bound.second || value < _bound.first)
376  return false;
377  return true;
378 }
379 
380 vector<int>*
382 {
383  if (!contains(value))
384  return NULL;
385  return &_inv[value - min()];
386 }
387 
388 int
390 {
391  return _inv.size();
392 }
void invert_bijectMap(std::vector< std::vector< int > > &vin)
int size()
Return the number of instances.
Definition: inv_list.cc:47
int cv(string &s)
Definition: inv_list.cc:12
void invert_subsequence(std::vector< std::vector< int >> &vin)
inv_list()
Create an empty inv_list.
Definition: inv_list.h:61
unsigned int _shift_bits_subsequence()
Definition: inv_list.cc:290
int max()
Return the max value of the inverted vector.
Definition: inv_list.cc:42
int value_range()
Return the number of different values in the attribute.
Definition: inv_list.cc:389
int min()
Return the min value of the inverted vector.
Definition: inv_list.cc:37
Declaration of inv_list class.
std::vector< int > * index(int value)
The indexes of the value.
Definition: inv_list.cc:381
void invert(std::vector< int > &vin)
bool contains(int value)
Check whether the vaule is in the inv_list.
Definition: inv_list.cc:373
void invert_sequence(std::vector< std::vector< int >> &vin, int shift_bits)
This is used for sequence match.
Definition: inv_list.cc:87