GENIE
knn.cu
Go to the documentation of this file.
1 
4 #include <math.h>
5 #include <assert.h>
6 
7 #include <genie/table/inv_list.h>
8 #include "match.h"
9 #include "match_integrated.h"
10 #include "heap_count.h"
11 
12 #include <genie/utility/Logger.h>
13 #include <genie/utility/Timing.h>
16 
17 #include <genie/configure.h>
18 
19 #include "knn.h"
20 
21 using namespace genie::table;
22 using namespace genie::query;
23 using namespace genie::matching;
24 using namespace genie::utility;
25 using namespace std;
26 using namespace thrust;
27 
28 bool GPUGENIE_ERROR = false;
29 unsigned long long GPUGENIE_TIME = 0ull;
30 
31 #ifndef GPUGenie_knn_THREADS_PER_BLOCK
32 #define GPUGenie_knn_THREADS_PER_BLOCK 1024
33 #endif
34 
35 #ifndef GPUGenie_knn_DEFAULT_HASH_TABLE_SIZE
36 #define GPUGenie_knn_DEFAULT_HASH_TABLE_SIZE 1
37 #endif
38 
39 #ifndef GPUGenie_knn_DEFAULT_BITMAP_BITS
40 #define GPUGenie_knn_DEFAULT_BITMAP_BITS 2
41 #endif
42 
43 #ifndef GPUGenie_knn_DEFAULT_DATA_PER_THREAD
44 #define GPUGenie_knn_DEFAULT_DATA_PER_THREAD 256
45 #endif
46 
47 __global__
48 void extract_index_and_count(data_t * data, int * id, int * count, int size)
49 {
50  int tId = threadIdx.x + blockIdx.x * blockDim.x;
51  if (tId >= size)
52  return;
53  id[tId] = data[tId].id;
54  count[tId] = (int) data[tId].aggregation;
55 }
56 
58  vector<Query>& queries, device_vector<int>& d_top_indexes,
59  device_vector<int>& d_top_count, int hash_table_size, int max_load,
60  int bitmap_bits)
61 {
62  int qmax = 0;
63 
64  for (unsigned int i = 0; i < queries.size(); ++i)
65  {
66  int count = queries[i].count_ranges();
67  if (count > qmax)
68  qmax = count;
69  }
70 
71  u64 start = getTime();
72 
73  knn(table, queries, d_top_indexes, d_top_count, hash_table_size, max_load,
74  bitmap_bits);
75 
76  u64 end = getTime();
77  double elapsed = getInterval(start, end);
78  Logger::log(Logger::VERBOSE, ">>>>>>> knn takes %fms <<<<<<", elapsed);
79 
80 }
81 
82 void genie::matching::knn_bijectMap_MT(vector<inv_table*>& table, vector<vector<Query> >& queries,
83  vector<device_vector<int> >& d_top_indexes, vector<device_vector<int> >& d_top_count,
84  vector<int>& hash_table_size, vector<int>& max_load, int bitmap_bits)
85 {
86  vector<int> qmaxs(table.size(), 0);
87 
88  auto it1 = qmaxs.begin();
89  auto it2 = queries.begin();
90  for (; it1 != qmaxs.end(); ++it1, ++it2)
91  for (auto it3 = it2->begin(); it3 != it2->end(); ++it3)
92  {
93  int count = it3->count_ranges();
94  if (count > *it1)
95  *it1 = count;
96  }
97 
98  u64 start = getTime();
99  knn_MT(table, queries, d_top_indexes, d_top_count, hash_table_size, max_load, bitmap_bits);
100  u64 end = getTime();
101 
102  double elapsed = getInterval(start, end);
103  Logger::log(Logger::VERBOSE, ">>>>>>> knn takes %fms <<<<<<", elapsed);
104 }
105 
106 void genie::matching::knn(genie::table::inv_table& table, vector<Query>& queries,
107  device_vector<int>& d_top_indexes, device_vector<int>& d_top_count,
108  int hash_table_size, int max_load, int bitmap_bits)
109 {
110  Logger::log(Logger::DEBUG, "Parameters: %d,%d", hash_table_size, bitmap_bits);
111 
112  device_vector<data_t> d_data;
113  device_vector<u32> d_bitmap;
114 
115  device_vector<u32> d_num_of_items_in_hashtable(queries.size());
116 
117  device_vector<u32> d_threshold, d_passCount;
118 
119  Logger::log(Logger::DEBUG, "[knn] max_load is %d.", max_load);
120 
121  if(queries.empty()){
122  throw genie::exception::cpu_runtime_error("Queries not loaded!");
123  }
124 
125  u64 startMatch = getTime();
126 
127  #ifdef GENIE_COMPR
128  genie::table::inv_compr_table *comprTable = dynamic_cast<inv_compr_table*>(&table);
129  if (comprTable){
131  if (!matchFn)
132  {
133  Logger::log(Logger::ALERT, "No matching function for %s compression!",
135  throw genie::exception::cpu_runtime_error("No compression matching function avaiable for required compression!");
136  }
137 
138  matchFn(
139  *comprTable, queries, d_data, d_bitmap,
140  hash_table_size, bitmap_bits, d_num_of_items_in_hashtable, d_threshold, d_passCount);
141 
142  } else
143  #endif
144  {
145  match(table, queries, d_data, d_bitmap,
146  hash_table_size, max_load, bitmap_bits, d_num_of_items_in_hashtable, d_threshold, d_passCount);
147  }
148 
149  u64 endMatch = getTime();
150  Logger::log(Logger::VERBOSE,
151  ">>>>> match() takes %f ms <<<<<",
152  getInterval(startMatch, endMatch));
153 
154  Logger::log(Logger::INFO, "Start topk....");
155  u64 start = getTime();
156 
157  thrust::device_vector<data_t> d_topk;
158  genie::matching::heap_count_topk(d_data, d_topk, d_threshold, d_passCount,
159  queries[0].topk(),queries.size());
160 
161  u64 end = getTime();
162  Logger::log(Logger::INFO, "Topk Finished!");
163  Logger::log(Logger::VERBOSE, ">>>>> main topk takes %fms <<<<<",
164  getInterval(start, end));
165  start = getTime();
166 
167 
168  d_top_count.resize(d_topk.size());
169  d_top_indexes.resize(d_topk.size());
171  d_top_indexes.size() / GPUGenie_knn_THREADS_PER_BLOCK + 1,
173  thrust::raw_pointer_cast(d_topk.data()),
174  thrust::raw_pointer_cast(d_top_indexes.data()),
175  thrust::raw_pointer_cast(d_top_count.data()), d_top_indexes.size());
176 
177 
178  end = getTime();
179  Logger::log(Logger::INFO, "Finish topk search!");
180  Logger::log(Logger::VERBOSE,
181  ">>>>> extract index and copy selected topk results takes %fms <<<<<",
182  getInterval(start, end));
183 }
184 
185 void
186 genie::matching::knn_MT(vector<inv_table*>& table, vector<vector<Query> >& queries,
187  vector<device_vector<int> >& d_top_indexes, vector<device_vector<int> >& d_top_count,
188  vector<int>& hash_table_size, vector<int>& max_load, int bitmap_bits)
189 {
190  /* pre-process */
191  vector<device_vector<data_t> > d_data(table.size());
192  vector<device_vector<u32> > d_bitmap(table.size());
193  vector<device_vector<u32> > d_num_of_items_in_hashtable(table.size());
194  vector<device_vector<u32> > d_threshold(table.size());
195  vector<device_vector<u32> > d_passCount(table.size());
196  vector<device_vector<data_t> > d_topk(table.size());
197  for (size_t i = 0; i < table.size(); ++i)
198  {
199  d_num_of_items_in_hashtable.at(i).resize(queries.at(i).size());
200  Logger::log(Logger::DEBUG, "[knn] max_load is %d.", max_load.at(i));
201  //if (queries.at(i).empty())
202  // clog << "No query on table " << i << "/" << table.size() << endl;
203  }
204 
205  /* run batched match kernels */
206  u64 startMatch = getTime();
207  size_t query_bytesize, gpu_free_mem, gpu_total_mem;
208  size_t start = 0, finish = 0;
209  size_t tolerance = 50 * 1024 * 1024; // make sure GPU always has at least 50 MB left
210  cudaCheckErrors(cudaMemGetInfo(&gpu_free_mem, &gpu_total_mem));
211  while (true)
212  {
213  /* if not all tables are already processed */
214  if (table.size() != start)
215  {
216  /* if not all remaining tables are scheduled for processing */
217  if (table.size() != finish)
218  {
219  // TODO: accurately estimate GPU memory size
220  query_bytesize = queries.at(finish).size() * hash_table_size.at(finish) * sizeof(data_t) + // d_data
221  queries.at(finish).size() * sizeof(u32) + // d_noiih
222  queries.at(finish).size() * sizeof(u32) + // d_threshold
223  queries.at(finish).size() * table.at(finish)->m_size() + // d_passCount
224  queries.at(finish).size() * sizeof(u32) + // d_topk in match
225  queries.at(finish).size() * table.at(finish)->m_size() * sizeof(Query::dim) + // d_dims
226  queries.at(finish).size() * table.at(finish)->i_size(); // d_bitmap
227  if (!queries.at(finish).empty())
228  query_bytesize += queries.at(finish).size() * queries.at(finish).at(0).topk() * sizeof(data_t); // d_topk
229  /* if mem has space */
230  if (gpu_free_mem > query_bytesize + tolerance)
231  {
232  gpu_free_mem -= query_bytesize;
233  ++finish;
234  continue;
235  }
236  /* cannot fit a single table */
237  else if (start == finish)
238  throw genie::exception::gpu_bad_alloc("MEMORY NOT ENOUGH");
239  }
240  /* match and extract top k for a batch */
241  match_MT(table, queries, d_data, d_bitmap, hash_table_size, max_load,
242  bitmap_bits, d_num_of_items_in_hashtable, d_threshold, d_passCount, start, finish);
243  for (size_t i = start; i < finish; ++i)
244  {
245  if (queries.at(i).empty())
246  continue;
247  heap_count_topk(d_data.at(i), d_topk.at(i), d_threshold.at(i), d_passCount.at(i),
248  queries.at(i).at(0).topk(), queries.at(i).size());
249 
250  d_top_count.at(i).resize(d_topk.at(i).size());
251  d_top_indexes.at(i).resize(d_topk.at(i).size());
253  d_top_indexes.at(i).size() / GPUGenie_knn_THREADS_PER_BLOCK + 1,
255  thrust::raw_pointer_cast(d_topk.at(i).data()),
256  thrust::raw_pointer_cast(d_top_indexes.at(i).data()),
257  thrust::raw_pointer_cast(d_top_count.at(i).data()), d_top_indexes.at(i).size());
258 
259  d_data.at(i).clear();
260  d_data.at(i).shrink_to_fit();
261  d_bitmap.at(i).clear();
262  d_bitmap.at(i).shrink_to_fit();
263  d_topk.at(i).clear();
264  d_topk.at(i).shrink_to_fit();
265  d_num_of_items_in_hashtable.at(i).clear();
266  d_num_of_items_in_hashtable.at(i).shrink_to_fit();
267  d_threshold.at(i).clear();
268  d_threshold.at(i).shrink_to_fit();
269  d_passCount.at(i).clear();
270  d_passCount.at(i).shrink_to_fit();
271  }
272  /* update mem info after memory free and continue with next batch */
273  cudaCheckErrors(cudaMemGetInfo(&gpu_free_mem, &gpu_total_mem));
274  start = finish;
275  }
276  else
277  break;
278  }
279  u64 endMatch = getTime();
280  Logger::log(Logger::VERBOSE,
281  ">>>>> match() takes %f ms <<<<<",
282  getInterval(startMatch, endMatch));
283 }
static MatchIntegratedFunPtr getMatchingFunPtr(COMPRESSION_TYPE type)
bool GPUGENIE_ERROR
Definition: knn.cu:28
unsigned long long GPUGENIE_TIME
Definition: knn.cu:29
unsigned long long getTime()
Get system time.
Definition: Timing.cc:22
The declaration for class inv_table.
Definition: inv_table.h:41
void(* MatchIntegratedFunPtr)(genie::table::inv_compr_table &, std::vector< genie::query::Query > &, thrust::device_vector< genie::matching::data_t > &, thrust::device_vector< u32 > &, int, int, thrust::device_vector< u32 > &, thrust::device_vector< u32 > &, thrust::device_vector< u32 > &)
void match_MT(vector< inv_table *> &table, vector< vector< Query > > &queries, vector< device_vector< data_t > > &d_data, vector< device_vector< u32 > > &d_bitmap, vector< int > &hash_table_size, vector< int > &max_load, int bitmap_bits, vector< device_vector< u32 > > &d_noiih, vector< device_vector< u32 > > &d_threshold, vector< device_vector< u32 > > &d_passCount, size_t start, size_t finish)
Definition: match.cu:502
genie::compression::COMPRESSION_TYPE getCompression() const
__global__ void extract_index_and_count(data_t *data, int *id, int *count, int size)
Definition: knn.cu:48
void knn_bijectMap(genie::table::inv_table &table, std::vector< genie::query::Query > &queries, thrust::device_vector< int > &d_top_indexes, thrust::device_vector< int > &d_top_count, int hash_table_size, int max_load, int bitmap_bits)
Collection of knn functions.
This file includes interfaces of original GENIE match functions.
void knn_bijectMap_MT(std::vector< genie::table::inv_table *> &table, std::vector< std::vector< genie::query::Query > > &queries, std::vector< thrust::device_vector< int > > &d_top_indexes, std::vector< thrust::device_vector< int > > &d_top_count, std::vector< int > &hash_table_size, std::vector< int > &max_load, int bitmap_bits)
void heap_count_topk(thrust::device_vector< genie::matching::data_t > &d_data, thrust::device_vector< genie::matching::data_t > &d_topk, thrust::device_vector< u32 > &d_threshold, thrust::device_vector< u32 > &d_passCount, int topk, int num_of_queries)
Record run-time information.
uint32_t u32
Definition: match_common.h:18
void knn_MT(std::vector< genie::table::inv_table *> &table, std::vector< std::vector< genie::query::Query > > &queries, std::vector< thrust::device_vector< int > > &d_top_indexes, std::vector< thrust::device_vector< int > > &d_top_count, std::vector< int > &hash_table_size, std::vector< int > &max_load, int bitmap_bits)
unsigned long long u64
A type definition for a 64-bit unsigned integer.
Definition: match_common.h:19
This file implements the function for topk selection in the final hashtable.
The second-step struct for processing queries.
Definition: query.h:59
double getInterval(unsigned long long start, unsigned long long stop)
Calculate time interval from start to end.
Definition: Timing.cc:36
Functions about getting system time.
void knn(genie::table::inv_table &table, std::vector< genie::query::Query > &queries, thrust::device_vector< int > &d_top_indexes, thrust::device_vector< int > &d_top_count, int hash_table_size, int max_load, int bitmap_bits)
#define GPUGenie_knn_THREADS_PER_BLOCK
Definition: knn.cu:32
Declaration of inv_list class.
void match(inv_table &table, vector< Query > &queries, device_vector< data_t > &d_data, device_vector< u32 > &d_bitmap, int hash_table_size, int max_load, int bitmap_bits, device_vector< u32 > &d_noiih, device_vector< u32 > &d_threshold, device_vector< u32 > &d_passCount)
Definition: match.cu:224
static std::string getCompressionName(COMPRESSION_TYPE type)
#define cudaCheckErrors(err)
The wrapper function to validate CUDA calls.
Definition: cuda_macros.h:23