31 #ifndef GPUGenie_knn_THREADS_PER_BLOCK 32 #define GPUGenie_knn_THREADS_PER_BLOCK 1024 35 #ifndef GPUGenie_knn_DEFAULT_HASH_TABLE_SIZE 36 #define GPUGenie_knn_DEFAULT_HASH_TABLE_SIZE 1 39 #ifndef GPUGenie_knn_DEFAULT_BITMAP_BITS 40 #define GPUGenie_knn_DEFAULT_BITMAP_BITS 2 43 #ifndef GPUGenie_knn_DEFAULT_DATA_PER_THREAD 44 #define GPUGenie_knn_DEFAULT_DATA_PER_THREAD 256 50 int tId = threadIdx.x + blockIdx.x * blockDim.x;
53 id[tId] = data[tId].
id;
54 count[tId] = (int) data[tId].aggregation;
58 vector<Query>& queries, device_vector<int>& d_top_indexes,
59 device_vector<int>& d_top_count,
int hash_table_size,
int max_load,
64 for (
unsigned int i = 0; i < queries.size(); ++i)
66 int count = queries[i].count_ranges();
73 knn(table, queries, d_top_indexes, d_top_count, hash_table_size, max_load,
78 Logger::log(Logger::VERBOSE,
">>>>>>> knn takes %fms <<<<<<", elapsed);
83 vector<device_vector<int> >& d_top_indexes, vector<device_vector<int> >& d_top_count,
84 vector<int>& hash_table_size, vector<int>& max_load,
int bitmap_bits)
86 vector<int> qmaxs(table.size(), 0);
88 auto it1 = qmaxs.begin();
89 auto it2 = queries.begin();
90 for (; it1 != qmaxs.end(); ++it1, ++it2)
91 for (
auto it3 = it2->begin(); it3 != it2->end(); ++it3)
93 int count = it3->count_ranges();
99 knn_MT(table, queries, d_top_indexes, d_top_count, hash_table_size, max_load, bitmap_bits);
103 Logger::log(Logger::VERBOSE,
">>>>>>> knn takes %fms <<<<<<", elapsed);
107 device_vector<int>& d_top_indexes, device_vector<int>& d_top_count,
108 int hash_table_size,
int max_load,
int bitmap_bits)
110 Logger::log(Logger::DEBUG,
"Parameters: %d,%d", hash_table_size, bitmap_bits);
112 device_vector<data_t> d_data;
113 device_vector<u32> d_bitmap;
115 device_vector<u32> d_num_of_items_in_hashtable(queries.size());
117 device_vector<u32> d_threshold, d_passCount;
119 Logger::log(Logger::DEBUG,
"[knn] max_load is %d.", max_load);
133 Logger::log(Logger::ALERT,
"No matching function for %s compression!",
139 *comprTable, queries, d_data, d_bitmap,
140 hash_table_size, bitmap_bits, d_num_of_items_in_hashtable, d_threshold, d_passCount);
145 match(table, queries, d_data, d_bitmap,
146 hash_table_size, max_load, bitmap_bits, d_num_of_items_in_hashtable, d_threshold, d_passCount);
150 Logger::log(Logger::VERBOSE,
151 ">>>>> match() takes %f ms <<<<<",
154 Logger::log(Logger::INFO,
"Start topk....");
157 thrust::device_vector<data_t> d_topk;
159 queries[0].topk(),queries.size());
162 Logger::log(Logger::INFO,
"Topk Finished!");
163 Logger::log(Logger::VERBOSE,
">>>>> main topk takes %fms <<<<<",
168 d_top_count.resize(d_topk.size());
169 d_top_indexes.resize(d_topk.size());
173 thrust::raw_pointer_cast(d_topk.data()),
174 thrust::raw_pointer_cast(d_top_indexes.data()),
175 thrust::raw_pointer_cast(d_top_count.data()), d_top_indexes.size());
179 Logger::log(Logger::INFO,
"Finish topk search!");
180 Logger::log(Logger::VERBOSE,
181 ">>>>> extract index and copy selected topk results takes %fms <<<<<",
187 vector<device_vector<int> >& d_top_indexes, vector<device_vector<int> >& d_top_count,
188 vector<int>& hash_table_size, vector<int>& max_load,
int bitmap_bits)
191 vector<device_vector<data_t> > d_data(table.size());
192 vector<device_vector<u32> > d_bitmap(table.size());
193 vector<device_vector<u32> > d_num_of_items_in_hashtable(table.size());
194 vector<device_vector<u32> > d_threshold(table.size());
195 vector<device_vector<u32> > d_passCount(table.size());
196 vector<device_vector<data_t> > d_topk(table.size());
197 for (
size_t i = 0; i < table.size(); ++i)
199 d_num_of_items_in_hashtable.at(i).resize(queries.at(i).size());
200 Logger::log(Logger::DEBUG,
"[knn] max_load is %d.", max_load.at(i));
207 size_t query_bytesize, gpu_free_mem, gpu_total_mem;
208 size_t start = 0, finish = 0;
209 size_t tolerance = 50 * 1024 * 1024;
214 if (table.size() != start)
217 if (table.size() != finish)
220 query_bytesize = queries.at(finish).size() * hash_table_size.at(finish) *
sizeof(
data_t) +
221 queries.at(finish).size() *
sizeof(
u32) +
222 queries.at(finish).size() *
sizeof(
u32) +
223 queries.at(finish).size() * table.at(finish)->
m_size() +
224 queries.at(finish).size() *
sizeof(
u32) +
225 queries.at(finish).size() * table.at(finish)->
m_size() *
sizeof(
Query::dim) +
226 queries.at(finish).size() * table.at(finish)->
i_size();
227 if (!queries.at(finish).empty())
228 query_bytesize += queries.at(finish).size() * queries.at(finish).at(0).topk() *
sizeof(
data_t);
230 if (gpu_free_mem > query_bytesize + tolerance)
232 gpu_free_mem -= query_bytesize;
237 else if (start == finish)
241 match_MT(table, queries, d_data, d_bitmap, hash_table_size, max_load,
242 bitmap_bits, d_num_of_items_in_hashtable, d_threshold, d_passCount, start, finish);
243 for (
size_t i = start; i < finish; ++i)
245 if (queries.at(i).empty())
247 heap_count_topk(d_data.at(i), d_topk.at(i), d_threshold.at(i), d_passCount.at(i),
248 queries.at(i).at(0).topk(), queries.at(i).size());
250 d_top_count.at(i).resize(d_topk.at(i).size());
251 d_top_indexes.at(i).resize(d_topk.at(i).size());
255 thrust::raw_pointer_cast(d_topk.at(i).data()),
256 thrust::raw_pointer_cast(d_top_indexes.at(i).data()),
257 thrust::raw_pointer_cast(d_top_count.at(i).data()), d_top_indexes.at(i).size());
259 d_data.at(i).clear();
260 d_data.at(i).shrink_to_fit();
261 d_bitmap.at(i).clear();
262 d_bitmap.at(i).shrink_to_fit();
263 d_topk.at(i).clear();
264 d_topk.at(i).shrink_to_fit();
265 d_num_of_items_in_hashtable.at(i).clear();
266 d_num_of_items_in_hashtable.at(i).shrink_to_fit();
267 d_threshold.at(i).clear();
268 d_threshold.at(i).shrink_to_fit();
269 d_passCount.at(i).clear();
270 d_passCount.at(i).shrink_to_fit();
280 Logger::log(Logger::VERBOSE,
281 ">>>>> match() takes %f ms <<<<<",
static MatchIntegratedFunPtr getMatchingFunPtr(COMPRESSION_TYPE type)
unsigned long long GPUGENIE_TIME
unsigned long long getTime()
Get system time.
The declaration for class inv_table.
void(* MatchIntegratedFunPtr)(genie::table::inv_compr_table &, std::vector< genie::query::Query > &, thrust::device_vector< genie::matching::data_t > &, thrust::device_vector< u32 > &, int, int, thrust::device_vector< u32 > &, thrust::device_vector< u32 > &, thrust::device_vector< u32 > &)
void match_MT(vector< inv_table *> &table, vector< vector< Query > > &queries, vector< device_vector< data_t > > &d_data, vector< device_vector< u32 > > &d_bitmap, vector< int > &hash_table_size, vector< int > &max_load, int bitmap_bits, vector< device_vector< u32 > > &d_noiih, vector< device_vector< u32 > > &d_threshold, vector< device_vector< u32 > > &d_passCount, size_t start, size_t finish)
genie::compression::COMPRESSION_TYPE getCompression() const
__global__ void extract_index_and_count(data_t *data, int *id, int *count, int size)
void knn_bijectMap(genie::table::inv_table &table, std::vector< genie::query::Query > &queries, thrust::device_vector< int > &d_top_indexes, thrust::device_vector< int > &d_top_count, int hash_table_size, int max_load, int bitmap_bits)
Collection of knn functions.
This file includes interfaces of original GENIE match functions.
void knn_bijectMap_MT(std::vector< genie::table::inv_table *> &table, std::vector< std::vector< genie::query::Query > > &queries, std::vector< thrust::device_vector< int > > &d_top_indexes, std::vector< thrust::device_vector< int > > &d_top_count, std::vector< int > &hash_table_size, std::vector< int > &max_load, int bitmap_bits)
void heap_count_topk(thrust::device_vector< genie::matching::data_t > &d_data, thrust::device_vector< genie::matching::data_t > &d_topk, thrust::device_vector< u32 > &d_threshold, thrust::device_vector< u32 > &d_passCount, int topk, int num_of_queries)
Record run-time information.
void knn_MT(std::vector< genie::table::inv_table *> &table, std::vector< std::vector< genie::query::Query > > &queries, std::vector< thrust::device_vector< int > > &d_top_indexes, std::vector< thrust::device_vector< int > > &d_top_count, std::vector< int > &hash_table_size, std::vector< int > &max_load, int bitmap_bits)
unsigned long long u64
A type definition for a 64-bit unsigned integer.
This file implements the function for topk selection in the final hashtable.
The second-step struct for processing queries.
double getInterval(unsigned long long start, unsigned long long stop)
Calculate time interval from start to end.
Functions about getting system time.
void knn(genie::table::inv_table &table, std::vector< genie::query::Query > &queries, thrust::device_vector< int > &d_top_indexes, thrust::device_vector< int > &d_top_count, int hash_table_size, int max_load, int bitmap_bits)
#define GPUGenie_knn_THREADS_PER_BLOCK
Declaration of inv_list class.
void match(inv_table &table, vector< Query > &queries, device_vector< data_t > &d_data, device_vector< u32 > &d_bitmap, int hash_table_size, int max_load, int bitmap_bits, device_vector< u32 > &d_noiih, device_vector< u32 > &d_threshold, device_vector< u32 > &d_passCount)
static std::string getCompressionName(COMPRESSION_TYPE type)
#define cudaCheckErrors(err)
The wrapper function to validate CUDA calls.