GENIE
FileReader.cc
Go to the documentation of this file.
1 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <fstream>
8 #include <vector>
9 #include <string>
10 #include <cstring>
11 #include <stdexcept>
12 #include <map>
13 #include <iostream>
14 
15 #include <genie/utility/Logger.h>
16 #include <genie/query/query.h>
17 #include <genie/table/inv_table.h>
18 
19 #include "FileReader.h"
20 
21 using namespace std;
22 using namespace genie::query;
23 using namespace genie::table;
24 
25 
/**
 * Split a string into tokens.
 *
 * Every character of @p c is treated as a separator (so ", " splits on both
 * commas and spaces). Runs of separators are collapsed, which means empty
 * tokens are never produced. Like the C string routines it is built on, the
 * scan stops at the first NUL character found in @p str.
 *
 * @param str  The text to tokenize; it is not modified.
 * @param c    NUL-terminated set of separator characters. A null pointer is
 *             treated as an empty set, i.e. the whole string is one token.
 * @return     The tokens in order of appearance; empty if @p str holds no
 *             non-separator characters.
 */
std::vector<std::string> split(std::string& str, const char* c)
{
    std::vector<std::string> res;

    if (c == NULL)
        c = "";

    // strspn/strcspn give the same tokenisation as strtok but keep no hidden
    // global state and need no scratch copy of the input, so the function is
    // re-entrant and cannot leak when push_back throws.
    const char* cursor = str.c_str();
    for (;;)
    {
        // Skip any leading separators.
        cursor += std::strspn(cursor, c);

        // Measure the token that starts here; zero means end of input.
        const std::size_t len = std::strcspn(cursor, c);
        if (len == 0)
            break;

        res.push_back(std::string(cursor, len));
        cursor += len;
    }
    return res;
}
41 
/**
 * Strip leading and trailing space characters (' ') from a string.
 *
 * Only the plain space character is removed; tabs and other whitespace are
 * left untouched, and spaces inside the text are preserved.
 *
 * @param origin  The text to trim.
 * @return        The trimmed text, or an empty string when @p origin is empty
 *                or consists solely of spaces.
 */
std::string eraseSpace(std::string origin)
{
    const std::string::size_type first = origin.find_first_not_of(' ');

    // Empty or all-space input: nothing to keep. Handling this up front
    // avoids indexing before the start of the string.
    if (first == std::string::npos)
        return std::string();

    const std::string::size_type last = origin.find_last_not_of(' ');
    return origin.substr(first, last - first + 1);
}
52 
53 
54 void genie::utility::read_file(vector<vector<int> >& dest, const char* fname, int num)
55 {
56  string line;
57  ifstream ifile(fname);
58 
59  dest.clear();
60 
61  if (ifile.is_open())
62  {
63  int count = 0;
64  while (getline(ifile, line) && (count < num || num < 0))
65  {
66  vector<int> row;
67  vector<string> nstring = split(line, ", ");
68  unsigned int i;
69  for (i = 0; i < nstring.size(); ++i)
70  {
71  int int_value = atoi(eraseSpace(nstring[i]).c_str());
72  row.push_back(int_value);
73  }
74  dest.push_back(row);
75  count++;
76  }
77  Logger::log(Logger::INFO, "Finish reading file!");
78  Logger::log(Logger::DEBUG, "%d rows are read into memory!",
79  dest.size());
80  }
81 
82  ifile.close();
83 }
84 
85 //Read new format query data
86 //Sample data format
87 //qid dim value selectivity weight
88 // 0 0 15 0.04 1
89 // 0 1 6 0.04 1
90 // ....
91 void genie::utility::read_query(std::vector<genie::utility::attr_t>& data, const char* file_name,
92  int num)
93 {
94 
95  string line;
96  ifstream ifile(file_name);
97 
98  data.clear();
99  int count = num;
100  int total = 0;
101  attr_t attr;
102  if (ifile.is_open())
103  {
104 
105  while (getline(ifile, line) && count != 0)
106  {
107 
108  vector<string> nstring = split(line, ", ");
109 
110  if (nstring.size() == GPUGENIE_QUERY_NUM_OF_FIELDS)
111  {
112  count--;
113  total++;
114  attr.qid = atoi(nstring[GPUGENIE_QUERY_QID_INDEX].c_str());
115  attr.dim = atoi(nstring[GPUGENIE_QUERY_DIM_INDEX].c_str());
116  attr.value = atoi(nstring[GPUGENIE_QUERY_VALUE_INDEX].c_str());
117  attr.sel = atof(
118  nstring[GPUGENIE_QUERY_SELECTIVITY_INDEX].c_str());
119  attr.weight = atof(
120  nstring[GPUGENIE_QUERY_WEIGHT_INDEX].c_str());
121  data.push_back(attr);
122  }
123  }
124  }
125 
126  ifile.close();
127 
128  Logger::log(Logger::INFO, "Finish reading query data!");
129  Logger::log(Logger::DEBUG, "%d attributes are loaded.", total);
130 }
131 
132 //Read old format query data: same format as data files
133 void genie::utility::read_query(genie::table::inv_table& table, const char* fname,
134  vector<genie::query::Query>& queries, int num_of_queries, int num_of_query_dims,
135  int radius, int topk, float selectivity)
136 {
137 
138  string line;
139  ifstream ifile(fname);
140 
141  queries.clear();
142  queries.reserve(num_of_queries);
143 
144  if (ifile.is_open())
145  {
146  int j = 0;
147  while (getline(ifile, line) && j < num_of_queries)
148  {
149 
150  vector<string> nstring = split(line, ", ");
151  unsigned int i;
152  Query q(table, j);
153  for (i = 0; i < nstring.size() && i < (unsigned int) num_of_query_dims; ++i)
154  {
155  string myString = eraseSpace(nstring[i]);
156  int value = atoi(myString.c_str());
157 
158  q.attr(j, value - radius < 0 ? 0 : value - radius,
159  value + radius, 1, i);
160  }
161  q.topk(topk);
162  if (selectivity > 0.0f)
163  {
164  q.selectivity(selectivity);
166  }
167  queries.push_back(q);
168  ++j;
169  }
170  }
171 
172  ifile.close();
173 
174  Logger::log(Logger::INFO, "Finish reading queries!");
175  Logger::log(Logger::DEBUG, "%d queries are loaded.", num_of_queries);
176 }
177 
void topk(int k)
Set top k matches.
Definition: query.cc:263
const unsigned int GPUGENIE_QUERY_WEIGHT_INDEX
Definition: FileReader.h:56
vector< string > split(string &str, const char *c)
Definition: FileReader.cc:26
The declaration for class inv_table.
Definition: inv_table.h:41
string eraseSpace(string origin)
Definition: FileReader.cc:42
Declaration of query class.
const unsigned int GPUGENIE_QUERY_NUM_OF_FIELDS
Definition: FileReader.h:60
const unsigned int GPUGENIE_QUERY_DIM_INDEX
Definition: FileReader.h:44
void apply_adaptive_query_range()
Construct query in adaptive range mode.
Definition: query.cc:196
const unsigned int GPUGENIE_QUERY_SELECTIVITY_INDEX
Definition: FileReader.h:52
void read_query(genie::table::inv_table &table, const char *fname, std::vector< genie::query::Query > &queries, int num_of_queries, int num_of_query_dims, int radius, int topk, float selectivity)
struct genie::utility::_GPUGenie_Query_Data attr_t
define class inv_table
This file declares functions about file operations.
Record run-time information.
void read_file(std::vector< std::vector< int > > &dest, const char *fname, int num)
const unsigned int GPUGENIE_QUERY_QID_INDEX
Definition: FileReader.h:40
void attr(int index, int low, int up, float weight, int order)
Modify the matching range and weight of an attribute.
Definition: query.cc:76
const unsigned int GPUGENIE_QUERY_VALUE_INDEX
Definition: FileReader.h:48
void selectivity(float s)
Set the selectivity.
Definition: query.cc:131