FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
matrix_config.h
1 #ifndef __MATRIX_CONFIG_H__
2 #define __MATRIX_CONFIG_H__
3 
23 #include "common.h"
24 #include "log.h"
25 #include "config_map.h"
26 
27 #include "graph_exception.h"
28 
29 namespace fm
30 {
31 
36 {
37  // The number of threads for sparse matrix.
38  int num_SpM_threads;
39  // The number of threads for dense matrix.
40  int num_DM_threads;
41  std::string prof_file;
42  bool _in_mem_matrix;
43  // With 1D partition, a matrix is partitioned into row blocks.
44  int row_block_size;
45  // For 1D partition, each matrix I/O contains multiple row blocks.
46  // The matrix I/O size in row blocks.
47  int rb_io_size;
48  // For 1D partition, the size of a matrix I/O stolen from another thread.
49  int rb_steal_io_size;
50  // The size of CPU cache that can be used by a thread. It affects
51  // the super block size.
52  int cpu_cache_size;
53  // Indicate whether the hilbert order is enabled.
54  bool hilbert_order;
55  // The number of NUMA nodes.
56  int num_nodes;
57  // The buffer size used for external-memory sorting.
58  // The number of bytes.
59  size_t sort_buf_size;
60  // The buffer size used for external-memory groupby on vectors.
61  // The number of bytes.
62  size_t groupby_buf_size;
63  // The buffer size used for EM groupby on vector vectors.
64  // The number of vectors.
65  size_t vv_groupby_buf_size;
66  // The I/O buffer size for writing merge results in sorting a vector.
67  // The number of bytes.
68  size_t write_io_buf_size;
69  // The I/O size used for streaming.
70  size_t stream_io_size;
71  // Indicate whether we keep the memory buffer for I/O in dense matrix
72  // operations. Allocating the memory buffer for every dense matrix operation
73  // is expensive.
74  bool keep_mem_buf;
75 public:
81  num_SpM_threads = 4;
82  num_DM_threads = 4;
83  _in_mem_matrix = false;
84  row_block_size = 1024;
85  rb_io_size = 1024;
86  rb_steal_io_size = 1;
87  cpu_cache_size = 1024 * 1024;
88  hilbert_order = false;
89  num_nodes = 1;
90  sort_buf_size = 128 * 1024 * 1024;
91  groupby_buf_size = 128 * 1024 * 1024;
92  vv_groupby_buf_size = 1024 * 1024;
93  write_io_buf_size = 128 * 1024 * 1024;
94  stream_io_size = 128 * 1024 * 1024;
95  keep_mem_buf = false;
96  }
97 
101  void print_help();
105  void print();
106 
110  void init(config_map::ptr map);
111 
116  const std::string &get_prof_file() const {
117  return prof_file;
118  }
119 
124  int get_num_SpM_threads() const {
125  return num_SpM_threads;
126  }
127 
132  int get_num_DM_threads() const {
133  return num_DM_threads;
134  }
135 
141  bool use_in_mem_matrix() const {
142  return _in_mem_matrix;
143  }
144 
148  int get_row_block_size() const {
149  return row_block_size;
150  }
151 
156  int get_rb_io_size() const {
157  return rb_io_size;
158  }
159 
164  int get_rb_steal_io_size() const {
165  return rb_steal_io_size;
166  }
167 
168  size_t get_cpu_cache_size() const {
169  return cpu_cache_size;
170  }
171 
172  bool use_hilbert_order() const {
173  return hilbert_order;
174  }
175 
176  void set_cpu_cache_size(size_t size) {
177  cpu_cache_size = size;
178  }
179 
180  void set_hilbert_order(bool hilbert) {
181  hilbert_order = hilbert;
182  }
183 
184  void set_num_SpM_threads(int nthreads) {
185  this->num_SpM_threads = nthreads;
186  }
187 
188  void set_num_DM_threads(int nthreads) {
189  this->num_DM_threads = nthreads;
190  }
191 
192  void set_num_nodes(int num_nodes) {
193  this->num_nodes = num_nodes;
194  }
195 
196  int get_num_nodes() const {
197  return num_nodes;
198  }
199 
200  void set_sort_buf_size(size_t sort_buf_size) {
201  this->sort_buf_size = sort_buf_size;
202  }
203 
204  void set_groupby_buf_size(size_t groupby_buf_size) {
205  this->groupby_buf_size = groupby_buf_size;
206  }
207 
208  void set_vv_groupby_buf_size(size_t vv_groupby_buf_size) {
209  this->vv_groupby_buf_size = vv_groupby_buf_size;
210  }
211 
212  void set_write_io_buf_size(size_t write_io_buf_size) {
213  this->write_io_buf_size = write_io_buf_size;
214  }
215 
216  size_t get_sort_buf_size() const {
217  return sort_buf_size;
218  }
219 
220  size_t get_groupby_buf_size() const {
221  return groupby_buf_size;
222  }
223 
224  size_t get_vv_groupby_buf_size() const {
225  return vv_groupby_buf_size;
226  }
227 
228  size_t get_write_io_buf_size() const {
229  return write_io_buf_size;
230  }
231 
232  size_t get_stream_io_size() const {
233  return stream_io_size;
234  }
235 
236  bool is_keep_mem_buf() const {
237  return keep_mem_buf;
238  }
239 };
240 
242 {
243  printf("Configuration parameters in matrix operations.\n");
244  printf("\tthreads: the number of threads processing the matrix\n");
245  printf("\tFM_prof_file: the output file containing CPU profiling\n");
246  printf("\tin_mem_matrix: indicate whether to load the entire matrix to memory in advance\n");
247  printf("\trow_block_size: the size of a row block (the number of rows)\n");
248  printf("\trb_io_size: the size of a matrix I/O in 1D (the number of row blocks)\n");
249  printf("\trb_steal_io_size: the size of a stolen matrix I/O(the number of row blocks)\n");
250  printf("\tcpu_cache_size: the cpu cache size that can be used by a thread\n");
251  printf("\thilbert_order: use the hilbert order\n");
252  printf("\tnum_nodes: The number of NUMA nodes\n");
253  printf("\tsort_buf_size: the buffer size for EM sorting\n");
254  printf("\tgroupby_buf_size: the buffer size for EM groupby on vectors\n");
255  printf("\tvv_groupby_buf_size: the buffer size for EM groupby on vector vectors\n");
256  printf("\twrite_io_buf_size: the I/O buffer size for writing merge results\n");
257  printf("\tstream_io_size: the I/O size used for streaming\n");
258  printf("\tkeep_mem_buf: indicate whether to keep memory buffer for I/O in dense matrix operation\n");
259 }
260 
261 inline void matrix_config::print()
262 {
263  BOOST_LOG_TRIVIAL(info) << "Configuration parameters in matrix operations.";
264  BOOST_LOG_TRIVIAL(info) << "\tSpM threads: " << num_SpM_threads;
265  BOOST_LOG_TRIVIAL(info) << "\tDM threads: " << num_DM_threads;
266  BOOST_LOG_TRIVIAL(info) << "\tFM_prof_file: " << prof_file;
267  BOOST_LOG_TRIVIAL(info) << "\tin_mem_matrix: " << _in_mem_matrix;
268  BOOST_LOG_TRIVIAL(info) << "\trow_block_size: " << row_block_size;
269  BOOST_LOG_TRIVIAL(info) << "\trb_io_size" << rb_io_size;
270  BOOST_LOG_TRIVIAL(info) << "\trb_steal_io_size" << rb_steal_io_size;
271  BOOST_LOG_TRIVIAL(info) << "\tcpu_cache_size" << cpu_cache_size;
272  BOOST_LOG_TRIVIAL(info) << "\thilbert_order" << hilbert_order;
273  BOOST_LOG_TRIVIAL(info) << "\tnum_nodes" << num_nodes;
274  BOOST_LOG_TRIVIAL(info) << "\tsort_buf_size" << sort_buf_size;
275  BOOST_LOG_TRIVIAL(info) << "\tgroupby_buf_size" << groupby_buf_size;
276  BOOST_LOG_TRIVIAL(info) << "\tvv_groupby_buf_size" << vv_groupby_buf_size;
277  BOOST_LOG_TRIVIAL(info) << "\twrite_io_buf_size" << write_io_buf_size;
278  BOOST_LOG_TRIVIAL(info) << "\tstream_io_size" << stream_io_size;
279  BOOST_LOG_TRIVIAL(info) << "\tkeep_mem_buf" << keep_mem_buf;
280 }
281 
282 inline void matrix_config::init(config_map::ptr map)
283 {
284  if (map->has_option("SpM_threads"))
285  map->read_option_int("SpM_threads", num_SpM_threads);
286  if (map->has_option("DM_threads"))
287  map->read_option_int("DM_threads", num_DM_threads);
288  if (map->has_option("FM_prof_file"))
289  map->read_option("FM_prof_file", prof_file);
290  if (map->has_option("in_mem_matrix"))
291  map->read_option_bool("in_mem_matrix", _in_mem_matrix);
292  if (map->has_option("row_block_size"))
293  map->read_option_int("row_block_size", row_block_size);
294  if (map->has_option("rb_io_size"))
295  map->read_option_int("rb_io_size", rb_io_size);
296  if (map->has_option("rb_steal_io_size"))
297  map->read_option_int("rb_steal_io_size", rb_steal_io_size);
298  if (map->has_option("cpu_cache_size"))
299  map->read_option_int("cpu_cache_size", cpu_cache_size);
300  if (map->has_option("hilbert_order"))
301  map->read_option_bool("hilbert_order", hilbert_order);
302  if (map->has_option("num_nodes"))
303  map->read_option_int("num_nodes", num_nodes);
304  if (map->has_option("sort_buf_size")) {
305  long tmp = 0;
306  map->read_option_long("sort_buf_size", tmp);
307  sort_buf_size = tmp;
308  }
309  if (map->has_option("groupby_buf_size")) {
310  long tmp = 0;
311  map->read_option_long("groupby_buf_size", tmp);
312  groupby_buf_size = tmp;
313  }
314  if (map->has_option("vv_groupby_buf_size")) {
315  long tmp = 0;
316  map->read_option_long("vv_groupby_buf_size", tmp);
317  vv_groupby_buf_size = tmp;
318  }
319  if (map->has_option("write_io_buf_size")) {
320  long tmp = 0;
321  map->read_option_long("write_io_buf_size", tmp);
322  write_io_buf_size = tmp;
323  }
324  if (map->has_option("stream_io_size")) {
325  long tmp = 0;
326  map->read_option_long("stream_io_size", tmp);
327  stream_io_size = tmp;
328  }
329  if (map->has_option("keep_mem_buf"))
330  map->read_option_bool("keep_mem_buf", keep_mem_buf);
331 }
332 
// The matrix configuration object `matrix_conf'. It is only declared here
// (extern); its definition is not part of this header.
333 extern matrix_config matrix_conf;
334 
// Page size constant, fixed at 4096.
// NOTE(review): presumably a size in bytes -- confirm against the users.
335 static const int PAGE_SIZE = 4096;
336 
// NOTE(review): the name suggests the log2 of the NUMA range size
// (i.e. a range of 2^20) -- confirm against the code that uses it.
337 // TODO We need to try different range size to get better performance.
338 static const size_t NUMA_range_size_log = 20;
339 
340 }
341 
342 #endif
matrix_config()
The default constructor that sets all configurations to their default values.
Definition: matrix_config.h:80
Definition: matrix_config.h:35
const std::string & get_prof_file() const
Get the output file containing CPU profiling.
Definition: matrix_config.h:116
void print()
Print the current values of all configurations.
Definition: matrix_config.h:261
int get_num_DM_threads() const
Get the number of worker threads for dense matrix.
Definition: matrix_config.h:132
int get_num_SpM_threads() const
Get the number of worker threads for sparse matrix.
Definition: matrix_config.h:124
void init(config_map::ptr map)
Set the configurations to the user-defined values.
Definition: matrix_config.h:282
int get_rb_steal_io_size() const
The size of a matrix I/O stolen from another thread (the number of row blocks).
Definition: matrix_config.h:164
int get_rb_io_size() const
The size of a matrix I/O in 1D partitioning (the number of row blocks).
Definition: matrix_config.h:156
bool use_in_mem_matrix() const
Determine whether to use in-mem matrix data.
Definition: matrix_config.h:141
void print_help()
Print the explanations of all configurations.
Definition: matrix_config.h:241
int get_row_block_size() const
The size of a row block (the number of rows).
Definition: matrix_config.h:148