sparse_matrix.h
#ifndef __SPARSE_MATRIX_H__
#define __SPARSE_MATRIX_H__

/*
 * Copyright 2014 Open Connectome Project (http://openconnecto.me)
 * Written by Da Zheng (zhengda1936@gmail.com)
 *
 * This file is part of FlashMatrix.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <memory>

#include "FGlib.h"
#include "matrix_io.h"
#include "io_interface.h"
#include "mem_vec_store.h"
#include "NUMA_dense_matrix.h"
#include "dense_matrix.h"
#include "local_matrix_store.h"
#include "local_mem_buffer.h"
#include "EM_object.h"

namespace fm
{
class compute_task: public detail::portion_compute
{
public:
    typedef std::shared_ptr<compute_task> ptr;

    virtual safs::io_request get_request() const = 0;
};

class task_creator
{
public:
    typedef std::shared_ptr<task_creator> ptr;

    virtual compute_task::ptr create(const matrix_io &) const = 0;
    virtual bool set_data(detail::mem_matrix_store::const_ptr in,
            detail::matrix_store::ptr out) = 0;
    virtual std::vector<detail::EM_object *> get_EM_objs() = 0;
    virtual bool is_complete() const = 0;
};
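
/*
 * How these two interfaces fit together (a sketch inferred from this header;
 * the actual driver is sparse_matrix::compute below): a task_creator is first
 * bound to the input and output matrices with set_data(). For every matrix_io
 * produced by a matrix_io_generator, create() builds a compute_task. The task
 * exposes the read it needs through get_request(); when the requested bytes
 * arrive, the I/O layer invokes run() (inherited from detail::portion_compute)
 * to perform the computation on the fetched portion of the matrix.
 */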

/*
 * This task performs computation on a sparse matrix in the FlashGraph format.
 */
class fg_row_compute_task: public compute_task
{
    matrix_io io;
    off_t off;
    char *buf;
    size_t buf_size;
public:
    fg_row_compute_task(const matrix_io &_io): io(_io) {
        // Round the read range to page boundaries and use a page-aligned
        // buffer (valloc), since the underlying I/O is page-based.
        off_t orig_off = io.get_loc().get_offset();
        off = ROUND_PAGE(orig_off);
        buf_size = ROUNDUP_PAGE(orig_off - off + io.get_size());
        buf = (char *) valloc(buf_size);
    }

    ~fg_row_compute_task() {
        free(buf);
    }
    virtual void run(char *buf, size_t size);
    virtual void run_on_row(const fg::ext_mem_undirected_vertex &v) = 0;
    virtual safs::io_request get_request() const {
        return safs::io_request(buf, safs::data_loc_t(io.get_loc().get_file_id(),
                    off), buf_size, READ);
    }
};

/*
 * This task performs sparse matrix dense matrix multiplication
 * in the FlashGraph format.
 * We implement this method for the sake of compatibility. It doesn't
 * run very fast.
 */
template<class DenseType, class SparseType, int ROW_WIDTH>
class fg_row_spmm_task: public fg_row_compute_task
{
    const detail::mem_matrix_store &input;
    detail::mem_matrix_store &output;
public:
    fg_row_spmm_task(const detail::mem_matrix_store &_input,
            detail::mem_matrix_store &_output,
            const matrix_io &_io): fg_row_compute_task(_io),
            input(_input), output(_output) {
        assert(input.get_type() == get_scalar_type<DenseType>());
        assert(output.get_type() == get_scalar_type<DenseType>());
        assert(input.get_num_cols() == output.get_num_cols());
        assert(input.get_num_cols() == (size_t) ROW_WIDTH);
    }

    void run_on_row(const fg::ext_mem_undirected_vertex &v);
};

template<class DenseType, class SparseType, int ROW_WIDTH>
void fg_row_spmm_task<DenseType, SparseType, ROW_WIDTH>::run_on_row(
        const fg::ext_mem_undirected_vertex &v)
{
    DenseType res[ROW_WIDTH];
    for (size_t i = 0; i < (size_t) ROW_WIDTH; i++)
        res[i] = 0;

    bool has_val = v.has_edge_data();
    for (size_t i = 0; i < v.get_num_edges(); i++) {
        fg::vertex_id_t id = v.get_neighbor(i);
        SparseType data = 1;
        if (has_val)
            data = v.get_edge_data<SparseType>(i);
        // TODO It's fairly expensive to get a row because it requires a function
        // call on a virtual method.
        const DenseType *row = (const DenseType *) input.get_row(id);
        for (size_t j = 0; j < (size_t) ROW_WIDTH; j++)
            res[j] += row[j] * data;
    }
    memcpy(output.get_row(v.get_id()), res, sizeof(DenseType) * ROW_WIDTH);
}
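
/*
 * In matrix terms, run_on_row above computes one row of the output:
 *     output[v, *] = sum_u A[v, u] * input[u, *]
 * over the neighbors u of vertex v, where A[v, u] is the edge value
 * (or 1 when the matrix has no edge data).
 */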

template<class DenseType, class SparseType>
class fg_row_spmm_task<DenseType, SparseType, 0>: public fg_row_compute_task
{
    const detail::mem_matrix_store &input;
    detail::mem_matrix_store &output;
public:
    fg_row_spmm_task(const detail::mem_matrix_store &_input,
            detail::mem_matrix_store &_output,
            const matrix_io &_io): fg_row_compute_task(_io),
            input(_input), output(_output) {
        assert(input.get_type() == get_scalar_type<DenseType>());
        assert(output.get_type() == get_scalar_type<DenseType>());
        assert(input.get_num_cols() == output.get_num_cols());
    }

    void run_on_row(const fg::ext_mem_undirected_vertex &v) {
        DenseType res[input.get_num_cols()];
        for (size_t i = 0; i < input.get_num_cols(); i++)
            res[i] = 0;

        bool has_val = v.has_edge_data();
        for (size_t i = 0; i < v.get_num_edges(); i++) {
            fg::vertex_id_t id = v.get_neighbor(i);
            SparseType data = 1;
            if (has_val)
                data = v.get_edge_data<SparseType>(i);
            // It's fairly expensive to get a row because it requires a function
            // call on a virtual method.
            const DenseType *row = (const DenseType *) input.get_row(id);
            for (size_t j = 0; j < input.get_num_cols(); j++)
                res[j] += row[j] * data;
        }
        memcpy(output.get_row(v.get_id()), res,
                sizeof(DenseType) * input.get_num_cols());
    }
};

class block_compute_task;

/*
 * The interface for processing blocks.
 */
class block_exec_order
{
public:
    typedef std::shared_ptr<block_exec_order> ptr;

    virtual bool is_valid_size(size_t height, size_t width) const = 0;

    // The vector of blocks should be from multiple block rows.
    virtual bool exec(block_compute_task &task,
            std::vector<const sparse_block_2d *> &blocks) const = 0;
};

/*
 * A compute task runs on multiple 2D-partitioned block rows of a sparse matrix.
 */
class block_compute_task: public compute_task
{
    block_exec_order::ptr exec_order;
    // The block rows in the buffer.
    std::vector<char *> block_rows;
    matrix_io io;
    off_t off;
    detail::local_mem_buffer::irreg_buf_t buf;
    size_t real_io_size;
    size_t entry_size;
protected:
    block_2d_size block_size;
public:
    block_compute_task(const matrix_io &_io, const sparse_matrix &mat,
            block_exec_order::ptr order);
    ~block_compute_task();

    const matrix_io &get_io() const {
        return io;
    }

    virtual void run(char *buf, size_t size);

    virtual safs::io_request get_request() const {
        return safs::io_request(buf.second.get(),
                safs::data_loc_t(io.get_loc().get_file_id(), off),
                real_io_size, READ);
    }

    virtual void run_on_block(const sparse_block_2d &block) = 0;
    virtual void notify_complete() = 0;
};

/*
 * This class helps stream data from SpMM to the output dense matrix on
 * disks; the output dense matrix is assumed to be very tall and skinny.
 * The data may come from multiple threads in no particular order, but once
 * the incoming data has been ordered, it can be streamed to disks
 * sequentially. This data structure is shared by multiple threads.
 */
class EM_matrix_stream
{
    detail::matrix_store::ptr mat;

    class filled_local_store
    {
        detail::local_matrix_store::ptr data;
        std::atomic<size_t> num_filled_rows;
    public:
        typedef std::shared_ptr<filled_local_store> ptr;

        filled_local_store(detail::local_matrix_store::ptr data) {
            this->data = data;
            num_filled_rows = 0;
        }
        // If the write completely fills the local buffer, it returns true.
        bool write(detail::local_matrix_store::const_ptr portion,
                off_t global_start_row, off_t global_start_col);

        detail::local_matrix_store::const_ptr get_whole_portion() const {
            return data;
        }

        off_t get_global_start_row() const {
            return data->get_global_start_row();
        }
    };

    pthread_spinlock_t lock;
    // This keeps the buffers with partial data in EM matrix portions.
    // If an EM matrix portion is complete, the portion is flushed to disks
    // and deleted from the hashtable.
    std::unordered_map<off_t, filled_local_store::ptr> portion_bufs;

    EM_matrix_stream(detail::matrix_store::ptr mat) {
        pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
        this->mat = mat;
    }
public:
    typedef std::shared_ptr<EM_matrix_stream> ptr;

    static ptr create(detail::matrix_store::ptr mat) {
        return ptr(new EM_matrix_stream(mat));
    }

    void write_async(detail::local_matrix_store::const_ptr portion,
            off_t start_row, off_t start_col);
    bool is_complete() const;
};
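
/*
 * A usage sketch (hypothetical; `out_store', `portion' and `start_row' are
 * placeholder names): each worker thread hands over the portions of the
 * result it has computed, in any order, and a portion is flushed to disks
 * once all writes to it have arrived.
 *
 *     EM_matrix_stream::ptr stream = EM_matrix_stream::create(out_store);
 *     // In a worker thread: `portion' holds the rows starting at `start_row'.
 *     stream->write_async(portion, start_row, 0);
 *     ...
 *     // After all tasks complete, every portion should have been flushed.
 *     assert(stream->is_complete());
 */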

class block_spmm_task: public block_compute_task
{
    // The size of the non-zero entries.
    size_t entry_size;

    const detail::mem_matrix_store &input;
    detail::matrix_store &output;
    EM_matrix_stream::ptr output_stream;

    /*
     * A task is responsible for processing entire block rows.
     * The result is stored in out_part.
     */
    detail::local_row_matrix_store::ptr out_part;
public:
    block_spmm_task(const detail::mem_matrix_store &_input,
            detail::matrix_store &_output, EM_matrix_stream::ptr stream,
            const matrix_io &io, const sparse_matrix &mat,
            block_exec_order::ptr order);

    const detail::matrix_store &get_in_matrix() const {
        return input;
    }
    const detail::matrix_store &get_out_matrix() const {
        return output;
    }

    size_t get_entry_size() const {
        return entry_size;
    }

    /*
     * Get the rows in the input matrix required by SpMM.
     */
    const char *get_in_rows(size_t in_row, size_t num_rows);

    /*
     * Get the rows in the output matrix required by SpMM.
     */
    char *get_out_rows(size_t out_row, size_t num_rows);

    /*
     * Once the entire block rows have been processed, the data in out_part
     * is the final result and we can save it in the output matrix.
     */
    void notify_complete();
};

template<class DenseType, class SparseType, int ROW_WIDTH>
class row_part_func
{
    size_t row_width;
public:
    row_part_func(size_t row_width) {
        this->row_width = row_width;
        assert(ROW_WIDTH == row_width);
    }

    rp_edge_iterator operator()(rp_edge_iterator it,
            const char *_in_rows, char *_out_rows) {
        const DenseType *in_rows = (const DenseType *) _in_rows;
        DenseType *out_rows = (DenseType *) _out_rows;
        size_t row_idx = it.get_rel_row_idx();
        DenseType *dest_row = out_rows + ROW_WIDTH * row_idx;
        bool has_val = it.get_entry_size() > 0;
        while (it.has_next()) {
            SparseType data = 1;
            if (has_val)
                data = it.get_curr_data<SparseType>();
            size_t col_idx = it.next();
            const DenseType *src_row = in_rows + ROW_WIDTH * col_idx;
            for (size_t j = 0; j < ROW_WIDTH; j++)
                dest_row[j] += src_row[j] * data;
        }
        return it;
    }
};

template<class DenseType, class SparseType>
class row_part_func<DenseType, SparseType, 0>
{
    size_t row_width;
public:
    row_part_func(size_t row_width) {
        this->row_width = row_width;
    }

    rp_edge_iterator operator()(rp_edge_iterator it,
            const char *_in_rows, char *_out_rows) {
        const DenseType *in_rows = (const DenseType *) _in_rows;
        DenseType *out_rows = (DenseType *) _out_rows;
        size_t row_idx = it.get_rel_row_idx();
        DenseType *dest_row = out_rows + row_width * row_idx;
        bool has_val = it.get_entry_size() > 0;
        while (it.has_next()) {
            SparseType data = 1;
            if (has_val)
                data = it.get_curr_data<SparseType>();
            size_t col_idx = it.next();
            const DenseType *src_row = in_rows + row_width * col_idx;
            for (size_t j = 0; j < row_width; j++)
                dest_row[j] += src_row[j] * data;
        }
        return it;
    }
};

template<class DenseType, class SparseType, int ROW_WIDTH>
class coo_func
{
    size_t row_width;
public:
    coo_func(size_t row_width) {
        this->row_width = row_width;
        assert(ROW_WIDTH == row_width);
    }

    void operator()(const local_coo_t *coos, const char *_coo_vals,
            size_t num, const char *_in_rows, char *_out_rows) {
        const SparseType *coo_vals = (const SparseType *) _coo_vals;
        const DenseType *in_rows = (const DenseType *) _in_rows;
        DenseType *out_rows = (DenseType *) _out_rows;
        for (size_t i = 0; i < num; i++) {
            local_coo_t coo = coos[i];
            SparseType data = 1;
            if (coo_vals)
                data = coo_vals[i];
            const DenseType *src_row = in_rows + ROW_WIDTH * coo.get_col_idx();
            DenseType *dest_row = out_rows + ROW_WIDTH * coo.get_row_idx();
            for (size_t j = 0; j < ROW_WIDTH; j++)
                dest_row[j] += src_row[j] * data;
        }
    }
};

template<class DenseType, class SparseType>
class coo_func<DenseType, SparseType, 0>
{
    size_t row_width;
public:
    coo_func(size_t row_width) {
        this->row_width = row_width;
    }

    void operator()(const local_coo_t *coos, const char *_coo_vals,
            size_t num, const char *_in_rows, char *_out_rows) {
        const SparseType *coo_vals = (const SparseType *) _coo_vals;
        const DenseType *in_rows = (const DenseType *) _in_rows;
        DenseType *out_rows = (DenseType *) _out_rows;
        for (size_t i = 0; i < num; i++) {
            local_coo_t coo = coos[i];
            SparseType data = 1;
            if (coo_vals)
                data = coo_vals[i];
            const DenseType *src_row = in_rows + row_width * coo.get_col_idx();
            DenseType *dest_row = out_rows + row_width * coo.get_row_idx();
            for (size_t j = 0; j < row_width; j++)
                dest_row[j] += src_row[j] * data;
        }
    }
};

template<class RpFuncType, class COOFuncType>
class block_spmm_task_impl: public block_spmm_task
{
public:
    block_spmm_task_impl(const detail::mem_matrix_store &input,
            detail::matrix_store &output, EM_matrix_stream::ptr stream,
            const matrix_io &io, const sparse_matrix &mat,
            block_exec_order::ptr order): block_spmm_task(input,
                output, stream, io, mat, order) {
    }

    void run_on_block(const sparse_block_2d &block) {
        if (block.is_empty())
            return;

        size_t in_row_start = block.get_block_col_idx() * block_size.get_num_cols();
        size_t num_in_rows = std::min(block_size.get_num_cols(),
                get_in_matrix().get_num_rows() - in_row_start);
        const char *in_rows = get_in_rows(in_row_start, num_in_rows);

        size_t out_row_start = block.get_block_row_idx() * block_size.get_num_rows();
        size_t num_out_rows = std::min(block_size.get_num_rows(),
                get_out_matrix().get_num_rows() - out_row_start);
        char *out_rows = get_out_rows(out_row_start, num_out_rows);
        size_t row_width = get_out_matrix().get_num_cols();

        if (block.has_rparts()) {
            RpFuncType rp_func(row_width);
            rp_edge_iterator it = block.get_first_edge_iterator(get_entry_size());
            while (!block.is_rparts_end(it)) {
                it = rp_func(it, in_rows, out_rows);
                it = block.get_next_edge_iterator(it, get_entry_size());
            }
        }
        const char *coo_vals = NULL;
        COOFuncType coo_func(row_width);
        if (get_entry_size() > 0)
            coo_vals = block.get_coo_val_start(get_entry_size());
        coo_func(block.get_coo_start(), coo_vals, block.get_num_coo_vals(),
                in_rows, out_rows);
    }
};

template<class DenseType, class SparseType>
class spmm_creator: public task_creator
{
    detail::mem_matrix_store::const_ptr input;
    detail::matrix_store::ptr output;
    EM_matrix_stream::ptr output_stream;
    const sparse_matrix &mat;
    block_exec_order::ptr order;

    spmm_creator(const sparse_matrix &_mat, size_t num_in_cols);

    template<int ROW_WIDTH>
    compute_task::ptr create_block_compute_task(const matrix_io &io) const {
        typedef row_part_func<DenseType, SparseType, ROW_WIDTH> width_rp_func;
        typedef coo_func<DenseType, SparseType, ROW_WIDTH> width_coo_func;
        return compute_task::ptr(
                new block_spmm_task_impl<width_rp_func, width_coo_func>(
                    *input, *output, output_stream, io, mat, order));
    }
public:
    static task_creator::ptr create(const sparse_matrix &mat,
            size_t num_in_cols) {
        return task_creator::ptr(new spmm_creator<DenseType, SparseType>(
                    mat, num_in_cols));
    }

    virtual bool is_complete() const {
        if (output_stream == NULL)
            return true;
        else
            return output_stream->is_complete();
    }

    virtual bool set_data(detail::mem_matrix_store::const_ptr in,
            detail::matrix_store::ptr out) {
        if (in->get_type() != get_scalar_type<DenseType>()
                || out->get_type() != get_scalar_type<DenseType>()) {
            BOOST_LOG_TRIVIAL(error) << "wrong matrix type in spmm creator";
            return false;
        }
        this->input = in;
        this->output = out;
        if (!output->is_in_mem())
            output_stream = EM_matrix_stream::create(output);
        return true;
    }

    virtual std::vector<detail::EM_object *> get_EM_objs() {
        std::vector<detail::EM_object *> ret;
        if (!output->is_in_mem()) {
            const detail::EM_object *obj
                = dynamic_cast<const detail::EM_object *>(output.get());
            ret.push_back(const_cast<detail::EM_object *>(obj));
        }
        return ret;
    }

    virtual compute_task::ptr create(const matrix_io &io) const {
        if (order) {
            switch (output->get_num_cols()) {
                case 1: return create_block_compute_task<1>(io);
                case 2: return create_block_compute_task<2>(io);
                case 4: return create_block_compute_task<4>(io);
                case 8: return create_block_compute_task<8>(io);
                case 16: return create_block_compute_task<16>(io);
                default: return create_block_compute_task<0>(io);
            }
        }
        else {
            detail::mem_matrix_store &mem_out
                = dynamic_cast<detail::mem_matrix_store &>(*output);
            switch (output->get_num_cols()) {
                case 1: return compute_task::ptr(new fg_row_spmm_task<DenseType,
                            SparseType, 1>(*input, mem_out, io));
                case 2: return compute_task::ptr(new fg_row_spmm_task<DenseType,
                            SparseType, 2>(*input, mem_out, io));
                case 4: return compute_task::ptr(new fg_row_spmm_task<DenseType,
                            SparseType, 4>(*input, mem_out, io));
                case 8: return compute_task::ptr(new fg_row_spmm_task<DenseType,
                            SparseType, 8>(*input, mem_out, io));
                case 16: return compute_task::ptr(new fg_row_spmm_task<DenseType,
                            SparseType, 16>(*input, mem_out, io));
                default: return compute_task::ptr(new fg_row_spmm_task<DenseType,
                            SparseType, 0>(*input, mem_out, io));
            }
        }
    }
};

/*
 * The non-zero entries in a sparse matrix are organized in blocks.
 * When multiplying the sparse matrix with other dense matrices, we traverse
 * the blocks in a certain order to increase CPU cache hits. To do so, we
 * need to group multiple adjacent blocks into a super block, which has
 * the same number of rows and columns. The size of the super block directly
 * affects CPU cache hits and is affected by the size of the dense matrix.
 * This function estimates the super block size in terms of the number of
 * block rows. The goal is to keep the rows of the dense matrix involved
 * in the computation of a super block in the CPU cache at all times.
 * `entry_size' is the size of a row in the dense matrix.
 */
static inline size_t cal_super_block_size(const block_2d_size &block_size,
        size_t entry_size)
{
    // 1MB gives the best performance.
    // Maybe the reason is that I run eight threads in a processor, which has
    // an L3 cache of 8MB. Therefore, each thread gets 1MB of L3.
    size_t size = matrix_conf.get_cpu_cache_size() / entry_size
        / block_size.get_num_rows() / 2;
    size_t max_size
        = detail::mem_matrix_store::CHUNK_SIZE / block_size.get_num_rows();
    return std::min(std::max(size, 1UL), max_size);
}
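
/*
 * A worked example with illustrative numbers (not taken from the code):
 * suppose each thread gets a 1MB share of the CPU cache, the sparse matrix
 * uses blocks of 16K rows, and the dense matrix has 4 columns of doubles,
 * i.e. entry_size = 4 * 8 = 32 bytes. Then
 *     size = 1MB / 32 / 16K / 2 = 1 block row.
 * With a single column (entry_size = 8 bytes),
 *     size = 1MB / 8 / 16K / 2 = 4 block rows,
 * i.e. 64K rows of the dense matrix (512KB) stay in half of the cache share.
 */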

/*
 * This is a base class for a sparse matrix. It provides a set of functions
 * to perform computation on the sparse matrix. Currently, it supports
 * matrix-vector multiplication and matrix-matrix multiplication.
 * We assume the sparse matrix is stored in external memory. If the matrix
 * is in memory, we can use in_mem_io to access the sparse matrix in memory
 * while reusing all the existing code for computation.
 */
class sparse_matrix: public detail::EM_object
{
    // Whether the matrix is represented in the FlashGraph format.
    bool is_fg;
    size_t nrows;
    size_t ncols;
    // The type of a non-zero entry.
    const scalar_type *entry_type;
    bool symmetric;
    detail::EM_object::io_set::ptr ios;

    template<class DenseType, class SparseType>
    task_creator::ptr get_multiply_creator(size_t num_in_cols) const {
        return spmm_creator<DenseType, SparseType>::create(*this, num_in_cols);
    }
protected:
    // This constructor is used for a sparse matrix stored
    // in the FlashGraph format.
    sparse_matrix(size_t num_vertices, const scalar_type *entry_type,
            bool symmetric) {
        this->nrows = num_vertices;
        this->ncols = num_vertices;
        this->entry_type = entry_type;
        this->symmetric = symmetric;
        this->is_fg = true;
    }

    sparse_matrix(size_t nrows, size_t ncols, const scalar_type *entry_type,
            bool symmetric) {
        this->symmetric = symmetric;
        this->is_fg = false;
        this->nrows = nrows;
        this->ncols = ncols;
        this->entry_type = entry_type;
    }

    void _transpose() {
        size_t tmp = nrows;
        nrows = ncols;
        ncols = tmp;
    }
public:
    typedef std::shared_ptr<sparse_matrix> ptr;

    virtual ~sparse_matrix() {
    }

    /*
     * This creates a sparse matrix stored in the FlashGraph format.
     */
    static ptr create(fg::FG_graph::ptr, const scalar_type *entry_type);
    /*
     * This creates a symmetric sparse matrix partitioned in two dimensions.
     * The sparse matrix is stored in memory.
     */
    static ptr create(SpM_2d_index::ptr, SpM_2d_storage::ptr);
    /*
     * This creates an asymmetric sparse matrix partitioned in two dimensions.
     * The sparse matrix is stored in memory.
     */
    static ptr create(SpM_2d_index::ptr index, SpM_2d_storage::ptr mat,
            SpM_2d_index::ptr t_index, SpM_2d_storage::ptr t_mat);

    /*
     * These two functions create a sparse matrix partitioned in two
     * dimensions from the data stored in SAFS.
     */
    static ptr create(SpM_2d_index::ptr index,
            safs::file_io_factory::shared_ptr mat_io_fac);
    static ptr create(SpM_2d_index::ptr index,
            safs::file_io_factory::shared_ptr mat_io_fac,
            SpM_2d_index::ptr t_index,
            safs::file_io_factory::shared_ptr t_mat_io_fac);

    bool is_fg_matrix() const {
        return is_fg;
    }

    std::vector<safs::io_interface::ptr> create_ios() const;

    /*
     * When customizing computation on a sparse matrix (regardless of
     * the storage format and the computation), we need to define two things:
     * how data is accessed and what computation runs on the data fetched
     * from the external memory. matrix_io_generator defines data access
     * and compute_task defines the computation.
     * `num_block_rows' affects the behavior of matrix_io_generator.
     * More explanation of `num_block_rows' can be found in the comments
     * of `init_io_gens'.
     */
    void compute(task_creator::ptr creator, size_t num_block_rows,
            size_t min_num_brows) const;
    /*
     * This method defines how data in the matrix is accessed.
     * `num_block_rows' defines how many block rows are accessed in an I/O.
     */
    virtual matrix_io_generator::ptr create_io_gen(size_t num_block_rows,
            size_t min_num_brows) const = 0;

    virtual safs::file_io_factory::shared_ptr get_io_factory() const = 0;
    /*
     * Get the block size.
     * For a row-major or column-major matrix, a block has only one row
     * or one column.
     */
    virtual const block_2d_size &get_block_size() const = 0;
    /*
     * Get the offsets of the block rows.
     */
    virtual void get_block_row_offs(const std::vector<off_t> &block_row_idxs,
            std::vector<off_t> &offs) const = 0;
    /*
     * The subclass defines the order of processing a set of blocks when
     * multiplying this sparse matrix with a dense matrix.
     * All blocks are organized in a rectangular area.
     */
    virtual block_exec_order::ptr get_multiply_order(
            size_t num_block_rows, size_t num_block_cols) const = 0;

    /*
     * The size of a non-zero entry.
     */
    size_t get_entry_size() const {
        // A binary matrix doesn't need to store the non-zero entries.
        if (entry_type == NULL || *entry_type == get_scalar_type<bool>())
            return 0;
        else
            return entry_type->get_size();
    }
    template<class T>
    bool is_type() const {
        if (entry_type == NULL)
            return false;
        else
            return *entry_type == get_scalar_type<T>();
    }

    size_t get_num_rows() const {
        return nrows;
    }

    size_t get_num_cols() const {
        return ncols;
    }

    bool is_symmetric() const {
        return symmetric;
    }

    virtual sparse_matrix::ptr transpose() const = 0;

    bool multiply(detail::matrix_store::const_ptr in,
            detail::matrix_store::ptr out, task_creator::ptr create) const;

    /*
     * This version of SpMM allows users to provide the output matrix.
     * It requires users to initialize the output matrix.
     */
    template<class DenseType, class SparseType>
    bool multiply(detail::matrix_store::const_ptr in,
            detail::matrix_store::ptr out) const {
        return multiply(in, out, get_multiply_creator<DenseType, SparseType>(
                    in->get_num_cols()));
    }

    template<class DenseType, class SparseType>
    bool multiply(detail::vec_store::const_ptr in,
            detail::vec_store::ptr out) const {
        return multiply<DenseType, SparseType>(
                in->conv2mat(in->get_length(), 1, true),
                out->conv2mat(out->get_length(), 1, true));
    }
};
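
/*
 * A minimal usage sketch (hypothetical: the graph files, `configs' and the
 * construction of the dense stores are placeholders, not APIs prescribed by
 * this header):
 *
 *     // Build a sparse matrix from a graph in the FlashGraph format and
 *     // multiply it with a tall-and-skinny in-memory dense matrix.
 *     fg::FG_graph::ptr g = fg::FG_graph::create(graph_file, index_file, configs);
 *     sparse_matrix::ptr A = sparse_matrix::create(g, NULL);  // binary entries
 *     detail::matrix_store::const_ptr X = ...;  // A->get_num_cols() x 4, doubles
 *     detail::matrix_store::ptr Y = ...;        // A->get_num_rows() x 4, initialized
 *     bool ok = A->multiply<double, double>(X, Y);
 */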

template<class DenseType, class SparseType>
spmm_creator<DenseType, SparseType>::spmm_creator(const sparse_matrix &_mat,
        size_t num_in_cols): mat(_mat)
{
    // This initialization is only for 2D-partitioned sparse matrices.
    if (!mat.is_fg_matrix()) {
        // We only handle the case where the element size is 2^n.
        assert(1 << ((size_t) log2(sizeof(DenseType))) == sizeof(DenseType));
        // The Hilbert curve requires 2^n block rows and block columns.
        // If the input matrix doesn't have 2^n columns, we have to find
        // a power of 2 close to the number of columns of the input matrix
        // to calculate the super block size, so that the super block has
        // 2^n block rows and columns.
        size_t num_cols = num_in_cols;
        num_cols = 1 << ((size_t) log2(num_cols));
        size_t sb_size = cal_super_block_size(mat.get_block_size(),
                sizeof(DenseType) * num_cols);
        order = mat.get_multiply_order(sb_size, sb_size);
    }
}

void init_flash_matrix(config_map::ptr configs);
void destroy_flash_matrix();

}

#endif