FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
dense_matrix.h
1 #ifndef __DENSE_MATRIX_H__
2 #define __DENSE_MATRIX_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashMatrix.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <stdlib.h>
24 
25 #include <vector>
26 #include <memory>
27 
28 #include "safs_file.h"
29 
30 #include "generic_type.h"
31 #include "matrix_header.h"
32 #include "bulk_operate.h"
33 #include "bulk_operate_ext.h"
34 #include "matrix_store.h"
35 #include "mem_matrix_store.h"
36 #include "factor.h"
37 
38 namespace fm
39 {
40 
41 class bulk_operate;
42 class bulk_uoperate;
43 class set_operate;
44 class arr_apply_operate;
45 class vector;
46 
47 enum matrix_margin
48 {
49  MAR_ROW = 1,
50  MAR_COL = 2,
51  BOTH,
52 };
53 
54 class dense_matrix;
55 
56 namespace detail
57 {
58 
59 class local_matrix_store;
60 
61 class portion_mapply_op
62 {
63  size_t out_num_rows;
64  size_t out_num_cols;
65  const scalar_type &type;
66 public:
67  typedef std::shared_ptr<const portion_mapply_op> const_ptr;
68 
69  portion_mapply_op(size_t out_num_rows, size_t out_num_cols,
70  const scalar_type &_type): type(_type) {
71  this->out_num_rows = out_num_rows;
72  this->out_num_cols = out_num_cols;
73  }
74 
75  virtual portion_mapply_op::const_ptr transpose() const = 0;
76 
77  /*
78  * There are three versions of performing computation on the portions.
79  * The first version performs computation only on input portions;
80  * the second version performs computation on input portions and
81  * outputs only one matrix;
82  * the third version performs computation on input portions and outputs
83  * multiple matrices.
84  */
85 
86  virtual void run(
87  const std::vector<std::shared_ptr<const local_matrix_store> > &ins) const;
88  virtual void run(
89  const std::vector<std::shared_ptr<const local_matrix_store> > &ins,
90  local_matrix_store &out) const;
91  virtual void run(
92  const std::vector<std::shared_ptr<const local_matrix_store> > &ins,
93  const std::vector<std::shared_ptr<local_matrix_store> > &outs) const;
94 
95  virtual std::string to_string(
96  const std::vector<matrix_store::const_ptr> &mats) const = 0;
97 
98  /*
99  * Give a hint if this operation is aggregation, so we can optimize
100  * the backend accordingly. When this is an aggregation operation,
101  * the second `run' method has to be implemented.
102  */
103  virtual bool is_agg() const {
104  return false;
105  }
106 
107  size_t get_out_num_rows() const {
108  return out_num_rows;
109  }
110  size_t get_out_num_cols() const {
111  return out_num_cols;
112  }
113  const scalar_type &get_output_type() const {
114  return type;
115  }
116 };
117 
118 /*
119  * These two functions return a virtual matrix that records the computation.
120  */
121 
122 std::shared_ptr<dense_matrix> mapply_portion(
123  const std::vector<std::shared_ptr<const dense_matrix> > &mats,
124  // A user can specify the layout of the output dense matrix.
125  portion_mapply_op::const_ptr op, matrix_layout_t out_layout,
126  bool par_access = true);
127 matrix_store::ptr __mapply_portion_virtual(
128  const std::vector<matrix_store::const_ptr> &store,
129  portion_mapply_op::const_ptr op, matrix_layout_t out_layout,
130  bool par_access = true);
131 
132 /*
133  * These three functions return a materialized matrix.
134  * The first version determines the storage of the output matrix automatically.
135  * The second version allows users to specify the storage for the output matrix.
136  * The third version not only allows users to specify the storage for the output
137  * matrix, but also allows multiple output matrices.
138  */
139 
140 matrix_store::ptr __mapply_portion(
141  const std::vector<matrix_store::const_ptr> &mats,
142  portion_mapply_op::const_ptr op, matrix_layout_t out_layout,
143  bool par_access = true);
144 matrix_store::ptr __mapply_portion(
145  const std::vector<matrix_store::const_ptr> &mats,
146  portion_mapply_op::const_ptr op, matrix_layout_t out_layout,
147  bool out_in_mem, int out_num_nodes, bool par_access = true);
148 bool __mapply_portion(
149  const std::vector<matrix_store::const_ptr> &mats,
150  portion_mapply_op::const_ptr op,
151  const std::vector<matrix_store::ptr> &out_mats, bool par_access = true);
152 }
153 
154 /*
155  * This class represents a dense matrix and is able to perform computation
156  * on the matrix. However, this class can't modify the matrix data. The only
157  * way to modify the matrix is to have the pointer point to another matrix.
158  */
159 class dense_matrix
160 {
161 public:
162  typedef std::shared_ptr<dense_matrix> ptr;
163  typedef std::shared_ptr<const dense_matrix> const_ptr;
164 private:
165  detail::matrix_store::const_ptr store;
166 
167  static ptr _create_randu(const scalar_variable &min, const scalar_variable &max,
168  size_t nrow, size_t ncol, matrix_layout_t layout, int num_nodes,
169  bool in_mem, safs::safs_file_group::ptr group);
170  static ptr _create_randn(const scalar_variable &mean, const scalar_variable &var,
171  size_t nrow, size_t ncol, matrix_layout_t layout, int num_nodes,
172  bool in_mem, safs::safs_file_group::ptr group);
173  static ptr _create_const(scalar_variable::ptr val, size_t nrow, size_t ncol,
174  matrix_layout_t layout, int num_nodes, bool in_mem,
175  safs::safs_file_group::ptr group);
176 
177  detail::matrix_store::ptr inner_prod_tall(const dense_matrix &m,
178  bulk_operate::const_ptr left_op, bulk_operate::const_ptr right_op,
179  matrix_layout_t out_layout) const;
180  detail::matrix_store::ptr inner_prod_wide(const dense_matrix &m,
181  bulk_operate::const_ptr left_op, bulk_operate::const_ptr right_op,
182  matrix_layout_t out_layout) const;
183 
184  detail::matrix_store::const_ptr _conv_store(bool in_mem, int num_nodes) const;
185 protected:
186  dense_matrix(detail::matrix_store::const_ptr store) {
187  this->store = store;
188  }
189  bool verify_inner_prod(const dense_matrix &m,
190  const bulk_operate &left_op, const bulk_operate &right_op) const;
191  bool verify_mapply2(const dense_matrix &m,
192  const bulk_operate &op) const;
193  bool verify_apply(matrix_margin margin, const arr_apply_operate &op) const;
194 public:
195  static ptr create(size_t nrow, size_t ncol, matrix_layout_t layout,
196  const scalar_type &type, int num_nodes = -1, bool in_mem = true,
197  safs::safs_file_group::ptr group = NULL);
198  static ptr create(size_t nrow, size_t ncol, matrix_layout_t layout,
199  const scalar_type &type, const set_operate &op, int num_nodes = -1,
200  bool in_mem = true, safs::safs_file_group::ptr group = NULL);
201  static ptr create(detail::matrix_store::const_ptr store) {
202  return dense_matrix::ptr(new dense_matrix(store));
203  }
204 
205  template<class T>
206  static ptr create_randu(T _min, T _max, size_t nrow, size_t ncol,
207  matrix_layout_t layout, int num_nodes = -1, bool in_mem = true,
208  safs::safs_file_group::ptr group = NULL) {
209  scalar_variable_impl<T> min(_min);
210  scalar_variable_impl<T> max(_max);
211  return _create_randu(min, max, nrow, ncol, layout, num_nodes, in_mem,
212  group);
213  }
214  template<class T>
215  static ptr create_randn(T _mean, T _var, size_t nrow, size_t ncol,
216  matrix_layout_t layout, int num_nodes = -1, bool in_mem = true,
217  safs::safs_file_group::ptr group = NULL) {
218  scalar_variable_impl<T> mean(_mean);
219  scalar_variable_impl<T> var(_var);
220  return _create_randn(mean, var, nrow, ncol, layout, num_nodes, in_mem,
221  group);
222  }
223 
224  template<class T>
225  static ptr create_const(T _val, size_t nrow, size_t ncol,
226  matrix_layout_t layout, int num_nodes = -1, bool in_mem = true,
227  safs::safs_file_group::ptr group = NULL) {
228  scalar_variable::ptr val(new scalar_variable_impl<T>(_val));
229  return _create_const(val, nrow, ncol, layout, num_nodes, in_mem, group);
230  }
231 
232  dense_matrix() {
233  }
234  dense_matrix(size_t nrow, size_t ncol, matrix_layout_t layout,
235  const scalar_type &type, int num_nodes = -1, bool in_mem = true,
236  safs::safs_file_group::ptr group = NULL);
237 
238  const detail::matrix_store &get_data() const {
239  return *store;
240  }
241 
242  detail::matrix_store::const_ptr get_raw_store() const {
243  return store;
244  }
245 
246  size_t get_entry_size() const {
247  return store->get_entry_size();
248  }
249 
250  size_t get_num_rows() const {
251  return store->get_num_rows();
252  }
253 
254  size_t get_num_cols() const {
255  return store->get_num_cols();
256  }
257 
258  const scalar_type &get_type() const {
259  return store->get_type();
260  }
261 
262  matrix_layout_t store_layout() const {
263  return store->store_layout();
264  }
265 
266  bool is_in_mem() const {
267  return store->is_in_mem();
268  }
269 
270  bool is_wide() const {
271  return store->is_wide();
272  }
273 
274  bool is_virtual() const {
275  return store->is_virtual();
276  }
277 
278  void materialize_self() const;
279 
280  template<class T>
281  bool is_type() const {
282  return get_type() == get_scalar_type<T>();
283  }
284 
285  /*
286  * We can't change the matrix data that it points to, but we can change
287  * the pointer in the class so that it can point to another matrix data.
288  */
289  void assign(const dense_matrix &mat) {
290  store = mat.store;
291  }
292 
293  std::shared_ptr<vector> get_col(off_t idx) const;
294  std::shared_ptr<vector> get_row(off_t idx) const;
295  dense_matrix::ptr get_cols(const std::vector<off_t> &idxs) const;
296  dense_matrix::ptr get_rows(const std::vector<off_t> &idxs) const;
297  /*
298  * Clone the matrix.
299  * The class can't modify the matrix data that it points to, but it
300  * can modify the pointer. If someone changes in the pointer in the cloned
301  * matrix, it doesn't affect the current matrix.
302  */
303  dense_matrix::ptr clone() const {
304  return ptr(new dense_matrix(get_raw_store()));
305  }
306  dense_matrix::ptr deep_copy() const;
307 
308  dense_matrix::ptr transpose() const;
309  /*
310  * This converts the data layout of the dense matrix.
311  * It actually generates a virtual matrix that represents the matrix
312  * with required data layout.
313  */
314  dense_matrix::ptr conv2(matrix_layout_t layout) const;
315  /*
316  * This method converts the storage media of the matrix.
317  * It can convert an in-memory matrix to an EM matrix, or vice versa.
318  * The output matrix is always materialized.
319  */
320  dense_matrix::ptr conv_store(bool in_mem, int num_nodes) const;
321  bool move_store(bool in_mem, int num_nodes) const;
322 
323  dense_matrix::ptr inner_prod(const dense_matrix &m,
324  bulk_operate::const_ptr left_op, bulk_operate::const_ptr right_op,
325  matrix_layout_t out_layout = matrix_layout_t::L_NONE) const;
326  vector::ptr aggregate(matrix_margin margin, agg_operate::const_ptr op) const;
327  std::shared_ptr<scalar_variable> aggregate(agg_operate::const_ptr op) const;
328  std::shared_ptr<scalar_variable> aggregate(bulk_operate::const_ptr op) const;
329 
330  /*
331  * This operator groups rows based on the labels in the factor vector
332  * and aggregate the elements of each column.
333  * It outputs a dense matrix whose #cols == this->#cols and #rows == #levels.
334  * Each row of the output dense matrix is the aggregation of all rows in
335  * the input dense matrix that have the same factor.
336  */
337  dense_matrix::ptr groupby_row(factor_vector::const_ptr labels,
338  agg_operate::const_ptr) const;
339  dense_matrix::ptr groupby_row(factor_vector::const_ptr labels,
340  bulk_operate::const_ptr) const;
341 
342  dense_matrix::ptr mapply_cols(std::shared_ptr<const vector> vals,
343  bulk_operate::const_ptr op) const;
344  dense_matrix::ptr mapply_rows(std::shared_ptr<const vector> vals,
345  bulk_operate::const_ptr op) const;
346  dense_matrix::ptr mapply2(const dense_matrix &m,
347  bulk_operate::const_ptr op) const;
348  dense_matrix::ptr sapply(bulk_uoperate::const_ptr op) const;
349  dense_matrix::ptr apply(matrix_margin margin,
350  arr_apply_operate::const_ptr op) const;
351  dense_matrix::ptr apply_scalar(scalar_variable::const_ptr var,
352  bulk_operate::const_ptr) const;
353 
354  dense_matrix::ptr cast_ele_type(const scalar_type &type) const;
355 
356  dense_matrix::ptr scale_cols(std::shared_ptr<const vector> vals) const {
357  bulk_operate::const_ptr multiply
358  = bulk_operate::conv2ptr(get_type().get_basic_ops().get_multiply());
359  // When we scale columns, it's the same as applying the vector to
360  // each row.
361  return mapply_rows(vals, multiply);
362  }
363  dense_matrix::ptr scale_rows(std::shared_ptr<const vector> vals) const {
364  bulk_operate::const_ptr multiply
365  = bulk_operate::conv2ptr(get_type().get_basic_ops().get_multiply());
366  // When we scale rows, it's the same as applying the vector to
367  // each column.
368  return mapply_cols(vals, multiply);
369  }
370  dense_matrix::ptr multiply(const dense_matrix &mat,
371  matrix_layout_t out_layout = matrix_layout_t::L_NONE,
372  bool use_blas = false) const;
373 
374  dense_matrix::ptr add(const dense_matrix &mat) const {
375  const bulk_operate &op = get_type().get_basic_ops().get_add();
376  return this->mapply2(mat, bulk_operate::conv2ptr(op));
377  }
378  dense_matrix::ptr minus(const dense_matrix &mat) const {
379  const bulk_operate &op = get_type().get_basic_ops().get_sub();
380  return this->mapply2(mat, bulk_operate::conv2ptr(op));
381  }
382  /*
383  * This performs element-wise multiplication between two matrices.
384  */
385  dense_matrix::ptr multiply_ele(const dense_matrix &mat) const {
386  const bulk_operate &op = get_type().get_basic_ops().get_multiply();
387  return this->mapply2(mat, bulk_operate::conv2ptr(op));
388  }
389 
390  dense_matrix::ptr abs() const {
391  bulk_uoperate::const_ptr op = bulk_uoperate::conv2ptr(
392  *get_type().get_basic_uops().get_op(basic_uops::op_idx::ABS));
393  return sapply(op);
394  }
395 
396  dense_matrix::ptr logic_not() const;
397 
398  std::shared_ptr<vector> row_sum() const;
399  std::shared_ptr<vector> col_sum() const;
400  std::shared_ptr<vector> row_norm2() const;
401  std::shared_ptr<vector> col_norm2() const;
402 
403  std::shared_ptr<scalar_variable> sum() const {
404  if (get_type() == get_scalar_type<bool>()) {
405  dense_matrix::ptr tmp = cast_ele_type(get_scalar_type<size_t>());
406  return tmp->sum();
407  }
408  else
409  return aggregate(bulk_operate::conv2ptr(
410  get_type().get_basic_ops().get_add()));
411  }
412 
413  std::shared_ptr<scalar_variable> max() const {
414  return aggregate(bulk_operate::conv2ptr(
415  *get_type().get_basic_ops().get_op(basic_ops::op_idx::MAX)));
416  }
417 
418  template<class T>
419  dense_matrix::ptr multiply_scalar(T val) const {
420  scalar_variable::ptr var(new scalar_variable_impl<T>(val));
421  bulk_operate::const_ptr op = bulk_operate::conv2ptr(
422  var->get_type().get_basic_ops().get_multiply());
423  return apply_scalar(var, op);
424  }
425 
426  template<class T>
427  dense_matrix::ptr minus_scalar(T val) const {
428  scalar_variable::ptr var(new scalar_variable_impl<T>(val));
429  bulk_operate::const_ptr op = bulk_operate::conv2ptr(
430  var->get_type().get_basic_ops().get_sub());
431  return apply_scalar(var, op);
432  }
433 
434  template<class T>
435  dense_matrix::ptr lt_scalar(T val) const {
436  scalar_variable::ptr var(new scalar_variable_impl<T>(val));
437  bulk_operate::const_ptr op = bulk_operate::conv2ptr(
438  *var->get_type().get_basic_ops().get_op(basic_ops::op_idx::GE));
439  dense_matrix::ptr tmp = apply_scalar(var, op);
440  return tmp->logic_not();
441  }
442 
443  double norm2() const;
444 };
445 
446 class col_vec: public dense_matrix
447 {
448  col_vec(detail::matrix_store::const_ptr mat): dense_matrix(mat) {
449  assert(mat->get_num_cols() == 1);
450  }
451 public:
452  template<class T>
453  static ptr create_randn(size_t len) {
454  dense_matrix::ptr mat = dense_matrix::create_randn<T>(0, 1, len, 1,
455  matrix_layout_t::L_COL);
456  return ptr(new col_vec(mat->get_raw_store()));
457  }
458  template<class T>
459  static ptr create_randu(size_t len) {
460  dense_matrix::ptr mat = dense_matrix::create_randu<T>(0, 1, len, 1,
461  matrix_layout_t::L_COL);
462  return ptr(new col_vec(mat->get_raw_store()));
463  }
464 
465  col_vec(): dense_matrix(NULL) {
466  }
467 
468  col_vec(size_t len, const scalar_type &type): dense_matrix(len, 1,
469  matrix_layout_t::L_COL, type) {
470  }
471 
472  size_t get_length() const {
473  return get_num_rows();
474  }
475 
476  col_vec operator=(const dense_matrix &mat) {
477  assert(mat.get_num_cols() == 1);
478  assign(mat);
479  return *this;
480  }
481 };
482 
483 template<class T>
484 dense_matrix operator*(const dense_matrix &m, T val)
485 {
486  dense_matrix::ptr ret = m.multiply_scalar<T>(val);
487  assert(ret);
488  // TODO I shouldn't materialize immediately.
489  ret->materialize_self();
490  return *ret;
491 }
492 
493 template<class T>
494 dense_matrix operator*(T val, const dense_matrix &m)
495 {
496  dense_matrix::ptr ret = m.multiply_scalar<T>(val);
497  assert(ret);
498  // TODO I shouldn't materialize immediately.
499  ret->materialize_self();
500  return *ret;
501 }
502 
503 inline dense_matrix operator*(const dense_matrix &m1, const dense_matrix &m2)
504 {
505  dense_matrix::ptr ret = m1.multiply(m2);
506  assert(ret);
507  // TODO I shouldn't materialize immediately.
508  ret->materialize_self();
509  return *ret;
510 }
511 
512 inline dense_matrix operator*(const dense_matrix &m1, const col_vec &m2)
513 {
514  dense_matrix::ptr ret = m1.multiply(m2);
515  assert(ret);
516  // TODO I shouldn't materialize immediately.
517  ret->materialize_self();
518  return *ret;
519 }
520 
521 inline dense_matrix operator+(const dense_matrix &m1, const dense_matrix &m2)
522 {
523  dense_matrix::ptr ret = m1.add(m2);
524  assert(ret);
525  // TODO I shouldn't materialize immediately.
526  ret->materialize_self();
527  return *ret;
528 }
529 
530 inline dense_matrix operator-(const dense_matrix &m1, const dense_matrix &m2)
531 {
532  dense_matrix::ptr ret = m1.minus(m2);
533  assert(ret);
534  // TODO I shouldn't materialize immediately.
535  ret->materialize_self();
536  return *ret;
537 }
538 
539 template<class T>
540 inline T as_scalar(const dense_matrix &m)
541 {
542  assert(m.get_type() == get_scalar_type<T>());
543  m.materialize_self();
544  assert(m.is_in_mem());
545  detail::mem_matrix_store::const_ptr mem_m
546  = detail::mem_matrix_store::cast(m.get_raw_store());
547  return mem_m->get<T>(0, 0);
548 }
549 
550 inline dense_matrix t(const dense_matrix &m)
551 {
552  dense_matrix::ptr ret = m.transpose();
553  assert(ret);
554  return *ret;
555 }
556 
557 }
558 
559 #endif