FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
matrix_store.h
1 #ifndef __MATRIX_STORE_H__
2 #define __MATRIX_STORE_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashMatrix.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <memory>
24 #include <atomic>
25 #include <unordered_map>
26 
27 #include "safs_file.h"
28 
29 #include "matrix_header.h"
30 #include "generic_type.h"
31 
32 namespace fm
33 {
34 
35 class set_operate;
36 
37 namespace detail
38 {
39 
// Types that are only referred to through pointers in this header, so a
// forward declaration is enough.
class portion_compute;
class local_matrix_store;
class vec_store;

/*
 * Return types of matrix_store::get_portion_async(). Each one pairs
 * a boolean status with a shared pointer to a local matrix store;
 * async_cres_t is the variant whose local matrix store is const.
 */
using async_res_t = std::pair<bool, std::shared_ptr<local_matrix_store> >;
using async_cres_t
	= std::pair<bool, std::shared_ptr<const local_matrix_store> >;
46 
47 class matrix_store
48 {
49  size_t nrow;
50  size_t ncol;
51  bool in_mem;
52  // This is kind of redundant because we can always get the entry size
53  // from the type. However, getting the entry size of the type requires
54  // to call a virtual method. Storing the entry size here can avoid
55  // the function call. It doesn't increase the size of the data structure
56  // due to the data alignment by the compiler.
57  int entry_size;
58  // The type is a reference. It makes the dense matrix object uncopiable.
59  // Maybe this is what we want.
60  const scalar_type &type;
61 protected:
62  static std::atomic<size_t> mat_counter;
63 public:
64  typedef std::shared_ptr<matrix_store> ptr;
65  typedef std::shared_ptr<const matrix_store> const_ptr;
66 
67  static ptr create(size_t nrow, size_t ncol, matrix_layout_t layout,
68  const scalar_type &type, int num_nodes, bool in_mem,
69  safs::safs_file_group::ptr group = NULL);
70 
71  matrix_store(size_t nrow, size_t ncol, bool in_mem,
72  const scalar_type &_type);
73 
74  virtual ~matrix_store() {
75  }
76 
77  void resize(size_t num_rows, size_t num_cols) {
78  this->nrow = num_rows;
79  this->ncol = num_cols;
80  }
81 
82  size_t get_num_rows() const {
83  return nrow;
84  }
85 
86  size_t get_num_cols() const {
87  return ncol;
88  }
89 
90  size_t get_entry_size() const {
91  return entry_size;
92  }
93 
94  const scalar_type &get_type() const {
95  return type;
96  }
97 
98  bool is_in_mem() const {
99  return in_mem;
100  }
101 
102  virtual int get_num_nodes() const {
103  return -1;
104  }
105 
106  /*
107  * The shape of a matrix: a tall matrix or a wide matrix.
108  * We care about the shape of a large matrix. We deal with a tall matrix
109  * different from a wide matrix.
110  */
111  bool is_wide() const {
112  return get_num_cols() > get_num_rows();
113  }
114 
115  /*
116  * This method gets underlying materialized matrix IDs and the number of
117  * elements in each of these materialized matrices.
118  */
119  virtual std::unordered_map<size_t, size_t> get_underlying_mats() const = 0;
120  virtual std::string get_name() const = 0;
121 
122  virtual matrix_layout_t store_layout() const = 0;
123 
124  virtual void reset_data() = 0;
125  virtual void set_data(const set_operate &op) = 0;
126 
127  virtual matrix_store::const_ptr transpose() const = 0;
128 
129  /*
130  * When matrix data is move to faster memory, data is moved in one chunk
131  * at a time. The chunk size is defined by a specific implementation of
132  * matrix store. Each chunk is assigned with an identifier, which is
133  * defined sequentially.
134  */
135  size_t get_num_portions() const;
136  virtual std::pair<size_t, size_t> get_portion_size() const = 0;
137  /*
138  * These two versions get a portion of data from the matrix asynchronously.
139  * When a local matrix store is returned, it's not guaranteed that the
140  * data in the local matrix store is valid. A status in the returned value
141  * indicates whether the data is valid. If the data is invalid when it's
142  * returned from the two methods, the computation passed to
143  * these two methods are invoked when the portion of data is loaded
144  * in memory. During the time between returning from the methods and
145  * the portion of data becomes available, it's users' responsibility
146  * of keep the local matrix store alive.
147  */
148  virtual async_cres_t get_portion_async(size_t start_row, size_t start_col,
149  size_t num_rows, size_t num_cols,
150  std::shared_ptr<portion_compute> compute) const = 0;
151  virtual async_res_t get_portion_async(size_t start_row, size_t start_col,
152  size_t num_rows, size_t num_cols,
153  std::shared_ptr<portion_compute> compute) = 0;
154  /*
155  * These versions fetches the portion of data. It's guaranteed that
156  * the data in the returned local matrix store is valid.
157  */
158  virtual std::shared_ptr<const local_matrix_store> get_portion(
159  size_t start_row, size_t start_col, size_t num_rows,
160  size_t num_cols) const = 0;
161  virtual std::shared_ptr<local_matrix_store> get_portion(
162  size_t start_row, size_t start_col, size_t num_rows,
163  size_t num_cols) = 0;
164  virtual std::shared_ptr<local_matrix_store> get_portion(size_t id);
165  virtual std::shared_ptr<const local_matrix_store> get_portion(
166  size_t id) const;
167  virtual void write_portion_async(
168  std::shared_ptr<const local_matrix_store> portion,
169  off_t start_row, off_t start_col) = 0;
170 
171  virtual matrix_store::const_ptr get_cols(
172  const std::vector<off_t> &idxs) const {
173  matrix_store::const_ptr tm = transpose();
174  return tm->get_rows(idxs)->transpose();
175  }
176  virtual matrix_store::const_ptr get_rows(
177  const std::vector<off_t> &idxs) const {
178  return matrix_store::const_ptr();
179  }
180  virtual std::shared_ptr<const vec_store> get_col_vec(off_t idx) const {
181  assert(0);
182  return std::shared_ptr<const vec_store>();
183  }
184  virtual std::shared_ptr<const vec_store> get_row_vec(off_t idx) const {
185  assert(0);
186  return std::shared_ptr<const vec_store>();
187  }
188 
189  virtual bool is_virtual() const {
190  return false;
191  }
192  virtual void materialize_self() const {
193  }
194 
195  /*
196  * This allow users to enable/disable caching data portions.
197  * It's used by EM matrix and mapply virtual matrix.
198  */
199  virtual void set_cache_portion(bool cache_portion) {
200  }
201 };
202 
203 }
204 
205 }
206 
207 #endif