FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
mem_matrix_store.h
1 #ifndef __MEM_MATRIX_STORE_H__
2 #define __MEM_MATRIX_STORE_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashMatrix.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <boost/format.hpp>
24 
25 #include "matrix_store.h"
26 #include "raw_data_array.h"
27 
28 namespace fm
29 {
30 
31 namespace detail
32 {
33 
34 class local_matrix_store;
35 class vec_store;
36 
37 /*
38  * This is the base class that represents an in-memory complete matrix.
39  * It is used for SMP.
40  */
41 class mem_matrix_store: public matrix_store
42 {
43  const size_t mat_id;
44 protected:
45  bool write_header(FILE *f) const;
46 public:
47  typedef std::shared_ptr<mem_matrix_store> ptr;
48  typedef std::shared_ptr<const mem_matrix_store> const_ptr;
49  /*
50  * We partition a matrix for parallel.
51  */
52  static const size_t CHUNK_SIZE;
53 
54  static ptr load(const std::string &file_name);
55  static ptr cast(matrix_store::ptr store);
56  static const_ptr cast(matrix_store::const_ptr store);
57 
58  static ptr create(size_t nrow, size_t ncol, matrix_layout_t layout,
59  const scalar_type &type, int num_nodes);
60 
61  mem_matrix_store(size_t nrow, size_t ncol, const scalar_type &type);
62 
63  virtual std::unordered_map<size_t, size_t> get_underlying_mats() const {
64  // TODO for now, we assume that an in-mem matrix doesn't have
65  // underlying matrix.
66  return std::unordered_map<size_t, size_t>();
67  }
68  virtual std::string get_name() const {
69  return (boost::format("mem_mat-%1%(%2%,%3%)") % mat_id % get_num_rows()
70  % get_num_cols()).str();
71  }
72 
73  virtual void reset_data();
74  virtual void set_data(const set_operate &op);
75 
76  virtual const char *get(size_t row, size_t col) const = 0;
77  virtual char *get(size_t row, size_t col) = 0;
78  virtual const char *get_row(size_t row) const {
79  return NULL;
80  }
81  virtual char *get_row(size_t row) {
82  return NULL;
83  }
84  virtual const char *get_col(size_t col) const {
85  return NULL;
86  }
87  virtual char *get_col(size_t col) {
88  return NULL;
89  }
90  virtual const char *get_rows(size_t row_start, size_t row_end) const {
91  return NULL;
92  }
93  virtual char *get_rows(size_t row_start, size_t row_end) {
94  return NULL;
95  }
96 
97  virtual async_cres_t get_portion_async(size_t start_row, size_t start_col,
98  size_t num_rows, size_t num_cols,
99  std::shared_ptr<portion_compute> compute) const {
100  return async_cres_t(true,
101  get_portion(start_row, start_col, num_rows, num_cols));
102  }
103  virtual async_res_t get_portion_async(size_t start_row, size_t start_col,
104  size_t num_rows, size_t num_cols,
105  std::shared_ptr<portion_compute> compute) {
106  return async_res_t(true,
107  get_portion(start_row, start_col, num_rows, num_cols));
108  }
109  virtual void write_portion_async(
110  std::shared_ptr<const local_matrix_store> portion,
111  off_t start_row, off_t start_col) {
112  // TODO
113  assert(0);
114  }
115 
116  virtual bool write2file(const std::string &file_name) const = 0;
117 
118  virtual std::pair<size_t, size_t> get_portion_size() const {
119  if (is_wide())
120  return std::pair<size_t, size_t>(get_num_rows(), CHUNK_SIZE);
121  else
122  return std::pair<size_t, size_t>(CHUNK_SIZE, get_num_cols());
123  }
124 
125  template<class T>
126  T get(size_t row, size_t col) const {
127  return *(const T *) get(row, col);
128  }
129  template<class T>
130  void set(size_t row, size_t col, T val) {
131  *(T *) get(row, col) = val;
132  }
133 };
134 
135 /*
136  * This represents a column-major matrix. All columns are stored
137  * in contiguous memory.
138  */
139 class mem_col_matrix_store: public mem_matrix_store
140 {
141  raw_data_array data;
142 
143  mem_col_matrix_store(size_t nrow, size_t ncol,
144  const scalar_type &type): mem_matrix_store(nrow, ncol, type) {
145  if (nrow * ncol > 0)
146  data = raw_data_array(nrow * ncol * type.get_size());
147  }
148 protected:
149  mem_col_matrix_store(size_t nrow, size_t ncol, const scalar_type &type,
150  const raw_data_array &data): mem_matrix_store(nrow, ncol,
151  type) {
152  this->data = data;
153  }
154 public:
155  typedef std::shared_ptr<mem_col_matrix_store> ptr;
156  typedef std::shared_ptr<const mem_col_matrix_store> const_ptr;
157 
158  static ptr create(const raw_data_array &data, size_t nrow, size_t ncol,
159  const scalar_type &type) {
160  return ptr(new mem_col_matrix_store(nrow, ncol, type, data));
161  }
162 
163  static ptr create(size_t nrow, size_t ncol, const scalar_type &type) {
164  return ptr(new mem_col_matrix_store(nrow, ncol, type));
165  }
166 
167  static const_ptr cast(matrix_store::const_ptr store);
168  static ptr cast(matrix_store::ptr store);
169 
170  const raw_data_array &get_data() const {
171  return data;
172  }
173 
174  virtual const char *get_col(size_t col) const {
175  return data.get_raw() + col * get_num_rows() * get_entry_size();
176  }
177  virtual char *get_col(size_t col) {
178  return data.get_raw() + col * get_num_rows() * get_entry_size();
179  }
180 
181  virtual const char *get(size_t row, size_t col) const {
182  return get_col(col) + row * get_entry_size();
183  }
184  virtual char *get(size_t row, size_t col) {
185  return get_col(col) + row * get_entry_size();
186  }
187 
188  virtual std::shared_ptr<const local_matrix_store> get_portion(
189  size_t start_row, size_t start_col, size_t num_rows,
190  size_t num_cols) const;
191  virtual std::shared_ptr<local_matrix_store> get_portion(
192  size_t start_row, size_t start_col, size_t num_rows,
193  size_t num_cols);
194 
195  virtual matrix_store::const_ptr transpose() const;
196 
197  virtual matrix_store::const_ptr get_cols(const std::vector<off_t> &idxs) const;
198  virtual matrix_store::const_ptr get_rows(const std::vector<off_t> &idxs) const;
199 
200  virtual std::shared_ptr<const vec_store> get_col_vec(off_t idx) const;
201  virtual std::shared_ptr<const vec_store> get_row_vec(off_t idx) const;
202 
203  virtual matrix_layout_t store_layout() const {
204  return matrix_layout_t::L_COL;
205  }
206  virtual bool write2file(const std::string &file_name) const;
207 };
208 
209 /*
210  * This represents a row-major matrix. All rows are stored in contiguous
211  * memory.
212  */
213 class mem_row_matrix_store: public mem_matrix_store
214 {
215  raw_data_array data;
216 
217  mem_row_matrix_store(size_t nrow, size_t ncol,
218  const scalar_type &type): mem_matrix_store(nrow, ncol, type) {
219  if (nrow * ncol > 0)
220  data = raw_data_array(nrow * ncol * type.get_size());
221  }
222 protected:
223  mem_row_matrix_store(size_t nrow, size_t ncol, const scalar_type &type,
224  const raw_data_array &data): mem_matrix_store(nrow, ncol,
225  type) {
226  this->data = data;
227  }
228 public:
229  typedef std::shared_ptr<mem_row_matrix_store> ptr;
230  typedef std::shared_ptr<const mem_row_matrix_store> const_ptr;
231 
232  static ptr create(const raw_data_array &data, size_t nrow, size_t ncol,
233  const scalar_type &type) {
234  return ptr(new mem_row_matrix_store(nrow, ncol, type, data));
235  }
236 
237  static ptr create(size_t nrow, size_t ncol, const scalar_type &type) {
238  return ptr(new mem_row_matrix_store(nrow, ncol, type));
239  }
240 
241  static ptr cast(matrix_store::ptr store);
242  static const_ptr cast(matrix_store::const_ptr store);
243 
244  const raw_data_array &get_data() const {
245  return data;
246  }
247 
248  virtual const char *get_row(size_t row) const {
249  return data.get_raw() + row * get_num_cols() * get_entry_size();
250  }
251 
252  virtual char *get_row(size_t row) {
253  return data.get_raw() + row * get_num_cols() * get_entry_size();
254  }
255  virtual const char *get_rows(size_t row_start, size_t row_end) const {
256  return get_row(row_start);
257  }
258  virtual char *get_rows(size_t row_start, size_t row_end) {
259  return get_row(row_start);
260  }
261 
262  virtual const char *get(size_t row, size_t col) const {
263  return get_row(row) + col * get_entry_size();
264  }
265  virtual char *get(size_t row, size_t col) {
266  return get_row(row) + col * get_entry_size();
267  }
268 
269  virtual std::shared_ptr<const local_matrix_store> get_portion(
270  size_t start_row, size_t start_col, size_t num_rows,
271  size_t num_cols) const;
272  virtual std::shared_ptr<local_matrix_store> get_portion(
273  size_t start_row, size_t start_col, size_t num_rows,
274  size_t num_cols);
275 
276  virtual matrix_store::const_ptr transpose() const;
277 
278  virtual matrix_store::const_ptr get_rows(const std::vector<off_t> &idxs) const;
279  virtual std::shared_ptr<const vec_store> get_col_vec(off_t idx) const;
280  virtual std::shared_ptr<const vec_store> get_row_vec(off_t idx) const;
281 
282  virtual matrix_layout_t store_layout() const {
283  return matrix_layout_t::L_ROW;
284  }
285  virtual bool write2file(const std::string &file_name) const;
286 };
287 
288 /*
289  * This matrix contains a few columns of a column-major matrix.
290  * The columns in this matrix isn't necessarily stored contiguously.
291  */
292 class mem_sub_col_matrix_store: public mem_col_matrix_store
293 {
294  std::shared_ptr<const std::vector<off_t> > orig_col_idxs;
295 
296  mem_sub_col_matrix_store(const mem_col_matrix_store &store,
297  std::shared_ptr<const std::vector<off_t> > col_idxs): mem_col_matrix_store(
298  store.get_num_rows(), col_idxs->size(), store.get_type(),
299  store.get_data()) {
300  this->orig_col_idxs = col_idxs;
301  }
302  mem_sub_col_matrix_store(const raw_data_array &data,
303  std::shared_ptr<const std::vector<off_t> > col_idxs, size_t nrow,
304  const scalar_type &type): mem_col_matrix_store(nrow, col_idxs->size(),
305  type, data) {
306  this->orig_col_idxs = col_idxs;
307  }
308 public:
309  static ptr create(const raw_data_array &data,
310  std::shared_ptr<const std::vector<off_t> > col_idxs, size_t nrow,
311  const scalar_type &type) {
312  return ptr(new mem_sub_col_matrix_store(data, col_idxs, nrow, type));
313  }
314 
315  /*
316  * The column indexes are the absolute index on the original column matrix.
317  */
318  static ptr create(const mem_col_matrix_store &store,
319  const std::vector<off_t> &abs_col_idxs) {
320  std::shared_ptr<std::vector<off_t> > idxs(new std::vector<off_t>());
321  *idxs = abs_col_idxs;
322  return ptr(new mem_sub_col_matrix_store(store, idxs));
323  }
324 
325  virtual char *get_col(size_t col) {
326  return mem_col_matrix_store::get_col(orig_col_idxs->at(col));
327  }
328 
329  virtual const char *get_col(size_t col) const {
330  return mem_col_matrix_store::get_col(orig_col_idxs->at(col));
331  }
332 
333  virtual std::shared_ptr<const local_matrix_store> get_portion(
334  size_t start_row, size_t start_col, size_t num_rows,
335  size_t num_cols) const;
336  virtual std::shared_ptr<local_matrix_store> get_portion(
337  size_t start_row, size_t start_col, size_t num_rows,
338  size_t num_cols);
339 
340  virtual matrix_store::const_ptr transpose() const;
341 
342  virtual matrix_store::const_ptr get_cols(const std::vector<off_t> &idxs) const;
343 
344  virtual std::shared_ptr<const vec_store> get_col_vec(off_t idx) const;
345  virtual std::shared_ptr<const vec_store> get_row_vec(off_t idx) const;
346 };
347 
348 /*
349  * This matrix contains a few rows of a row-major matrix.
350  * The rows in this matrix isn't necessarily stored contiguously.
351  */
352 class mem_sub_row_matrix_store: public mem_row_matrix_store
353 {
354  std::shared_ptr<const std::vector<off_t> > orig_row_idxs;
355 
356  mem_sub_row_matrix_store(const mem_row_matrix_store &store,
357  std::shared_ptr<const std::vector<off_t> > row_idxs): mem_row_matrix_store(
358  row_idxs->size(), store.get_num_cols(), store.get_type(),
359  store.get_data()) {
360  this->orig_row_idxs = row_idxs;
361  }
362  mem_sub_row_matrix_store(const raw_data_array &data,
363  std::shared_ptr<const std::vector<off_t> > row_idxs, size_t ncol,
364  const scalar_type &type): mem_row_matrix_store(row_idxs->size(),
365  ncol, type, data) {
366  this->orig_row_idxs = row_idxs;
367  }
368 public:
369  static ptr create(const raw_data_array &data,
370  std::shared_ptr<const std::vector<off_t> > row_idxs, size_t ncol,
371  const scalar_type &type) {
372  return ptr(new mem_sub_row_matrix_store(data, row_idxs, ncol, type));
373  }
374  /*
375  * The row indexes are the absolute index on the original row matrix.
376  */
377  static ptr create(const mem_row_matrix_store &store,
378  const std::vector<off_t> &abs_row_idxs) {
379  std::shared_ptr<std::vector<off_t> > idxs(new std::vector<off_t>());
380  *idxs = abs_row_idxs;
381  return ptr(new mem_sub_row_matrix_store(store, idxs));
382  }
383 
384  virtual char *get_row(size_t row) {
385  return mem_row_matrix_store::get_row(orig_row_idxs->at(row));
386  }
387 
388  virtual const char *get_row(size_t row) const {
389  return mem_row_matrix_store::get_row(orig_row_idxs->at(row));
390  }
391 
392  virtual std::shared_ptr<const local_matrix_store> get_portion(
393  size_t start_row, size_t start_col, size_t num_rows,
394  size_t num_cols) const;
395  virtual std::shared_ptr<local_matrix_store> get_portion(
396  size_t start_row, size_t start_col, size_t num_rows,
397  size_t num_cols);
398 
399  virtual matrix_store::const_ptr transpose() const;
400 
401  virtual matrix_store::const_ptr get_rows(const std::vector<off_t> &idxs) const;
402  virtual std::shared_ptr<const vec_store> get_col_vec(off_t idx) const;
403  virtual std::shared_ptr<const vec_store> get_row_vec(off_t idx) const;
404 };
405 
406 }
407 
408 }
409 
410 #endif