FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
EM_dense_matrix.h
1 #ifndef __EM_DENSE_MATRIX_H__
2 #define __EM_DENSE_MATRIX_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashMatrix.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <memory>
24 #include <boost/format.hpp>
25 
26 #include "log.h"
27 
28 #include "bulk_operate.h"
29 #include "matrix_store.h"
30 #include "EM_object.h"
31 #include "mem_worker_thread.h"
32 
33 namespace fm
34 {
35 
36 namespace detail
37 {
38 
39 class local_matrix_store;
40 class mem_matrix_store;
41 
42 class EM_matrix_store: public matrix_store, public EM_object
43 {
44  /*
45  * The difference between the two identifiers are:
46  * `mat_id' identifies the matrix data structure. Whenever the matrix
47  * is shallow copied or transposed, `mat_id' changes.
48  * `data_id' identifies the content in a matrix.
49  * So when a matrix is transposed or shallow copied, it should share
50  * the same data id.
51  */
52  const size_t mat_id;
53  const size_t data_id;
54 
55  matrix_layout_t layout;
56  file_holder::ptr holder;
57  io_set::ptr ios;
58 
59  /*
60  * This indicates whether or not we cache a portion in each worker thread.
61  * By default, this is enabled.
62  */
63  bool cache_portion;
64 
65  /*
66  * These two fields are used for sub matrix.
67  * They indicates the actual number of rows and columns stored on disks.
68  * In contrast, get_num_rows() and get_num_cols() are #rows and columns
69  * exposed to users.
70  */
71  size_t orig_num_rows;
72  size_t orig_num_cols;
73 
74  size_t get_orig_num_rows() const {
75  return orig_num_rows;
76  }
77 
78  size_t get_orig_num_cols() const {
79  return orig_num_cols;
80  }
81 
82  EM_matrix_store(size_t nrow, size_t ncol, matrix_layout_t layout,
83  const scalar_type &type, safs::safs_file_group::ptr group);
84  EM_matrix_store(file_holder::ptr holder, io_set::ptr ios, size_t nrow,
85  size_t ncol, size_t orig_nrow, size_t orig_ncol,
86  matrix_layout_t layout, const scalar_type &type, size_t _data_id);
87 public:
88  static const size_t CHUNK_SIZE;
89 
90  typedef std::shared_ptr<EM_matrix_store> ptr;
91  typedef std::shared_ptr<const EM_matrix_store> const_ptr;
92 
93  static ptr create(const std::string &mat_file);
94 
95  static ptr create(size_t nrow, size_t ncol, matrix_layout_t layout,
96  const scalar_type &type, safs::safs_file_group::ptr group = NULL) {
97  return ptr(new EM_matrix_store(nrow, ncol, layout, type, group));
98  }
99 
100  static ptr cast(matrix_store::ptr store) {
101  return std::dynamic_pointer_cast<EM_matrix_store>(store);
102  }
103 
104  static const_ptr cast(matrix_store::const_ptr store) {
105  return std::dynamic_pointer_cast<const EM_matrix_store>(store);
106  }
107 
108  virtual void set_cache_portion(bool cache_portion) {
109  this->cache_portion = cache_portion;
110  }
111 
112  bool is_cache_portion() const {
113  return cache_portion;
114  }
115 
116  virtual std::unordered_map<size_t, size_t> get_underlying_mats() const {
117  std::unordered_map<size_t, size_t> ret;
118  ret.insert(std::pair<size_t, size_t>(data_id,
119  get_num_rows() * get_num_cols()));
120  return ret;
121  }
122  virtual std::string get_name() const {
123  return (boost::format("EM_mat-%1%(%2%,%3%)") % mat_id % get_num_rows()
124  % get_num_cols()).str();
125  }
126 
127  virtual void reset_data();
128  virtual void set_data(const set_operate &op);
129 
130  virtual matrix_layout_t store_layout() const {
131  return layout;
132  }
133 
134  virtual matrix_store::const_ptr transpose() const;
135 
136  virtual std::vector<safs::io_interface::ptr> create_ios() const;
137 
138  virtual std::shared_ptr<const local_matrix_store> get_portion(
139  size_t start_row, size_t start_col, size_t num_rows,
140  size_t num_cols) const;
141  virtual std::shared_ptr<local_matrix_store> get_portion(
142  size_t start_row, size_t start_col, size_t num_rows,
143  size_t num_cols);
144 
145  virtual std::pair<size_t, size_t> get_portion_size() const;
146  virtual async_cres_t get_portion_async(size_t start_row, size_t start_col,
147  size_t num_rows, size_t num_cols,
148  portion_compute::ptr compute) const;
149  virtual async_res_t get_portion_async(size_t start_row, size_t start_col,
150  size_t num_rows, size_t num_cols,
151  portion_compute::ptr compute);
152  virtual void write_portion_async(
153  std::shared_ptr<const local_matrix_store> portion,
154  off_t start_row, off_t start_col);
155 
156  virtual matrix_store::const_ptr get_cols(
157  const std::vector<off_t> &idxs) const;
158  virtual matrix_store::const_ptr get_rows(
159  const std::vector<off_t> &idxs) const;
160  virtual std::shared_ptr<const vec_store> get_col_vec(off_t idx) const;
161  virtual std::shared_ptr<const vec_store> get_row_vec(off_t idx) const;
162 
163  /*
164  * Set this matrix persistent in SAFS, so that even if there isn't
165  * a reference to the matrix, its data still stored in SAFS.
166  * This method isn't thread-safe.
167  */
168  bool set_persistent(const std::string &name) const;
169  /*
170  * Unset the persistency of the matrix in SAFS, so that the matrix file
171  * is deleted after all references to the matrix are gone.
172  */
173  void unset_persistent() const;
174 };
175 
176 }
177 
178 }
179 
180 #endif