FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
matrix_io.h
1 #ifndef __MATRIX_IO_H__
2 #define __MATRIX_IO_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashMatrix.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <stdlib.h>
24 
25 #include <memory>
26 
27 #include "io_request.h"
28 
29 #include "vertex.h"
30 #include "sparse_matrix_format.h"
31 
32 namespace fm
33 {
34 
35 class matrix_loc
36 {
37  std::pair<off_t, off_t> pair;
38 public:
39  matrix_loc(off_t row_idx, off_t col_idx): pair(row_idx, col_idx) {
40  }
41 
42  off_t get_row_idx() const {
43  return pair.first;
44  }
45 
46  off_t get_col_idx() const {
47  return pair.second;
48  }
49 };
50 
51 /*
52  * This represents a matrix block that we want to access. This data structure
53  * maintains the mapping of the matrix block to a data region on disks.
54  */
55 class matrix_io
56 {
57  matrix_loc top_left;
58  size_t num_rows;
59  size_t num_cols;
60 
61  safs::data_loc_t loc;
62  size_t size;
63 public:
64  matrix_io(): top_left(-1, -1) {
65  num_rows = 0;
66  num_cols = 0;
67  size = 0;
68  }
69 
70  matrix_io(const matrix_loc &_top_left, size_t num_rows,
71  size_t num_cols, safs::data_loc_t loc, size_t size): top_left(_top_left) {
72  this->num_rows = num_rows;
73  this->num_cols = num_cols;
74  this->loc = loc;
75  this->size = size;
76  }
77 
78  const safs::data_loc_t &get_loc() const {
79  return loc;
80  }
81 
82  size_t get_size() const {
83  return size;
84  }
85 
86  const matrix_loc &get_top_left() const {
87  return top_left;
88  }
89 
90  size_t get_num_rows() const {
91  return num_rows;
92  }
93 
94  size_t get_num_cols() const {
95  return num_cols;
96  }
97 
98  bool is_valid() const {
99  return loc.get_offset() >= 0;
100  }
101 };
102 
103 class sparse_matrix;
104 
105 /*
106  * This represents a minimal row block that we can access from disks.
107  * It's used for the matrix partitioned in one dimension.
108  */
109 class row_block
110 {
111  off_t off;
112 public:
113  row_block(off_t off) {
114  this->off = off;
115  }
116 
117  off_t get_offset() const {
118  return off;
119  }
120 };
121 
122 class SpM_2d_index;
123 
124 /*
125  * This maps row blocks to different I/O generators. It also defines how
126  * row blocks are distributed across threads.
127  */
128 class row_block_mapper
129 {
130 public:
131  /*
132  * A range of row blocks that are accessed together.
133  */
134  struct rb_range
135  {
136  // The index of the row block in array.
137  off_t idx;
138  // The number of row blocks in the range.
139  size_t num;
140  };
141 private:
142  std::vector<rb_range> ranges;
143 
144  void init(size_t num_rbs, size_t range_size);
145 public:
146  row_block_mapper(const std::vector<row_block> &rblocks, size_t range_size) {
147  // The last row block in the vector indicates the end of the row block
148  // vector, so the true number of row blocks is one fewer.
149  size_t num_rbs = rblocks.size() - 1;
150  init(num_rbs, range_size);
151  }
152  row_block_mapper(const SpM_2d_index &index, size_t range_size);
153 
154  size_t get_num_ranges() const {
155  return ranges.size();
156  }
157 
158  rb_range get_range(off_t idx) const {
159  return ranges[idx];
160  }
161 };
162 
163 /*
164  * This class generates I/O accesses for a worker thread.
165  * It defines which matrix blocks are accessed by the current worker thread.
166  * An I/O generator is referenced by multiple threads, so all of its methods
167  * need to be thread-safe.
168  */
169 class matrix_io_generator
170 {
171 public:
172  typedef std::shared_ptr<matrix_io_generator> ptr;
173 
174  /*
175  * This generates an I/O generator that accesses a matrix by rows.
176  */
177  static matrix_io_generator::ptr create(
178  const std::vector<row_block> &_blocks, size_t tot_num_rows,
179  size_t tot_num_cols, int file_id, const row_block_mapper &mapper);
180  /*
181  * This generates an I/O generator that accesses a matrix partitioned
182  * into blocks.
183  */
184  static matrix_io_generator::ptr create(SpM_2d_index::ptr idx, int file_id,
185  size_t io_num_brows, size_t min_num_brows);
186 
187  // Get the next I/O access in the current worker thread.
188  virtual matrix_io get_next_io() = 0;
189  virtual bool has_next_io() = 0;
190 };
191 
192 }
193 
194 #endif
Definition: io_request.h:186
off_t get_offset() const
Definition: io_request.h:218