FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
matrix_header.h
1 #ifndef __MATRIX_HEADER_H__
2 #define __MATRIX_HEADER_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashMatrix.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <stdlib.h>
24 #include <assert.h>
25 #include <string.h>
26 #include <math.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 
30 #include "generic_type.h"
31 
32 namespace fm
33 {
34 
35 enum matrix_type
36 {
37  VECTOR,
38  DENSE,
39  SPARSE,
40 };
41 
42 enum matrix_layout_t
43 {
44  L_COL,
45  L_ROW,
46  L_ROW_2D,
47  // It indicates that the layout isn't defined.
48  L_NONE,
49 };
50 
51 static const size_t block_max_num_rows = ((size_t) std::numeric_limits<int16_t>::max()) + 1;
52 static const size_t block_max_num_cols = ((size_t) std::numeric_limits<int16_t>::max()) + 1;
53 
54 class block_2d_size
55 {
56  uint16_t nrow_log;
57  uint16_t ncol_log;
58 public:
59  block_2d_size() {
60  nrow_log = 0;
61  ncol_log = 0;
62  }
63 
64  block_2d_size(size_t num_rows, size_t num_cols) {
65  assert(num_rows <= block_max_num_rows);
66  assert(num_cols <= block_max_num_cols);
67  nrow_log = log2(num_rows);
68  ncol_log = log2(num_cols);
69  assert((1U << nrow_log) == num_rows);
70  assert((1U << ncol_log) == num_cols);
71  }
72 
73  size_t get_nrow_log() const {
74  return nrow_log;
75  }
76 
77  size_t get_ncol_log() const {
78  return ncol_log;
79  }
80 
81  size_t get_nrow_mask() const {
82  return get_num_rows() - 1;
83  }
84 
85  size_t get_ncol_mask() const {
86  return get_num_cols() - 1;
87  }
88 
89  size_t get_num_rows() const {
90  return 1 << nrow_log;
91  }
92 
93  size_t get_num_cols() const {
94  return 1 << ncol_log;
95  }
96 
97  size_t cal_num_block_rows(size_t tot_num_rows) const {
98  return ceil(((double) tot_num_rows) / get_num_rows());
99  }
100 };
101 
102 /*
103  * The matrix header contains the basic information about the matrix
104  * when the matrix is stored on disks.
105  */
106 class matrix_header
107 {
108  static const int64_t MAGIC_NUMBER = 0x123456789FFFFFFL;
109  static const int CURR_VERSION = 2;
110 
111  union {
112  struct info {
113  int64_t magic_number;
114  int version_number;
115  matrix_type type;
116  size_t entry_size;
117  size_t nrows;
118  size_t ncols;
119  matrix_layout_t layout;
120  prim_type data_type;
121  // These two fields are valid when the matrix layout is
122  // 2D partitioned.
123  size_t block_2d_height;
124  size_t block_2d_width;
125  // It doesn't include the entry type, which should be determined by
126  // users at runtime.
127  } d;
128 
129  char page[4096];
130  } u;
131 
132  void init(matrix_type mat_type, size_t entry_size, size_t nrows,
133  size_t ncols, matrix_layout_t layout, prim_type data_type);
134 public:
135  static size_t get_header_size() {
136  return sizeof(matrix_header);
137  }
138 
139  matrix_header() {
140  memset(u.page, 0, sizeof(u.page));
141  }
142 
143  matrix_header(matrix_type mat_type, size_t entry_size, size_t nrows,
144  size_t ncols, matrix_layout_t layout, prim_type data_type) {
145  init(mat_type, entry_size, nrows, ncols, layout, data_type);
146  }
147 
148  matrix_header(matrix_type mat_type, size_t entry_size, size_t nrows,
149  size_t ncols, matrix_layout_t layout, prim_type data_type,
150  const block_2d_size &block_size);
151 
152  matrix_type get_matrix_type() const {
153  return u.d.type;
154  }
155 
156  bool is_sparse() const {
157  return get_matrix_type() == matrix_type::SPARSE;
158  }
159 
160  size_t get_entry_size() const {
161  return u.d.entry_size;
162  }
163 
164  size_t get_num_rows() const {
165  return u.d.nrows;
166  }
167 
168  size_t get_num_cols() const {
169  return u.d.ncols;
170  }
171 
172  matrix_layout_t get_layout() const {
173  return u.d.layout;
174  }
175 
176  const scalar_type &get_data_type() const {
177  return get_scalar_type(u.d.data_type);
178  }
179 
180  bool is_matrix_file() const {
181  return u.d.magic_number == MAGIC_NUMBER;
182  }
183 
184  bool is_right_version() const {
185  return u.d.version_number == CURR_VERSION;
186  }
187 
188  void verify() const;
189 
190  block_2d_size get_2d_block_size() const {
191  return block_2d_size(u.d.block_2d_height, u.d.block_2d_width);
192  }
193 };
194 
195 }
196 
197 #endif
prim_type
Definition: generic_type.h:37