FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
vector.h
1 #ifndef __FM_VECTOR_H__
2 #define __FM_VECTOR_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashMatrix.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <memory>
#include <vector>

#include "generic_type.h"
#include "bulk_operate.h"
#include "vec_store.h"
#include "mem_vec_store.h"
28 
29 namespace fm
30 {
31 
32 class bulk_operate;
33 class data_frame;
34 class agg_operate;
35 class dense_matrix;
36 
37 class vector
38 {
39  detail::vec_store::const_ptr store;
40 
41  bool verify_groupby(const gr_apply_operate<local_vec_store> &op) const;
42 protected:
43  vector(detail::vec_store::const_ptr store) {
44  this->store = store;
45  }
46 public:
47  typedef std::shared_ptr<vector> ptr;
48  typedef std::shared_ptr<const vector> const_ptr;
49 
50  static ptr create(detail::vec_store::const_ptr store) {
51  return ptr(new vector(store));
52  }
53  static ptr create(size_t length, const scalar_type &type, int num_nodes,
54  bool in_mem, const set_vec_operate &op);
55 
56  ~vector() {
57  }
58 
59  const detail::vec_store &get_data() const {
60  return *store;
61  }
62 
63  detail::vec_store::const_ptr get_raw_store() const {
64  return store;
65  }
66 
67  template<class T>
68  std::vector<T> conv2std() const {
69  assert(is_type<T>());
70  std::vector<T> ret(get_length());
71  store->copy_to((char *) ret.data(), ret.size());
72  return ret;
73  }
74 
75  bool is_in_mem() const {
76  return store->is_in_mem();
77  }
78 
79  // Normally the entry size is the type size. But a vector may also
80  // contains vectors, and the entry size is 0, which is no longer
81  // the type size.
82  size_t get_entry_size() const {
83  return store->get_entry_size();
84  }
85  size_t get_length() const {
86  return store->get_length();
87  }
88 
89  template<class T>
90  bool is_type() const {
91  return store->get_type().get_type() == fm::get_type<T>();
92  }
93 
94  const scalar_type &get_type() const {
95  return store->get_type();
96  }
97 
98  bool is_sorted() const {
99  return store->is_sorted();
100  }
101 
102  bool equals(const vector &vec) const;
103 
104  vector::ptr sort() const;
105  std::shared_ptr<data_frame> sort_with_index() const;
106  std::shared_ptr<dense_matrix> conv2mat(size_t nrow, size_t ncol,
107  bool byrow) const;
108 
109  /*
110  * This is a general version of groupby. It requires sorting on the entire
111  * vector. If `with_val' is true, this method returns a data frame with two
112  * columns: the first column is a vector of unique values in the vector;
113  * the second column is a vector of vectors and contains the aggregation
114  * result for each unique value in the first column.
115  * If `with_val' is false, this method returns a data frame with only
116  * one column, which is a vector of vectors and contains the aggregation
117  * result for each unique value.
118  */
119  std::shared_ptr<data_frame> groupby(
120  const gr_apply_operate<local_vec_store> &op, bool with_val) const;
121  /*
122  * This version of groupby runs aggregation on each group. It only needs
123  * to scan the vector once. If `with_val' is true, this method returns
124  * a data frame with two columns: the first column is a vector of unique
125  * values in the vector; the second column is a vector of aggregation
126  * result for each unique value in the first column.
127  * If `with_val' is false, this method returns a data frame with only
128  * one column, which contains the aggregation result for each unique value.
129  */
130  std::shared_ptr<data_frame> groupby(
131  std::shared_ptr<const agg_operate> op, bool with_val) const;
132 
133  vector::ptr sapply(bulk_uoperate::const_ptr op) const;
134  scalar_variable::ptr aggregate(const bulk_operate &op) const;
135  scalar_variable::ptr dot_prod(const vector &vec) const;
136 
137  template<class T>
138  T max() const {
139  const bulk_operate &max_op = *get_type().get_basic_ops().get_op(
140  basic_ops::op_idx::MAX);
141  scalar_variable::ptr res = aggregate(max_op);
142  return *(T *) res->get_raw();
143  }
144  template<class T>
145  T sum() const {
146  const bulk_operate &sum_op = *get_type().get_basic_ops().get_op(
147  basic_ops::op_idx::ADD);
148  scalar_variable::ptr res = aggregate(sum_op);
149  return *(T *) res->get_raw();
150  }
151 
152  bool export2(FILE *f) const;
153 };
154 
155 /*
156  * Create a sequence of values in [start, end]. `end' is inclusive.
157  */
158 template<class EntryType>
159 vector::ptr create_seq_vector(EntryType start, EntryType end, EntryType stride,
160  int num_nodes = -1, bool in_mem = true)
161 {
162  detail::vec_store::ptr store = detail::create_seq_vec_store(start, end, stride,
163  num_nodes, in_mem);
164  if (store == NULL)
165  return vector::ptr();
166  return vector::create(store);
167 }
168 
169 /*
170  * Create a vector filled with a constant value.
171  */
172 template<class EntryType>
173 vector::ptr create_rep_vector(size_t length, EntryType initv,
174  int num_nodes = -1, bool in_mem = true)
175 {
176  detail::vec_store::ptr store = detail::create_rep_vec_store(length, initv,
177  num_nodes, in_mem);
178  if (store == NULL)
179  return vector::ptr();
180  return vector::create(store);
181 }
182 
183 }
184 
185 #endif