FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
data_frame.h
1 /*
2  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
3  * Written by Da Zheng (zhengda1936@gmail.com)
4  *
5  * This file is part of FlashMatrix.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 
20 #ifndef __DATA_FRAME_H__
21 #define __DATA_FRAME_H__
22 
23 #include <assert.h>
24 
25 #include <vector>
26 #include <string>
27 #include <memory>
28 #include <unordered_map>
29 
30 #include "vec_store.h"
31 
32 namespace fm
33 {
34 
/* Forward declaration; this header only names the type inside std::shared_ptr. */
35 class local_vec_store;
36 
/*
 * named_vec_t / named_cvec_t pair a column name with a shared pointer to
 * the (mutable / const) vector store for that column.
 * sub_data_frame is a list of shared pointers to read-only local vector
 * stores; it is the type the groupby operator is instantiated with.
 */
37 typedef std::pair<std::string, detail::vec_store::ptr> named_vec_t;
38 typedef std::pair<std::string, detail::vec_store::const_ptr> named_cvec_t;
39 typedef std::vector<std::shared_ptr<const local_vec_store> > sub_data_frame;
40 
/*
 * Forward declarations. In this header these types are only named in
 * declarations (reference parameters and std::shared_ptr targets), so their
 * full definitions are not required here.
 */
41 template<class T> class gr_apply_operate;
42 class vector_vector;
43 class data_frame;
44 
/*
 * Combines the data frames in `dfs` into one new data frame.
 * Declared before the class so that the class can name it as a friend.
 * NOTE(review): `in_mem` presumably selects in-memory storage for the
 * result -- confirm against the implementation, which is not in this header.
 */
45 std::shared_ptr<data_frame> merge_data_frame(
46  const std::vector<std::shared_ptr<const data_frame> > &dfs, bool in_mem);
47 
54 {
/* The columns as (name, vector) pairs, addressed by position. */
55  std::vector<named_vec_t> named_vecs;
/*
 * Lookup from column name to the same vector. set_vec() writes both
 * containers together so that they stay in agreement.
 */
56  std::unordered_map<std::string, detail::vec_store::ptr> vec_map;
57 
58 protected:
/*
 * Replaces the vector of the column at position `off` with `vec`, updating
 * both the positional list and the name lookup table. The column keeps its
 * name. `off` is not bounds-checked.
 */
59  void set_vec(size_t off, detail::vec_store::ptr vec) {
/* The name is only read, so bind it by reference instead of copying it. */
60  const std::string &name = named_vecs[off].first;
61  named_vecs[off].second = vec;
62  vec_map[name] = vec;
63  }
64 
/*
 * Creates a data frame with no columns. The constructor is protected;
 * code outside the class hierarchy obtains instances through create().
 */
65  data_frame() {
66  }
67 
/*
 * Creates a data frame from the given (name, vector) pairs.
 * Defined out of line; the definition is not part of this header.
 */
68  data_frame(const std::vector<named_vec_t> &named_vecs);
69 public:
/* Shared-ownership handles to a mutable and to a read-only data frame. */
70  typedef std::shared_ptr<data_frame> ptr;
71  typedef std::shared_ptr<const data_frame> const_ptr;
72 
/*
 * Factory that returns a new data frame without any columns.
 * NOTE: the constructor is protected, so std::make_shared could not be
 * used here as-is; the object is allocated with `new` and handed to ptr.
 */
73  static ptr create() {
74  return ptr(new data_frame());
75  }
76 
/*
 * Factory that returns a new data frame constructed from the given
 * (name, vector) pairs.
 */
77  static ptr create(const std::vector<named_vec_t> &named_vecs) {
78  return ptr(new data_frame(named_vecs));
79  }
80 
/*
 * Adds the vector `vec` to the data frame under the column name `name`.
 * NOTE(review): the bool result presumably reports success -- confirm
 * against the implementation, which is not in this header.
 */
81  bool add_vec(const std::string &name, detail::vec_store::ptr vec);
82 
83  /*
84  * This method appends multiple data frames to this data frame.
85  * All data frames should have the same number of columns and the columns
86  * should have the same names.
87  */
88  bool append(std::vector<data_frame::ptr>::const_iterator begin,
89  std::vector<data_frame::ptr>::const_iterator end);
/*
 * Overload that appends a single data frame.
 * NOTE(review): the bool result of both overloads presumably reports
 * success -- confirm against the implementation.
 */
90  bool append(data_frame::ptr df);
91 
/*
 * Returns the name of the column at position `off`.
 * `off` is not bounds-checked. The returned reference points into this
 * object's column list.
 */
92  const std::string &get_vec_name(size_t off) const {
93  return named_vecs[off].first;
94  }
95 
/*
 * Returns a read-only reference to the vector of the column at position
 * `off`. `off` is not bounds-checked and the stored pointer is
 * dereferenced without a null check.
 */
96  const detail::vec_store &get_vec_ref(size_t off) const {
97  return *named_vecs[off].second;
98  }
99 
/*
 * Returns a read-only reference to the vector of the column called `name`.
 * The column must exist: this is only checked with assert(), so when
 * assertions are compiled out (NDEBUG) an unknown name dereferences the
 * end iterator. Use get_vec(name) when the column may be absent.
 */
100  const detail::vec_store &get_vec_ref(const std::string &name) const {
101  auto it = vec_map.find(name);
102  assert(it != vec_map.end());
103  return *it->second;
104  }
105 
/*
 * Returns a read-only shared pointer to the vector of the column at
 * position `off`. `off` is not bounds-checked.
 */
106  detail::vec_store::const_ptr get_vec(size_t off) const {
107  return named_vecs[off].second;
108  }
109 
/*
 * Returns a read-only shared pointer to the vector of the column called
 * `name`, or an empty (null) pointer if there is no such column.
 */
110  detail::vec_store::const_ptr get_vec(const std::string &name) const {
111  auto it = vec_map.find(name);
112  if (it == vec_map.end())
113  return detail::vec_store::const_ptr();
114  else
115  return it->second;
116  }
117 
/*
 * Returns a mutable shared pointer to the vector of the column at
 * position `off`. `off` is not bounds-checked.
 */
118  detail::vec_store::ptr get_vec(size_t off) {
119  return named_vecs[off].second;
120  }
121 
/*
 * Returns a mutable shared pointer to the vector of the column called
 * `name`, or an empty (null) pointer if there is no such column.
 */
122  detail::vec_store::ptr get_vec(const std::string &name) {
123  auto it = vec_map.find(name);
124  if (it == vec_map.end())
125  return detail::vec_store::ptr();
126  else
127  return it->second;
128  }
129 
/* Returns the number of columns in the data frame. */
130  size_t get_num_vecs() const {
131  return named_vecs.size();
132  }
133 
/*
 * Returns the number of entries (rows), taken from the length of the
 * first column. A data frame without any columns, such as the one returned
 * by the argument-less create(), reports 0; indexing named_vecs[0] on an
 * empty vector would otherwise be undefined behavior.
 */
134  size_t get_num_entries() const {
135  return named_vecs.empty() ? 0 : named_vecs[0].second->get_length();
136  }
137 
/*
 * Declared virtual, so subclasses may override it. Takes the name of a
 * column and an operator instantiated on sub_data_frame, and returns a
 * shared pointer to a vector_vector.
 * NOTE(review): presumably rows are grouped by the values in `col_name`
 * and `op` is applied to each group -- confirm against the implementation.
 */
141  virtual std::shared_ptr<vector_vector> groupby(const std::string &col_name,
142  const gr_apply_operate<sub_data_frame> &op) const;
143  /*
144  * This method sorts all rows in the data frame according to the given column.
145  */
/*
 * `col_name` names the column to sort by; the result is returned as a
 * read-only data frame pointer.
 * NOTE(review): behavior for an unknown column name is not visible in
 * this header -- confirm against the implementation.
 */
146  data_frame::const_ptr sort(const std::string &col_name) const;
/*
 * NOTE(review): presumably reports whether the column `col_name` is
 * already in sorted order -- confirm against the implementation.
 */
147  bool is_sorted(const std::string &col_name) const;
148 
/*
 * Returns a read-only pointer to a copy of this data frame.
 * NOTE(review): the name suggests that the copy shares the underlying
 * vectors rather than duplicating their data -- confirm against the
 * implementation.
 */
149  data_frame::const_ptr shallow_copy() const;
150 
/*
 * Grants the free function merge_data_frame() access to the non-public
 * members of this class.
 */
151  friend data_frame::ptr merge_data_frame(
152  const std::vector<data_frame::const_ptr> &dfs, bool in_mem);
153 };
154 
155 }
156 
157 #endif
virtual std::shared_ptr< vector_vector > groupby(const std::string &col_name, const gr_apply_operate< sub_data_frame > &op) const
Definition: data_frame.h:53