FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
graph.h
1 #ifndef __GRAPH_H__
2 #define __GRAPH_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashGraph.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <string>
24 #include <set>
25 
26 #include "native_file.h"
27 
28 #include "vertex_index.h"
29 #include "vertex.h"
30 
31 namespace fg
32 {
33 
34 class graph
35 {
36 public:
37  typedef std::shared_ptr<graph> ptr;
38 
39  virtual ~graph() {
40  }
41 
42  virtual bool is_directed() const = 0;
43  virtual const in_mem_vertex &get_vertex(vertex_id_t id) const = 0;
44  virtual in_mem_vertex &get_vertex(vertex_id_t id) = 0;
45  virtual void add_vertex(const in_mem_vertex &v) = 0;
46  virtual void get_all_vertices(std::vector<vertex_id_t> &ids) const = 0;
47  virtual void dump(const std::string &index_file,
48  const std::string &graph_file) = 0;
49  virtual void dump_as_edge_list(const std::string &file) const {
50  // TODO
51  ABORT_MSG("dump_as_edge_list isn't implemented");
52  }
53  virtual size_t get_num_edges() const = 0;
54  virtual size_t get_num_vertices() const = 0;
55  virtual bool has_edge_data() const = 0;
56  virtual size_t get_num_non_empty_vertices() const = 0;
57  virtual void print() const = 0;
58  virtual void check_ext_graph(const std::string &index_file,
59  const std::string &adj_file) const = 0;
60  // Merge the graph to this graph.
61  virtual void merge(graph::ptr g) {
62  ABORT_MSG("merge isn't implemented");
63  }
64 };
65 
66 class in_mem_graph;
67 
68 class in_mem_subgraph: public graph
69 {
70 protected:
71  size_t num_non_empty;
72  bool has_data;
73  size_t num_edges;
74 
75  in_mem_subgraph(bool has_data) {
76  num_edges = 0;
77  num_non_empty = 0;
78  this->has_data = has_data;
79  }
80 public:
81  typedef std::shared_ptr<in_mem_subgraph> ptr;
82 
83  static ptr create(graph_type type, bool has_data);
84 
85  virtual void print() const {
86  ABORT_MSG("print isn't implemented");
87  }
88  virtual void check_ext_graph(const std::string &index_file,
89  const std::string &adj_file) const {
90  ABORT_MSG("check_ext_graph isn't implemented");
91  }
92  virtual void dump(const std::string &index_file,
93  const std::string &graph_file) {
94  ABORT_MSG("dump isn't implemented");
95  }
96  virtual void dump_as_edge_list(const std::string &file) const {
97  // TODO
98  ABORT_MSG("dump_as_edge_list isn't implemented");
99  }
100  virtual size_t get_num_edges() const {
101  return num_edges;
102  }
103  virtual bool has_edge_data() const {
104  return has_data;
105  }
106  virtual size_t get_num_non_empty_vertices() const {
107  return num_non_empty;
108  }
109  /*
110  * This compresses the subgraph and generates a graph whose vertex IDs
111  * are adjacent to each other.
112  */
113  std::pair<std::shared_ptr<in_mem_graph>, std::shared_ptr<vertex_index> > serialize(
114  const std::string &name, bool compress) const;
115  void compress();
116 
117  // Merge the graph to this graph.
118  virtual void merge(graph::ptr g) {
119  std::vector<vertex_id_t> vertices;
120  g->get_all_vertices(vertices);
121  for (size_t i = 0; i < vertices.size(); i++) {
122  vertex_id_t id = vertices[i];
123  add_vertex(g->get_vertex(id));
124  }
125  }
126 };
127 
128 template<class edge_data_type = empty_data>
129 class in_mem_undirected_subgraph: public in_mem_subgraph
130 {
131  typedef std::map<vertex_id_t, in_mem_undirected_vertex<edge_data_type> > vmap_t;
132  vmap_t vertices;
133 
134  in_mem_undirected_subgraph(bool has_data): in_mem_subgraph(has_data) {
135  }
136 public:
137  static in_mem_subgraph::ptr create(bool has_data) {
138  return in_mem_subgraph::ptr(new in_mem_undirected_subgraph<edge_data_type>(
139  has_data));
140  }
141 
142  virtual bool is_directed() const {
143  return false;
144  }
145 
146  virtual void add_vertex(const in_mem_vertex &v) {
147  const in_mem_undirected_vertex<edge_data_type> &un_v
148  = (const in_mem_undirected_vertex<edge_data_type> &) v;
149  vertices.insert(typename vmap_t::value_type(v.get_id(), un_v));
150  num_edges += un_v.get_num_edges();
151  if (un_v.get_num_edges() > 0)
152  num_non_empty++;
153  }
154 
155  virtual const in_mem_vertex &get_vertex(vertex_id_t id) const {
156  typename vmap_t::const_iterator it = vertices.find(id);
157  assert(it != vertices.end());
158  return it->second;
159  }
160 
161  virtual in_mem_vertex &get_vertex(vertex_id_t id) {
162  typename vmap_t::iterator it = vertices.find(id);
163  assert(it != vertices.end());
164  return it->second;
165  }
166 
167  virtual void get_all_vertices(std::vector<vertex_id_t> &ids) const {
168  for (typename vmap_t::const_iterator it = vertices.begin();
169  it != vertices.end(); it++)
170  ids.push_back(it->first);
171  }
172  virtual size_t get_num_vertices() const {
173  return vertices.size();
174  }
175 };
176 
177 template<class edge_data_type = empty_data>
178 class in_mem_directed_subgraph: public in_mem_subgraph
179 {
180  typedef std::map<vertex_id_t, in_mem_directed_vertex<edge_data_type> > vmap_t;
181  vmap_t vertices;
182 
183  in_mem_directed_subgraph(bool has_data): in_mem_subgraph(has_data) {
184  }
185 public:
186  static in_mem_subgraph::ptr create(bool has_data) {
187  return in_mem_subgraph::ptr(
188  new in_mem_directed_subgraph<edge_data_type>(has_data));
189  }
190 
191  virtual bool is_directed() const {
192  return true;
193  }
194 
195  virtual void add_vertex(const in_mem_vertex &v) {
196  const in_mem_directed_vertex<edge_data_type> &dv
197  = (const in_mem_directed_vertex<edge_data_type> &) v;
198  vertices.insert(typename vmap_t::value_type(v.get_id(), dv));
199  num_edges += dv.get_num_edges(edge_type::IN_EDGE);
200  if (dv.get_num_edges(edge_type::BOTH_EDGES) > 0)
201  num_non_empty++;
202  }
203 
204  virtual const in_mem_vertex &get_vertex(vertex_id_t id) const {
205  typename vmap_t::const_iterator it = vertices.find(id);
206  assert(it != vertices.end());
207  return it->second;
208  }
209 
210  virtual in_mem_vertex &get_vertex(vertex_id_t id) {
211  typename vmap_t::iterator it = vertices.find(id);
212  assert(it != vertices.end());
213  return it->second;
214  }
215 
216  virtual void get_all_vertices(std::vector<vertex_id_t> &ids) const {
217  for (typename vmap_t::const_iterator it = vertices.begin();
218  it != vertices.end(); it++)
219  ids.push_back(it->first);
220  }
221  virtual size_t get_num_vertices() const {
222  return vertices.size();
223  }
224 };
225 
226 static inline void unique_merge(const std::vector<vertex_id_t> &v1,
227  const std::vector<vertex_id_t> &v2, std::vector<vertex_id_t> &v)
228 {
229  std::vector<vertex_id_t>::const_iterator it1 = v1.begin();
230  std::vector<vertex_id_t>::const_iterator it2 = v2.begin();
231  while (it1 != v1.end() && it2 != v2.end()) {
232  if (*it1 > *it2) {
233  v.push_back(*it2);
234  it2++;
235  }
236  else if (*it1 < *it2) {
237  v.push_back(*it1);
238  it1++;
239  }
240  else {
241  v.push_back(*it1);
242  it1++;
243  it2++;
244  }
245  }
246 
247  while (it1 != v1.end()) {
248  v.push_back(*it1);
249  it1++;
250  }
251 
252  while (it2 != v2.end()) {
253  v.push_back(*it2);
254  it2++;
255  }
256 }
257 
258 }
259 
260 #endif