FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
scan_graph.h
1 #ifndef __SCAN_GRAPH_H__
2 #define __SCAN_GRAPH_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashGraph.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <memory>
24 
25 #include "graphlab/cuckoo_set_pow2.hpp"
26 #include "graph_engine.h"
27 
28 /*
29  * The edge has two attributes:
30  * The number of duplicated edges with the neighbor;
31  * The number of edges that contributes to the neighbor's neighborhood.
32  */
33 class attributed_neighbor
34 {
35  fg::vertex_id_t id;
36  int num_dups;
37 public:
38  attributed_neighbor() {
39  id = -1;
40  num_dups = 0;
41  }
42 
43  attributed_neighbor(fg::vertex_id_t id) {
44  this->id = id;
45  num_dups = 1;
46  }
47 
48  attributed_neighbor(fg::vertex_id_t id, int num_dups) {
49  this->id = id;
50  this->num_dups = num_dups;
51  }
52 
53  fg::vertex_id_t get_id() const {
54  return id;
55  }
56 
57  int get_num_dups() const {
58  return num_dups;
59  }
60 
61  bool operator<(const attributed_neighbor &e) const {
62  return this->id < e.id;
63  }
64 
65  bool operator==(fg::vertex_id_t id) const {
66  return this->id == id;
67  }
68 
69  bool operator==(const attributed_neighbor &e) const {
70  return this->id == e.id;
71  }
72 };
73 
74 template<class InputIterator1, class InputIterator2, class Skipper,
75  class Merger, class OutputIterator>
76 size_t unique_merge(InputIterator1 it1, InputIterator1 last1,
77  InputIterator2 it2, InputIterator2 last2, Skipper skip,
78  Merger merge, OutputIterator result)
79 {
80  OutputIterator result_begin = result;
81  while (it1 != last1 && it2 != last2) {
82  if (*it2 < *it1) {
83  typename std::iterator_traits<OutputIterator>::value_type v = *it2;
84  ++it2;
85  while (it2 != last2 && v == *it2) {
86  v = merge(v, *it2);
87  ++it2;
88  }
89  if (!skip(v))
90  *(result++) = v;
91  }
92  else if (*it1 < *it2) {
93  typename std::iterator_traits<OutputIterator>::value_type v = *it1;
94  ++it1;
95  while (it1 != last1 && v == *it1) {
96  v = merge(v, *it1);
97  ++it1;
98  }
99  if (!skip(v))
100  *(result++) = v;
101  }
102  else {
103  typename std::iterator_traits<OutputIterator>::value_type v = *it1;
104  v = merge(v, *it2);
105  ++it2;
106  while (it2 != last2 && v == *it2) {
107  v = merge(v, *it2);
108  ++it2;
109  }
110  ++it1;
111  while (it1 != last1 && v == *it1) {
112  v = merge(v, *it1);
113  ++it1;
114  }
115  if (!skip(v))
116  *(result++) = v;
117  }
118  }
119 
120  while (it1 != last1) {
121  typename std::iterator_traits<OutputIterator>::value_type v = *it1;
122  ++it1;
123  while (it1 != last1 && v == *it1) {
124  v = merge(v, *it1);
125  ++it1;
126  }
127  if (!skip(v))
128  *(result++) = v;
129  }
130 
131  while (it2 != last2) {
132  typename std::iterator_traits<OutputIterator>::value_type v = *it2;
133  ++it2;
134  while (it2 != last2 && v == *it2) {
135  v = merge(v, *it2);
136  ++it2;
137  }
138  if (!skip(v))
139  *(result++) = v;
140  }
141  return result - result_begin;
142 }
143 
144 class neighbor_list
145 {
146 public:
147  class index_entry
148  {
149  fg::vertex_id_t id;
150  uint32_t idx;
151  public:
152  index_entry() {
153  id = -1;
154  idx = -1;
155  }
156 
157  index_entry(fg::vertex_id_t id) {
158  this->id = id;
159  this->idx = -1;
160  }
161 
162  index_entry(fg::vertex_id_t id, uint32_t idx) {
163  this->id = id;
164  this->idx = idx;
165  }
166 
167  fg::vertex_id_t get_id() const {
168  return id;
169  }
170 
171  uint32_t get_idx() const {
172  return idx;
173  }
174 
175  bool operator==(const index_entry &e) const {
176  return id == e.get_id();
177  }
178  };
179 
180  class index_hash
181  {
182  boost::hash<fg::vertex_id_t> id_hash;
183  public:
184  size_t operator()(const index_entry &e) const {
185  return id_hash(e.get_id());
186  }
187  };
188 
189  typedef graphlab::cuckoo_set_pow2<index_entry, 3, size_t,
190  index_hash> edge_set_t;
191 
192 protected:
193  // The vertex Id that the neighbor list belongs to.
194  fg::vertex_id_t id;
195  std::vector<fg::vertex_id_t> id_list;
196  std::vector<int> num_dup_list;
197  edge_set_t *neighbor_set;
198 public:
199  class id_iterator: public std::iterator<std::random_access_iterator_tag, fg::vertex_id_t>
200  {
201  std::vector<fg::vertex_id_t>::const_iterator it;
202  public:
203  typedef typename std::iterator<std::random_access_iterator_tag,
204  fg::vertex_id_t>::difference_type difference_type;
205 
206  id_iterator() {
207  }
208 
209  id_iterator(const std::vector<fg::vertex_id_t> &v) {
210  it = v.begin();
211  }
212 
213  difference_type operator-(const id_iterator &it) const {
214  return this->it - it.it;
215  }
216 
217  fg::vertex_id_t operator*() const {
218  return *it;
219  }
220 
221  id_iterator &operator++() {
222  it++;
223  return *this;
224  }
225 
226  id_iterator operator++(int) {
227  id_iterator ret = *this;
228  it++;
229  return ret;
230  }
231 
232  bool operator==(const id_iterator &it) const {
233  return it.it == this->it;
234  }
235 
236  bool operator!=(const id_iterator &it) const {
237  return it.it != this->it;
238  }
239 
240  id_iterator &operator+=(size_t num) {
241  it += num;
242  return *this;
243  }
244  };
245 
246  neighbor_list(const fg::page_vertex &vertex,
247  const std::vector<attributed_neighbor> &neighbors) {
248  this->id = vertex.get_id();
249  size_t num_neighbors = neighbors.size();
250  id_list.resize(num_neighbors);
251  num_dup_list.resize(num_neighbors);
252  for (size_t i = 0; i < num_neighbors; i++) {
253  id_list[i] = neighbors[i].get_id();
254  num_dup_list[i] = neighbors[i].get_num_dups();
255  }
256  neighbor_set = NULL;
257  if (num_neighbors > 0) {
258  neighbor_set = new edge_set_t(index_entry(),
259  0, 2 * neighbors.size());
260  for (size_t i = 0; i < neighbors.size(); i++)
261  neighbor_set->insert(index_entry(neighbors[i].get_id(), i));
262  }
263  }
264 
265  virtual ~neighbor_list() {
266  if (neighbor_set)
267  delete neighbor_set;
268  }
269 
270  fg::vertex_id_t get_neighbor_id(size_t idx) const {
271  return id_list[idx];
272  }
273 
274  bool contains(fg::vertex_id_t id) const {
275  edge_set_t::const_iterator it = neighbor_set->find(id);
276  return it != neighbor_set->end();
277  }
278 
279  id_iterator get_id_begin() const {
280  return id_iterator(id_list);
281  }
282 
283  id_iterator get_id_end() const {
284  id_iterator ret(id_list);
285  ret += id_list.size();
286  return ret;
287  }
288 
289  size_t size() const {
290  return id_list.size();
291  }
292 
293  bool empty() const {
294  return id_list.empty();
295  }
296 
297  size_t get_neighbors(std::vector<fg::vertex_id_t> &neighbors) const {
298  neighbors = id_list;
299  return neighbors.size();
300  }
301 
302  /*
303  * This return the vertex Id that the neighbor list belongs to.
304  */
305  fg::vertex_id_t get_id() const {
306  return id;
307  }
308 
309  virtual size_t count_edges(const fg::page_vertex *v);
310  virtual size_t count_edges(const fg::page_vertex *v, fg::edge_type type,
311  std::vector<fg::vertex_id_t> *common_neighs) const;
312  virtual size_t count_edges_hash(const fg::page_vertex *v,
313  fg::edge_iterator other_it, fg::edge_iterator other_end,
314  std::vector<fg::vertex_id_t> *common_neighs) const;
315 #if 0
316  virtual size_t count_edges_bin_search_this(const fg::page_vertex *v,
317  neighbor_list::id_iterator this_it,
318  neighbor_list::id_iterator this_end,
319  fg::edge_iterator other_it, fg::edge_iterator other_end) const;
320 #endif
321  virtual size_t count_edges_bin_search_other(const fg::page_vertex *v,
322  neighbor_list::id_iterator this_it,
323  neighbor_list::id_iterator this_end,
324  fg::edge_iterator other_it, fg::edge_iterator other_end,
325  std::vector<fg::vertex_id_t> *common_neighs) const;
326  virtual size_t count_edges_scan(const fg::page_vertex *v,
327  neighbor_list::id_iterator this_it,
328  neighbor_list::id_iterator this_end, fg::edge_seq_iterator other_it,
329  std::vector<fg::vertex_id_t> *common_neighs) const;
330 };
331 
332 /*
333  * This data structure contains all data required when the vertex is
334  * computing local scan.
335  * It doesn't need to exist before or after the vertex computes local scan.
336  */
337 struct runtime_data_t
338 {
339  // All neighbors (in both in-edges and out-edges)
340  std::shared_ptr<neighbor_list> neighbors;
341  // The number of vertices that have joined with the vertex.
342  unsigned num_joined;
343  size_t local_scan;
344 
345  runtime_data_t(std::shared_ptr<neighbor_list> neighbors) {
346  this->neighbors = std::move(neighbors);
347  num_joined = 0;
348  local_scan = 0;
349  }
350 
351  runtime_data_t(std::shared_ptr<neighbor_list> neighbors,
352  size_t curr_local_scan) {
353  this->neighbors = std::move(neighbors);
354  num_joined = 0;
355  local_scan = curr_local_scan;
356  }
357 };
358 
359 enum multi_func_flags
360 {
361  EST_LOCAL,
362  REAL_LOCAL,
363  RUNTIME_POINTER,
364  PART_LOCAL_POINTER,
365  NUM_FLAGS,
366 };
367 
368 class part_local_t;
369 
370 /*
371  * A data structure of containing vertex state for computing local scan.
372  * This is Linux-specific.
373  */
374 class scan_multi_func_value
375 {
376  static const int VALUE_BITS = sizeof(size_t) * 8 - NUM_FLAGS;
377  static const size_t FLAGS_MASK = ((1UL << VALUE_BITS) - 1);
378  size_t value;
379 
380  void set_flag(int flag) {
381  value |= 1UL << (VALUE_BITS + flag);
382  }
383 
384  bool has_flag(int flag) const {
385  return value & (1UL << (VALUE_BITS + flag));
386  }
387 public:
388  scan_multi_func_value() {
389  value = 0;
390  }
391 
392  /*
393  * Estimated local scan.
394  */
395 
396  void set_est_local(size_t num) {
397  value = num;
398  set_flag(EST_LOCAL);
399  }
400 
401  bool has_est_local() const {
402  return has_flag(EST_LOCAL);
403  }
404 
405  size_t get_est_local() const {
406  assert(has_flag(EST_LOCAL));
407  return value & FLAGS_MASK;
408  }
409 
410  /*
411  * Real local scan.
412  */
413 
414  void set_real_local(size_t num) {
415  value = num;
416  set_flag(REAL_LOCAL);
417  }
418 
419  void inc_real_local(size_t num) {
420  assert(has_real_local());
421  value += num;
422  }
423 
424  bool has_real_local() const {
425  return has_flag(REAL_LOCAL);
426  }
427 
428  size_t get_real_local() const {
429  assert(has_flag(REAL_LOCAL));
430  return value & FLAGS_MASK;
431  }
432 
433  /*
434  * Pointer to the runtime data.
435  */
436 
437  void set_runtime_data(runtime_data_t *data) {
438  value = (size_t) data;
439  set_flag(RUNTIME_POINTER);
440  }
441 
442  bool has_runtime_data() const {
443  return has_flag(RUNTIME_POINTER);
444  }
445 
446  runtime_data_t *get_runtime_data() const {
447  assert(has_flag(RUNTIME_POINTER));
448  return (runtime_data_t *) (value & FLAGS_MASK);
449  }
450 
451  /*
452  * Pointer to the partial local scan data.
453  */
454 
455  void set_part_local(part_local_t *data) {
456  value = (size_t) data;
457  set_flag(PART_LOCAL_POINTER);
458  }
459 
460  bool has_part_local() const {
461  return has_flag(PART_LOCAL_POINTER);
462  }
463 
464  part_local_t *get_part_local() const {
465  assert(has_flag(PART_LOCAL_POINTER));
466  return (part_local_t *) (value & FLAGS_MASK);
467  }
468 };
469 
470 enum scan_stage_t
471 {
472  INIT,
473  RUN,
474 };
475 
476 #endif
edge_type
Edge type of an edge in the graph.
Definition: vertex.h:43
virtual vertex_id_t get_id() const =0
Get the vertex unique ID.
Vertex representation when in the page cache.
Definition: vertex.h:575