FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
vertex.h
1 #ifndef __EXT_VERTEX_H__
2 #define __EXT_VERTEX_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashGraph.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include <assert.h>
26 
27 #include <memory>
28 #include <vector>
29 #include <algorithm>
30 #include <unordered_map>
31 
32 #include "comm_exception.h"
33 #include "container.h"
34 #include "cache.h"
35 #include "FG_basic_types.h"
36 
37 namespace fg
38 {
39 
43 enum edge_type {
44  NONE,
49 };
50 
51 class vertex_index;
52 
53 /*
54  * \brief This class contains the basic information about a vertex on the disk.
55  *
56  */
57 class ext_mem_vertex_info
58 {
59  vertex_id_t id;
60  vsize_t size;
61  off_t off;
62 public:
63  ext_mem_vertex_info() {
64  id = INVALID_VERTEX_ID;
65  off = 0;
66  size = 0;
67  }
68 
69  ext_mem_vertex_info(vertex_id_t id, off_t off, size_t size) {
70  this->id = id;
71  this->off = off;
72  this->size = size;
73  }
74 
75  vertex_id_t get_id() const {
76  return id;
77  }
78 
79  off_t get_off() const {
80  return off;
81  }
82 
83  vsize_t get_size() const {
84  return size;
85  }
86 
87  bool has_edges() const;
88 
89  bool is_valid() const {
90  return id != INVALID_VERTEX_ID;
91  }
92 };
93 
94 /*
95  * The information of vertex header.
96  * It contains the vertex ID and the number of edges.
97  */
98 class vertex_header
99 {
100  vertex_id_t id;
101  vsize_t num_edges;
102 public:
103  vertex_header(vertex_id_t id, vsize_t num_edges) {
104  this->id = id;
105  this->num_edges = num_edges;
106  }
107 
108  vertex_id_t get_id() const {
109  return id;
110  }
111 
112  vsize_t get_num_edges() const {
113  return num_edges;
114  }
115 };
116 
117 /*
118  * The information of directed vertex header.
119  * In addition to the vertex header, it has the number of in-edges
120  * and out-edges.
121  */
122 class directed_vertex_header: public vertex_header
123 {
124  vsize_t num_in_edges;
125  vsize_t num_out_edges;
126 public:
127  directed_vertex_header(vertex_id_t id, vsize_t num_in_edges,
128  vsize_t num_out_edges): vertex_header(id,
129  num_in_edges + num_out_edges) {
130  this->num_in_edges = num_in_edges;
131  this->num_out_edges = num_out_edges;
132  }
133 
134  vsize_t get_num_in_edges() const {
135  return num_in_edges;
136  }
137 
138  vsize_t get_num_out_edges() const {
139  return num_out_edges;
140  }
141 };
142 
143 class empty_data
144 {
145 public:
146  bool operator==(const empty_data &data) const {
147  return true;
148  }
149 };
150 
151 static inline std::ostream& operator<<(std::ostream& cout, empty_data obj)
152 {
153  return cout;
154 }
155 
156 template<class data_type = empty_data>
157 class edge
158 {
159  bool has_data;
160  vertex_id_t from;
161  vertex_id_t to;
162  data_type data;
163 public:
164  edge() {
165  this->from = -1;
166  this->to = -1;
167  has_data = false;
168  }
169 
170  edge(vertex_id_t from, vertex_id_t to) {
171  this->from = from;
172  this->to = to;
173  has_data = false;
174  }
175 
176  edge(vertex_id_t from, vertex_id_t to, const data_type &data) {
177  this->from = from;
178  this->to = to;
179  this->data = data;
180  has_data = true;
181  }
182 
183  vertex_id_t get_from() const {
184  return from;
185  }
186 
187  vertex_id_t get_to() const {
188  return to;
189  }
190 
191  bool has_edge_data() const {
192  return has_data;
193  }
194 
195  const data_type &get_data() const {
196  assert(has_data);
197  return data;
198  }
199 
200  void reverse_dir() {
201  vertex_id_t tmp = from;
202  from = to;
203  to = tmp;
204  }
205 };
206 
207 template<>
208 class edge<empty_data>
209 {
210  static empty_data data;
211  vertex_id_t from;
212  vertex_id_t to;
213 public:
214  edge() {
215  this->from = -1;
216  this->to = -1;
217  }
218 
219  edge(vertex_id_t from, vertex_id_t to) {
220  this->from = from;
221  this->to = to;
222  }
223 
224  edge(vertex_id_t from, vertex_id_t to, const empty_data &data) {
225  this->from = from;
226  this->to = to;
227  }
228 
229  vertex_id_t get_from() const {
230  return from;
231  }
232 
233  vertex_id_t get_to() const {
234  return to;
235  }
236 
237  bool has_edge_data() const {
238  return false;
239  }
240 
241  const empty_data &get_data() const {
242  return data;
243  }
244 
245  void reverse_dir() {
246  vertex_id_t tmp = from;
247  from = to;
248  to = tmp;
249  }
250 };
251 
256 {
257  time_t timestamp;
258 public:
259  ts_edge_data() {
260  timestamp = 0;
261  }
262 
263  ts_edge_data(time_t timestamp) {
264  this->timestamp = timestamp;
265  }
266 
267  time_t get_timestamp() const {
268  return timestamp;
269  }
270 
271  bool operator==(const ts_edge_data &data) const {
272  return this->timestamp == data.timestamp;
273  }
274 
275  bool operator<(const ts_edge_data &data) const {
276  return this->timestamp < data.timestamp;
277  }
278 };
279 
280 static inline std::ostream& operator<<(std::ostream& cout,
281  const ts_edge_data &obj)
282 {
283  return cout << obj.get_timestamp();
284 }
285 
286 template<>
287 class edge<ts_edge_data>
288 {
289  vertex_id_t from;
290  vertex_id_t to;
291  ts_edge_data data;
292 public:
293  edge() {
294  this->from = -1;
295  this->to = -1;
296  }
297 
298  edge(vertex_id_t from, vertex_id_t to) {
299  this->from = from;
300  this->to = to;
301  }
302 
303  edge(vertex_id_t from, vertex_id_t to, const ts_edge_data &data) {
304  this->from = from;
305  this->to = to;
306  this->data = data;
307  }
308 
309  vertex_id_t get_from() const {
310  return from;
311  }
312 
313  vertex_id_t get_to() const {
314  return to;
315  }
316 
317  bool has_edge_data() const {
318  return true;
319  }
320 
321  const ts_edge_data &get_data() const {
322  return data;
323  }
324 
325  void reverse_dir() {
326  vertex_id_t tmp = from;
327  from = to;
328  to = tmp;
329  }
330 };
331 
337 {
338  uint32_t num;
339 public:
340  edge_count() {
341  num = 1;
342  }
343 
344  edge_count(uint32_t n) {
345  num = n;
346  }
347 
348  uint32_t get_count() const {
349  return num;
350  }
351 
352  bool operator==(const edge_count &count) const {
353  return this->num == count.num;
354  }
355 
356  edge_count& operator+=(const edge_count& rhs) {
357  this->num += rhs.get_count();
358  return *this;
359  }
360 };
361 
362 static inline std::ostream& operator<<(std::ostream& cout,
363  const edge_count &obj)
364 {
365  return cout << obj.get_count();
366 }
367 
368 template<class edge_data_type>
369 class in_mem_directed_vertex;
370 
371 template<class edge_data_type>
372 class in_mem_undirected_vertex;
373 
374 template<class edge_data_type>
375 class edge_const_iterator
376 {
377  bool is_in_edge;
378  vertex_id_t id;
379  const vertex_id_t *ptr;
380  const edge_data_type *data_ptr;
381 public:
382  edge_const_iterator(vertex_id_t id,
383  const vertex_id_t *edge_ptr,
384  const edge_data_type *data_ptr, bool is_in_edge) {
385  this->is_in_edge = is_in_edge;
386  this->id = id;
387  this->ptr = edge_ptr;
388  this->data_ptr = data_ptr;
389  }
390 
391  edge<edge_data_type> operator*() const {
392  if (data_ptr) {
393  if (is_in_edge)
394  return edge<edge_data_type>(*ptr, id, *data_ptr);
395  else
396  return edge<edge_data_type>(id, *ptr, *data_ptr);
397  }
398  else {
399  if (is_in_edge)
400  return edge<edge_data_type>(*ptr, id);
401  else
402  return edge<edge_data_type>(id, *ptr);
403  }
404  }
405 
406  edge_const_iterator &operator++() {
407  ptr++;
408  if (data_ptr)
409  data_ptr++;
410  return *this;
411  }
412 
413  bool operator==(const edge_const_iterator &it) const {
414  return this->ptr == it.ptr;
415  }
416 
417  bool operator!=(const edge_const_iterator &it) const {
418  return this->ptr != it.ptr;
419  }
420 
421  edge_const_iterator &operator+=(int num) {
422  ptr += num;
423  if (data_ptr)
424  data_ptr += num;
425  return *this;
426  }
427 };
428 
429 template<class T>
430 struct delete_as_chararr
431 {
432 public:
433  void operator()(T *obj) const {
434  char *char_p = (char *) obj;
435  delete [] char_p;
436  }
437 };
438 
439 class in_mem_vertex;
440 
441 /*
442  * This vertex represents an undirected vertex in the external memory.
443  */
444 class ext_mem_undirected_vertex
445 {
446  vertex_id_t id;
447  uint32_t edge_data_size;
448  vsize_t num_edges;
449  vertex_id_t neighbors[0];
450 
451  void set_id(vertex_id_t id) {
452  this->id = id;
453  }
454 
455  /*
456  * The size of the vertex without counting the edge data list.
457  */
458  size_t get_size0() const {
459  return get_header_size() + sizeof(neighbors[0]) * num_edges;
460  }
461 
462  char *get_edge_data_addr() const {
463  return ((char *) this) + ROUNDUP(get_size0(), edge_data_size);
464  }
465 
466  template<class edge_data_type = empty_data>
467  const edge_data_type *get_edge_data_begin() const {
468  assert(sizeof(edge_data_type) == edge_data_size);
469  return (edge_data_type *) get_edge_data_addr();
470  }
471 
472  template<class edge_data_type = empty_data>
473  edge_data_type *get_edge_data_begin() {
474  assert(sizeof(edge_data_type) == edge_data_size);
475  return (edge_data_type *) get_edge_data_addr();
476  }
477 public:
478  static size_t get_header_size() {
479  return offsetof(ext_mem_undirected_vertex, neighbors);
480  }
481 
482  static size_t get_edge_data_offset(vsize_t num_edges,
483  uint32_t edge_data_size) {
484  ext_mem_undirected_vertex v(0, num_edges, edge_data_size);
485  return v.get_edge_data_addr() - (char *) &v;
486  }
487 
488  static ext_mem_undirected_vertex *deserialize(char *buf, size_t size) {
489  assert(size >= ext_mem_undirected_vertex::get_header_size());
490  ext_mem_undirected_vertex *v = (ext_mem_undirected_vertex *) buf;
491  assert(size >= v->get_size());
492  return v;
493  }
494 
495  static size_t serialize(const in_mem_vertex &v, char *buf,
496  size_t size, edge_type type);
497 
498  static vsize_t vsize2num_edges(size_t vertex_size, size_t edge_data_size) {
499  return (vertex_size - get_header_size()) / (sizeof(vertex_id_t)
500  + edge_data_size);
501  }
502 
503  static size_t num_edges2vsize(vsize_t num_edges, size_t edge_data_size) {
504  ext_mem_undirected_vertex v(0, num_edges, edge_data_size);
505  return v.get_size();
506  }
507 
508  ext_mem_undirected_vertex() {
509  this->id = 0;
510  this->num_edges = 0;
511  this->edge_data_size = 0;
512  }
513 
514  ext_mem_undirected_vertex(vertex_id_t id, vsize_t num_edges,
515  uint32_t edge_data_size) {
516  this->id = id;
517  this->num_edges = num_edges;
518  this->edge_data_size = edge_data_size;
519  }
520 
521  size_t get_size() const {
522  if (has_edge_data())
523  return ROUNDUP(((size_t) get_edge_data_addr()) - ((size_t) this)
524  + (num_edges) * edge_data_size, sizeof(vertex_id_t));
525  else
526  return ext_mem_undirected_vertex::get_header_size()
527  + (num_edges) * sizeof(neighbors[0]);
528  }
529 
530  bool has_edge_data() const {
531  return edge_data_size > 0;
532  }
533 
534  size_t get_edge_data_size() const {
535  return edge_data_size;
536  }
537 
538  size_t get_num_edges() const {
539  return num_edges;
540  }
541 
542  vertex_id_t get_neighbor(size_t idx) const {
543  return neighbors[idx];
544  }
545 
546  void set_neighbor(size_t idx, vertex_id_t id) {
547  neighbors[idx] = id;
548  }
549 
550  char *get_raw_edge_data(size_t idx) const {
551  return this->get_edge_data_addr() + idx * edge_data_size;
552  }
553 
554  template<class edge_data_type = empty_data>
555  const edge_data_type &get_edge_data(size_t idx) const {
556  return ((edge_data_type *) this->get_edge_data_addr())[idx];
557  }
558 
559  vertex_id_t get_id() const {
560  return id;
561  }
562 };
563 
564 inline bool ext_mem_vertex_info::has_edges() const
565 {
566  return size > ext_mem_undirected_vertex::get_header_size();
567 }
568 
571 
576 {
577  bool directed;
578 public:
579  page_vertex(bool directed) {
580  this->directed = directed;
581  }
588  virtual size_t get_num_edges(edge_type type) const = 0;
589 
598  virtual edge_iterator get_neigh_begin(edge_type type) const = 0;
599 
607  virtual edge_iterator get_neigh_end(edge_type type) const = 0;
620  size_t start = 0, size_t end = -1) const = 0;
621 
626  virtual vertex_id_t get_id() const = 0;
627 
635  virtual size_t read_edges(edge_type type, vertex_id_t edges[],
636  size_t num) const {
637  ABORT_MSG("read_edges isn't implemented");
638  return 0;
639  }
640 
645  virtual bool is_directed() const {
646  return directed;
647  }
648 
652  virtual void print() const {
653  }
654 };
655 
656 /*
657  * These two ranges are defined as [first, second),
658  * i.e., inclusive in the beginning and exclusive in the end.
659  */
660 typedef std::pair<int, int> timestamp_pair;
661 typedef std::pair<off_t, off_t> offset_pair;
662 
668 {
669 public:
670  TS_page_vertex(bool directed): page_vertex(directed) {
671  }
672 
674 
679  virtual size_t get_num_edges() const = 0;
680 
687  virtual size_t get_num_edges(int timestamp, edge_type type) const = 0;
688 
693  virtual int get_num_timestamps() const = 0;
696 
705  virtual edge_iterator get_neigh_begin(int timestamp,
706  edge_type type) const = 0;
707 
716  virtual edge_iterator get_neigh_end(int timestamp, edge_type type) const = 0;
717 
724  virtual offset_pair get_edge_list_offset(
725  const timestamp_pair &range) const = 0;
726 };
727 
732 {
733  vertex_id_t id;
734  vsize_t num_in_edges;
735  vsize_t num_out_edges;
736  size_t in_size;
737  size_t out_size;
738  const safs::page_byte_array *in_array;
739  const safs::page_byte_array *out_array;
740 public:
741  static vertex_id_t get_id(const safs::page_byte_array &arr) {
742  BOOST_VERIFY(arr.get_size()
743  >= ext_mem_undirected_vertex::get_header_size());
744  ext_mem_undirected_vertex v = arr.get<ext_mem_undirected_vertex>(0);
745  return v.get_id();
746  }
747 
755  bool in_part): page_vertex(true) {
756  size_t size = arr.get_size();
757  BOOST_VERIFY(size >= ext_mem_undirected_vertex::get_header_size());
758  ext_mem_undirected_vertex v = arr.get<ext_mem_undirected_vertex>(0);
759 
760  if (in_part) {
761  in_size = v.get_size();
762  assert(size >= in_size);
763  out_size = 0;
764  this->in_array = &arr;
765  this->out_array = NULL;
766  num_in_edges = v.get_num_edges();
767  num_out_edges = 0;
768  }
769  else {
770  out_size = v.get_size();
771  in_size = 0;
772  assert(size >= out_size);
773  this->out_array = &arr;
774  this->in_array = NULL;
775  num_out_edges = v.get_num_edges();
776  num_in_edges = 0;
777  }
778  id = v.get_id();
779  }
780 
782  const safs::page_byte_array &out_arr): page_vertex(true) {
783  this->in_array = &in_arr;
784  this->out_array = &out_arr;
785 
786  size_t size = in_arr.get_size();
787  BOOST_VERIFY(size >= ext_mem_undirected_vertex::get_header_size());
788  ext_mem_undirected_vertex v = in_arr.get<ext_mem_undirected_vertex>(0);
789  in_size = v.get_size();
790  assert(size >= in_size);
791  id = v.get_id();
792  num_in_edges = v.get_num_edges();
793 
794  size = out_arr.get_size();
795  assert(size >= ext_mem_undirected_vertex::get_header_size());
796  v = out_arr.get<ext_mem_undirected_vertex>(0);
797  out_size = v.get_size();
798  assert(size >= out_size);
799  assert(id == v.get_id());
800  num_out_edges = v.get_num_edges();
801  }
802 
803  size_t get_in_size() const {
804  return in_size;
805  }
806 
807  size_t get_out_size() const {
808  return out_size;
809  }
810 
816  size_t get_num_edges(edge_type type) const {
817  switch(type) {
818  case IN_EDGE:
819  return num_in_edges;
820  case OUT_EDGE:
821  return num_out_edges;
822  case BOTH_EDGES:
823  return num_in_edges + num_out_edges;
824  default:
825  throw invalid_arg_exception("invalid edge type");
826  }
827  }
828 
838  switch(type) {
839  case IN_EDGE:
840  assert(in_array);
841  return in_array->begin<vertex_id_t>(
842  ext_mem_undirected_vertex::get_header_size());
843  case OUT_EDGE:
844  assert(out_array);
845  return out_array->begin<vertex_id_t>(
846  ext_mem_undirected_vertex::get_header_size());
847  default:
848  throw invalid_arg_exception("invalid edge type");
849  }
850  }
851 
852  /*
853  * \brief Get an STL-style const iterator pointing to the *end* of the
854  * neighbor list of a vertex.
855  * \param type The type of edges a user wishes to iterate over. A user
856  * can iterate over `IN_EDGE` or `OUT_EDGE`.
857  * \return A const iterator pointing to the *end* of the
858  * neighbor list of a vertex.
859  */
861  edge_iterator it = get_neigh_begin(type);
862  it += get_num_edges(type);
863  return it;
864  }
865 
866  /*
867  * \brief Get a java-style sequential const iterator that iterates
868  * the neighbors in the specified range.
869  * \return A sequential const iterator.
870  * \param type The type of edges a user wishes to iterate over. A user
871  * can iterate over `IN_EDGE` or `OUT_EDGE`.
872  * \param start The starting offset in the neighbor list iterated by
873  * the sequential iterator.
874  * \param end The end offset in the neighbor list iterated by the sequential
875  * iterator.
876  */
878  size_t end = -1) const {
879  end = std::min(end, get_num_edges(type));
880  assert(start <= end);
881  switch(type) {
882  case IN_EDGE:
883  assert(in_array);
884  return in_array->get_seq_iterator<vertex_id_t>(
885  ext_mem_undirected_vertex::get_header_size()
886  + start * sizeof(vertex_id_t),
887  ext_mem_undirected_vertex::get_header_size()
888  + end * sizeof(vertex_id_t));
889  case OUT_EDGE:
890  assert(out_array);
891  return out_array->get_seq_iterator<vertex_id_t>(
892  ext_mem_undirected_vertex::get_header_size()
893  + start * sizeof(vertex_id_t),
894  ext_mem_undirected_vertex::get_header_size()
895  + end * sizeof(vertex_id_t));
896  default:
897  throw invalid_arg_exception("invalid edge type");
898  }
899  }
900 
909  template<class edge_data_type>
911  edge_type type) const {
912  switch(type) {
913  case IN_EDGE:
914  assert(in_array);
915  return in_array->begin<edge_data_type>(
916  ext_mem_undirected_vertex::get_edge_data_offset(
917  num_in_edges, sizeof(edge_data_type)));
918  case OUT_EDGE:
919  assert(out_array);
920  return out_array->begin<edge_data_type>(
921  ext_mem_undirected_vertex::get_edge_data_offset(
922  num_out_edges, sizeof(edge_data_type)));
923  default:
924  throw invalid_arg_exception("invalid edge type");
925  }
926  }
927 
936  template<class edge_data_type>
938  edge_type type) const {
939  auto it = get_data_begin<edge_data_type>(type);
940  it += get_num_edges(type);
941  return it;
942  }
943 
953  template<class edge_data_type>
955  edge_type type, size_t start, size_t end) const {
956  off_t edge_end;
957  switch(type) {
958  case IN_EDGE:
959  assert(in_array);
960  edge_end = ext_mem_undirected_vertex::get_edge_data_offset(
961  num_in_edges, sizeof(edge_data_type));
962  return in_array->get_seq_iterator<edge_data_type>(
963  edge_end + start * sizeof(edge_data_type),
964  edge_end + end * sizeof(edge_data_type));
965  case OUT_EDGE:
966  assert(out_array);
967  edge_end = ext_mem_undirected_vertex::get_edge_data_offset(
968  num_out_edges, sizeof(edge_data_type));
969  return out_array->get_seq_iterator<edge_data_type>(
970  edge_end + start * sizeof(edge_data_type),
971  edge_end + end * sizeof(edge_data_type));
972  default:
973  throw invalid_arg_exception("invalid edge type");
974  }
975  }
976 
984  template<class edge_data_type>
986  edge_type type) const {
987  size_t start = 0;
988  size_t end = get_num_edges(type);
989  return get_data_seq_it<edge_data_type>(type, start, end);
990  }
991 
992  virtual size_t read_edges(edge_type type, vertex_id_t edges[],
993  size_t num) const {
994  size_t num_edges;
995  switch(type) {
996  case IN_EDGE:
997  assert(num_in_edges <= num);
998  assert(in_array);
999  num_edges = num_in_edges;
1000  in_array->memcpy(ext_mem_undirected_vertex::get_header_size(),
1001  (char *) edges, sizeof(vertex_id_t) * num_edges);
1002  break;
1003  case OUT_EDGE:
1004  assert(num_out_edges <= num);
1005  assert(out_array);
1006  num_edges = num_out_edges;
1007  out_array->memcpy(ext_mem_undirected_vertex::get_header_size(),
1008  (char *) edges, sizeof(vertex_id_t) * num_edges);
1009  break;
1010  default:
1011  abort();
1012  }
1013  return num_edges;
1014  }
1015 
1019  vertex_id_t get_id() const {
1020  return id;
1021  }
1022 
1026  bool has_in_part() const {
1027  return in_array;
1028  }
1029 
1033  bool has_out_part() const {
1034  return out_array;
1035  }
1036 };
1037 
1042 {
1043  vertex_id_t id;
1044  vsize_t vertex_size;
1045  vsize_t num_edges;
1046  const safs::page_byte_array &array;
1047 public:
1049  false), array(arr) {
1050  size_t size = arr.get_size();
1051  BOOST_VERIFY(size >= ext_mem_undirected_vertex::get_header_size());
1052  // We only want to know the header of the vertex, so we don't need to
1053  // know what data type an edge has.
1054  ext_mem_undirected_vertex v = arr.get<ext_mem_undirected_vertex>(0);
1055  BOOST_VERIFY((unsigned) size >= v.get_size());
1056  vertex_size = v.get_size();
1057 
1058  id = v.get_id();
1059  num_edges = v.get_num_edges();
1060  }
1061 
1062  size_t get_size() const {
1063  return vertex_size;
1064  }
1065 
1072  size_t get_num_edges(edge_type type = edge_type::IN_EDGE) const {
1073  return num_edges;
1074  }
1075 
1085  return array.begin<vertex_id_t>(
1086  ext_mem_undirected_vertex::get_header_size());
1087  }
1088 
1098  auto it = get_neigh_begin(type);
1099  it += num_edges;
1100  return it;
1101  }
1102 
1116  size_t end = -1) const {
1117  end = std::min(end, get_num_edges(type));
1118  assert(start <= end);
1119  assert(end <= get_num_edges(type));
1120  return array.get_seq_iterator<vertex_id_t>(
1121  ext_mem_undirected_vertex::get_header_size()
1122  + start * sizeof(vertex_id_t),
1123  ext_mem_undirected_vertex::get_header_size()
1124  + end * sizeof(vertex_id_t));
1125  }
1126 
1134  virtual size_t read_edges(edge_type type, vertex_id_t edges[],
1135  size_t num) const {
1136  vsize_t num_edges = get_num_edges(type);
1137  assert(num_edges <= num);
1138  array.memcpy(ext_mem_undirected_vertex::get_header_size(),
1139  (char *) edges, sizeof(vertex_id_t) * num_edges);
1140  return num_edges;
1141  }
1142 
1150  template<class edge_data_type>
1152  size_t start, size_t end) const {
1153  off_t edge_end = ext_mem_undirected_vertex::get_edge_data_offset(
1154  num_edges, sizeof(edge_data_type));
1155  return array.get_seq_iterator<edge_data_type>(
1156  edge_end + start * sizeof(edge_data_type),
1157  edge_end + end * sizeof(edge_data_type));
1158  }
1159 
1165  template<class edge_data_type>
1167  ) const {
1168  size_t start = 0;
1169  size_t end = get_num_edges();
1170  return get_data_seq_it<edge_data_type>(start, end);
1171  }
1172 
1177  vertex_id_t get_id() const {
1178  return id;
1179  }
1180 };
1181 
1182 /*
1183  * The offset of in- and out-edges in the edge list of a time-series vertex.
1184  */
1185 struct edge_off
1186 {
1187  // TODO vsize_t may not be enough. A graph with many timestamps may have
1188  // many edges.
1189  vsize_t in_off;
1190  vsize_t out_off;
1191 };
1192 
1193 class in_mem_vertex
1194 {
1195 public:
1196  typedef std::shared_ptr<in_mem_vertex> ptr;
1197 
1198  virtual vertex_id_t get_id() const = 0;
1199  virtual bool has_edge_data() const = 0;
1200  virtual size_t get_edge_data_size() const = 0;
1201  virtual void serialize_edges(vertex_id_t ids[], edge_type type) const = 0;
1202  virtual void serialize_edge_data(char *data, edge_type type) const = 0;
1203  virtual size_t get_serialize_size(edge_type type) const = 0;
1204  virtual size_t get_num_edges(edge_type type) const = 0;
1205  virtual ptr create_remapped_vertex(
1206  const std::unordered_map<vertex_id_t, vertex_id_t> &map) const = 0;
1207  virtual void remap(
1208  const std::unordered_map<vertex_id_t, vertex_id_t> &map) = 0;
1209 };
1210 
1211 /*
1212  * This is the size of a page vertex (either directed or undirected).
1213  * It's mainly used for allocating a buffer from the stack for a page vertex.
1214  */
1215 const size_t STACK_PAGE_VERTEX_SIZE = sizeof(page_directed_vertex);
1216 
1217 template<class edge_data_type = empty_data>
1218 class in_mem_directed_vertex: public in_mem_vertex
1219 {
1220  vertex_id_t id;
1221  bool has_data;
1222  std::vector<vertex_id_t> out_edges;
1223  std::vector<vertex_id_t> in_edges;
1224  std::vector<edge_data_type> out_data;
1225  std::vector<edge_data_type> in_data;
1226 public:
1227  in_mem_directed_vertex(vertex_id_t id, bool has_data) {
1228  this->id = id;
1229  this->has_data = has_data;
1230  }
1231 
1232  in_mem_directed_vertex(const page_directed_vertex &vertex,
1233  bool has_data) {
1234  this->id = vertex.get_id();
1235  this->has_data = has_data;
1236  in_edges.resize(vertex.get_num_edges(edge_type::IN_EDGE));
1237  vertex.read_edges(edge_type::IN_EDGE, in_edges.data(),
1238  vertex.get_num_edges(edge_type::IN_EDGE));
1239  out_edges.resize(vertex.get_num_edges(edge_type::OUT_EDGE));
1240  vertex.read_edges(edge_type::OUT_EDGE, out_edges.data(),
1241  vertex.get_num_edges(edge_type::OUT_EDGE));
1242  assert(!has_data);
1243  }
1244 
1245  vertex_id_t get_id() const {
1246  return id;
1247  }
1248 
1249  bool has_edge_data() const {
1250  return has_data;
1251  }
1252 
1253  size_t get_edge_data_size() const {
1254  return has_data ? sizeof(edge_data_type) : 0;
1255  }
1256 
1257  virtual void serialize_edges(vertex_id_t ids[], edge_type type) const {
1258  switch (type) {
1259  case edge_type::IN_EDGE:
1260  memcpy(ids, in_edges.data(), in_edges.size() * sizeof(ids[0]));
1261  break;
1262  case edge_type::OUT_EDGE:
1263  memcpy(ids, out_edges.data(), out_edges.size() * sizeof(ids[0]));
1264  break;
1265  default:
1266  abort();
1267  }
1268  }
1269 
1270  virtual void serialize_edge_data(char *data, edge_type type) const {
1271  assert(has_data);
1272  switch (type) {
1273  case edge_type::IN_EDGE:
1274  memcpy(data, in_data.data(),
1275  in_data.size() * sizeof(edge_data_type));
1276  break;
1277  case edge_type::OUT_EDGE:
1278  memcpy(data, out_data.data(),
1279  out_data.size() * sizeof(edge_data_type));
1280  break;
1281  default:
1282  abort();
1283  }
1284  }
1285 
1286  /*
1287  * Add an in-edge to the vertex.
1288  * We allow to have multiple same edges, but all edges must be added
1289  * in a sorted order.
1290  */
1291  void add_in_edge(const edge<edge_data_type> &e) {
1292  assert(e.get_to() == id);
1293  in_edges.push_back(e.get_from());
1294  if (has_edge_data())
1295  in_data.push_back(e.get_data());
1296  }
1297 
1298  /*
1299  * Add an out-edge to the vertex.
1300  * We allow to have multiple same edges, but all edges must be added
1301  * in a sorted order.
1302  */
1303  void add_out_edge(const edge<edge_data_type> &e) {
1304  assert(e.get_from() == id);
1305  out_edges.push_back(e.get_to());
1306  if (has_edge_data())
1307  out_data.push_back(e.get_data());
1308  }
1309 
1310  size_t get_num_edges(edge_type type) const {
1311  switch(type) {
1312  case edge_type::IN_EDGE:
1313  return get_num_in_edges();
1314  case edge_type::OUT_EDGE:
1315  return get_num_out_edges();
1316  case edge_type::BOTH_EDGES:
1317  return get_num_in_edges() + get_num_out_edges();
1318  default:
1319  throw invalid_arg_exception("invalid edge type");
1320  }
1321  }
1322 
1323  size_t get_num_in_edges() const {
1324  return in_edges.size();
1325  }
1326 
1327  size_t get_num_out_edges() const {
1328  return out_edges.size();
1329  }
1330 
1331  edge_const_iterator<edge_data_type> get_in_edge_begin() const {
1332  return edge_const_iterator<edge_data_type>(id,
1333  in_edges.data(), in_data.data(), true);
1334  }
1335 
1336  edge_const_iterator<edge_data_type> get_in_edge_end() const {
1337  edge_const_iterator<edge_data_type> it = get_in_edge_begin();
1338  it += get_num_in_edges();
1339  return it;
1340  }
1341 
1342  edge_const_iterator<edge_data_type> get_out_edge_begin() const {
1343  return edge_const_iterator<edge_data_type>(id,
1344  out_edges.data(), out_data.data(), false);
1345  }
1346 
1347  edge_const_iterator<edge_data_type> get_out_edge_end() const {
1348  edge_const_iterator<edge_data_type> it = get_out_edge_begin();
1349  it += get_num_out_edges();
1350  return it;
1351  }
1352 
1353  const edge<edge_data_type> get_in_edge(int idx) const {
1354  if (has_edge_data())
1355  return edge<edge_data_type>(in_edges[idx], id, in_data[idx]);
1356  else
1357  return edge<edge_data_type>(in_edges[idx], id);
1358  }
1359 
1360  const edge<edge_data_type> get_out_edge(int idx) const {
1361  if (has_edge_data())
1362  return edge<edge_data_type>(id, out_edges[idx], out_data[idx]);
1363  else
1364  return edge<edge_data_type>(id, out_edges[idx]);
1365  }
1366 
1367  size_t get_serialize_size(edge_type type) const {
1368  assert(type == edge_type::IN_EDGE || type == edge_type::OUT_EDGE);
1369  if (type == edge_type::IN_EDGE) {
1370  ext_mem_undirected_vertex v(0, in_edges.size(),
1371  has_data ? sizeof(edge_data_type) : 0);
1372  return v.get_size();
1373  }
1374  else {
1375  ext_mem_undirected_vertex v(0, out_edges.size(),
1376  has_data ? sizeof(edge_data_type) : 0);
1377  return v.get_size();
1378  }
1379  }
1380 
1381  in_mem_vertex::ptr create_remapped_vertex(
1382  const std::unordered_map<vertex_id_t, vertex_id_t> &map) const {
1383  in_mem_directed_vertex<edge_data_type> *new_v
1384  = new in_mem_directed_vertex<edge_data_type>(*this);
1385  std::unordered_map<vertex_id_t, vertex_id_t>::const_iterator it
1386  = map.find(new_v->id);
1387  assert(it != map.end());
1388  new_v->id = it->second;
1389  for (size_t i = 0; i < new_v->out_edges.size(); i++) {
1390  it = map.find(new_v->out_edges[i]);
1391  assert(it != map.end());
1392  new_v->out_edges[i] = it->second;
1393  }
1394  for (size_t i = 0; i < new_v->in_edges.size(); i++) {
1395  it = map.find(new_v->in_edges[i]);
1396  assert(it != map.end());
1397  new_v->in_edges[i] = it->second;
1398  }
1399  return in_mem_vertex::ptr(new_v);
1400  }
1401 
1402  virtual void remap(
1403  const std::unordered_map<vertex_id_t, vertex_id_t> &map) {
1404  std::unordered_map<vertex_id_t, vertex_id_t>::const_iterator it
1405  = map.find(this->id);
1406  assert(it != map.end());
1407  this->id = it->second;
1408  for (size_t i = 0; i < this->out_edges.size(); i++) {
1409  it = map.find(this->out_edges[i]);
1410  assert(it != map.end());
1411  this->out_edges[i] = it->second;
1412  }
1413  for (size_t i = 0; i < this->in_edges.size(); i++) {
1414  it = map.find(this->in_edges[i]);
1415  assert(it != map.end());
1416  this->in_edges[i] = it->second;
1417  }
1418  }
1419 
1420  void print() const {
1421  printf("v%ld has edge data: %d\n", (unsigned long) get_id(), has_edge_data());
1422  printf("There are %ld in-edges: ", in_edges.size());
1423  for (size_t i = 0; i < in_edges.size(); i++)
1424  printf("%ld, ", (unsigned long) in_edges[i]);
1425  printf("\n");
1426  printf("There are %ld out-edges: ", out_edges.size());
1427  for (size_t i = 0; i < out_edges.size(); i++)
1428  printf("%ld, ", (unsigned long) out_edges[i]);
1429  printf("\n");
1430  }
1431 };
1432 
1433 template<class edge_data_type = empty_data>
1434 class in_mem_undirected_vertex: public in_mem_vertex
1435 {
1436  bool has_data;
1437  vertex_id_t id;
1438  std::vector<vertex_id_t> edges;
1439  std::vector<edge_data_type> data_arr;
1440 public:
1441  in_mem_undirected_vertex(vertex_id_t id, bool has_data) {
1442  this->id = id;
1443  this->has_data = has_data;
1444  }
1445 
1446  in_mem_undirected_vertex(const page_undirected_vertex &vertex,
1447  bool has_data) {
1448  this->id = vertex.get_id();
1449  this->has_data = has_data;
1450  edges.resize(vertex.get_num_edges());
1451  vertex.read_edges(edge_type::IN_EDGE, edges.data(),
1452  vertex.get_num_edges());
1453  assert(!has_data);
1454  }
1455 
1456  vertex_id_t get_id() const {
1457  return id;
1458  }
1459 
1460  bool has_edge_data() const {
1461  return has_data;
1462  }
1463 
1464  size_t get_edge_data_size() const {
1465  return has_data ? sizeof(edge_data_type) : 0;
1466  }
1467 
1468  virtual void serialize_edges(vertex_id_t ids[], edge_type type) const {
1469  memcpy(ids, edges.data(), edges.size() * sizeof(ids[0]));
1470  }
1471 
1472  virtual void serialize_edge_data(char *data, edge_type type) const {
1473  memcpy(data, data_arr.data(), data_arr.size() * sizeof(edge_data_type));
1474  }
1475 
1476  /*
1477  * Add an edge to the vertex.
1478  * We allow to have multiple same edges, but all edges must be added
1479  * in a sorted order.
1480  */
1481  void add_edge(const edge<edge_data_type> &e) {
1482  assert(e.get_from() == id);
1483  if (!edges.empty())
1484  assert(e.get_to() >= edges.back());
1485  edges.push_back(e.get_to());
1486  if (has_edge_data())
1487  data_arr.push_back(e.get_data());
1488  }
1489 
1490  size_t get_num_edges(edge_type type = edge_type::IN_EDGE) const {
1491  return edges.size();
1492  }
1493 
1494  bool has_edge(vertex_id_t id) const {
1495  for (size_t i = 0; i < edges.size(); i++)
1496  if (edges[i] == id)
1497  return true;
1498  return false;
1499  }
1500 
1501  const edge<edge_data_type> get_edge(int idx) const {
1502  if (has_edge_data())
1503  return edge<edge_data_type>(id, edges[idx], data_arr[idx]);
1504  else
1505  return edge<edge_data_type>(id, edges[idx]);
1506  }
1507 
1508  size_t get_serialize_size(edge_type type) const {
1509  ext_mem_undirected_vertex v(0, edges.size(),
1510  has_data ? sizeof(edge_data_type) : 0);
1511  return v.get_size();
1512  }
1513 
1514  in_mem_vertex::ptr create_remapped_vertex(
1515  const std::unordered_map<vertex_id_t, vertex_id_t> &map) const {
1516  in_mem_undirected_vertex<edge_data_type> *new_v
1517  = new in_mem_undirected_vertex<edge_data_type>(*this);
1518  std::unordered_map<vertex_id_t, vertex_id_t>::const_iterator it
1519  = map.find(new_v->id);
1520  assert(it != map.end());
1521  new_v->id = it->second;
1522  for (size_t i = 0; i < new_v->edges.size(); i++) {
1523  it = map.find(new_v->edges[i]);
1524  assert(it != map.end());
1525  new_v->edges[i] = it->second;
1526  }
1527  return in_mem_vertex::ptr(new_v);
1528  }
1529 
1530  virtual void remap(
1531  const std::unordered_map<vertex_id_t, vertex_id_t> &map) {
1532  std::unordered_map<vertex_id_t, vertex_id_t>::const_iterator it
1533  = map.find(this->id);
1534  assert(it != map.end());
1535  this->id = it->second;
1536  for (size_t i = 0; i < this->edges.size(); i++) {
1537  it = map.find(this->edges[i]);
1538  assert(it != map.end());
1539  this->edges[i] = it->second;
1540  }
1541  }
1542 };
1543 
1544 }
1545 
1546 #endif
virtual edge_seq_iterator get_neigh_seq_it(edge_type type, size_t start=0, size_t end=-1) const =0
Get a java-style sequential const iterator that iterates the neighbors in the specified range...
virtual size_t get_num_edges(edge_type type) const =0
Get the number of edges connecting the vertex to othe vertices.
edge_seq_iterator get_neigh_seq_it(edge_type type, size_t start=0, size_t end=-1) const
Get a java-style sequential const iterator for the specified range in a vertex's neighbor list...
Definition: vertex.h:1115
size_t get_num_edges(edge_type type) const
Get the number of edges associated with a vertex.
Definition: vertex.h:816
unsigned int vsize_t
Basic data types used in FlashGraph.
Definition: FG_basic_types.h:33
virtual size_t read_edges(edge_type type, vertex_id_t edges[], size_t num) const
Read the edges of the specified type.
Definition: vertex.h:992
Definition: vertex.h:48
virtual edge_iterator get_neigh_end(edge_type type) const =0
Get an STL-style const iterator pointing to the end of a vertex's neighbor list.
Definition: cache.h:550
const_iterator< T > begin(off_t byte_off=0) const
Definition: cache.h:969
void memcpy(off_t rel_off, char buf[], size_t size) const
virtual bool is_directed() const
Whether the vertex is directed.
Definition: vertex.h:645
edge_type
Edge type of an edge in the graph.
Definition: vertex.h:43
edge_seq_iterator get_neigh_seq_it(edge_type type, size_t start=0, size_t end=-1) const
Get a java-style sequential const iterator that iterates the neighbors in the specified range...
Definition: vertex.h:877
vertex_id_t get_id() const
Get the id of the vertex.
Definition: vertex.h:1019
safs::page_byte_array::const_iterator< edge_data_type > get_data_begin(edge_type type) const
Get an STL-style const iterator pointing to the first element in the edge data list of a vertex...
Definition: vertex.h:910
edge_iterator get_neigh_begin(edge_type type) const
Get an STL-style const iterator pointing to the first element in the neighbor list of a vertex...
Definition: vertex.h:1084
edge_iterator get_neigh_end(edge_type type) const
Get an STL-style const iterator pointing to the end of the neighbor list of a vertex.
Definition: vertex.h:1097
safs::page_byte_array::seq_const_iterator< edge_data_type > get_data_seq_it(size_t start, size_t end) const
Get a java-style sequential const iterator for edge data with additional parameters to define the ran...
Definition: vertex.h:1151
safs::page_byte_array::seq_const_iterator< edge_data_type > get_data_seq_it(edge_type type) const
Get a java-style sequential const iterator that iterates the edge data list.
Definition: vertex.h:985
virtual vertex_id_t get_id() const =0
Get the vertex unique ID.
safs::page_byte_array::seq_const_iterator< edge_data_type > get_data_seq_it() const
Get a java-style sequential const iterator that iterates the edge data list.
Definition: vertex.h:1166
safs::page_byte_array::const_iterator< edge_data_type > get_data_end(edge_type type) const
Get an STL-style const iterator pointing to the end of the edge data list of a vertex.
Definition: vertex.h:937
This vertex class represents an undirected vertex in the page cache.
Definition: vertex.h:1041
Vertex representation when in the page cache.
Definition: vertex.h:575
Definition: vertex.h:731
virtual size_t read_edges(edge_type type, vertex_id_t edges[], size_t num) const
Read the edges of the specified type.
Definition: vertex.h:635
virtual offset_pair get_edge_list_offset(const timestamp_pair &range) const =0
This method should translate the timestamp range to the absolute location of the adjacency list in th...
size_t get_num_edges(edge_type type=edge_type::IN_EDGE) const
Get the number of edges of a specific edge_type associated with the vertex.
Definition: vertex.h:1072
Definition: vertex.h:667
virtual size_t get_size() const =0
Definition: vertex.h:255
virtual edge_iterator get_neigh_end(int timestamp, edge_type type) const =0
Get an STL-style const iterator pointing to the end of the neighbor list of a vertex at a specific ti...
Definition: vertex.h:45
edge_iterator get_neigh_end(edge_type type) const
Get an STL-style const iterator pointing to the end of a vertex's neighbor list.
Definition: vertex.h:860
Definition: vertex.h:47
Definition: vertex.h:336
virtual size_t get_num_edges() const =0
Get the global number of edges associated with a vertex.
virtual size_t read_edges(edge_type type, vertex_id_t edges[], size_t num) const
Read the edges of the specified type.
Definition: vertex.h:1134
Definition: vertex.h:46
safs::page_byte_array::seq_const_iterator< edge_data_type > get_data_seq_it(edge_type type, size_t start, size_t end) const
Get a java-style sequential const iterator for edge data with additional parameters to define the ran...
Definition: vertex.h:954
bool has_in_part() const
Determine whether the page vertex contains in-edges.
Definition: vertex.h:1026
vertex_id_t get_id() const
Get the vertex ID.
Definition: vertex.h:1177
seq_const_iterator< T > get_seq_iterator(off_t byte_off, off_t byte_end) const
Definition: cache.h:1007
virtual int get_num_timestamps() const =0
Get the number of time stamps the vertex has in the graph.
virtual edge_iterator get_neigh_begin(edge_type type) const =0
Get an STL-style const iterator pointing to the first neighbor in a vertex's neighbor list...
bool has_out_part() const
Determine whether the page vertex contains out-edges.
Definition: vertex.h:1033
virtual edge_iterator get_neigh_begin(int timestamp, edge_type type) const =0
Get an STL-style const iterator pointing to the first element in the neighbor list of a vertex at a s...
edge_iterator get_neigh_begin(edge_type type) const
Get an STL-style const iterator pointing to the first element in the neighbor list of a vertex...
Definition: vertex.h:837