FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
vertex_index.h
1 #ifndef __VERTEX_INDEX_H__
2 #define __VERTEX_INDEX_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashGraph.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <string.h>
24 
25 #include <string>
26 #include <unordered_map>
27 #include <boost/format.hpp>
28 
29 #include "native_file.h"
30 
31 #include "vertex.h"
32 #include "graph_file_header.h"
33 
34 namespace fg
35 {
36 
37 class vertex_index
38 {
39 protected:
40  union {
41  struct {
42  struct graph_header_struct header;
43  size_t entry_size;
44  size_t num_entries;
45  off_t out_part_loc;
46 
47  // These are used for compressed vertx index.
48  bool compressed;
49  size_t num_large_in_vertices;
50  size_t num_large_out_vertices;
51  } data;
52  char page[graph_header::HEADER_SIZE];
53  } h;
54 
55  vertex_index(size_t entry_size) {
56  assert(sizeof(*this) == graph_header::HEADER_SIZE);
57  memset(this, 0, sizeof(*this));
58  graph_header::init(h.data.header);
59  h.data.entry_size = entry_size;
60  h.data.num_entries = 0;
61  h.data.out_part_loc = 0;
62 
63  h.data.compressed = false;
64  h.data.num_large_in_vertices = 0;
65  h.data.num_large_out_vertices = 0;
66  }
67 
68  class destroy_index
69  {
70  public:
71  void operator()(vertex_index *index) {
72  free(index);
73  }
74  };
75 
76 public:
77  typedef std::shared_ptr<vertex_index> ptr;
78 
79  /*
80  * Load the vertex index from SAFS.
81  */
82  static vertex_index::ptr safs_load(const std::string &index_file);
83  /*
84  * Load the vertex index from the Linux filesystem.
85  */
86  static vertex_index::ptr load(const std::string &index_file);
87 
88  static size_t get_header_size() {
89  return sizeof(vertex_index);
90  }
91 
92  const graph_header &get_graph_header() const {
93  return (const graph_header &) *this;
94  }
95 
96  size_t get_num_vertices() const {
97  return h.data.header.num_vertices;
98  }
99 
100  size_t get_num_entries() const {
101  return h.data.num_entries;
102  }
103 
104  vertex_id_t get_max_id() const {
105  return get_num_vertices() - 1;
106  }
107 
108  size_t get_index_size() const;
109 
110  off_t get_out_part_loc() const {
111  return h.data.out_part_loc;
112  }
113 
114  bool is_compressed() const {
115  return h.data.compressed;
116  }
117 
118  void dump(const std::string &file) const {
119  FILE *f = fopen(file.c_str(), "w");
120  if (f == NULL) {
121  perror("fopen");
122  abort();
123  }
124  BOOST_VERIFY(fwrite(this, vertex_index::get_index_size(), 1, f));
125 
126  fclose(f);
127  }
128 };
129 
130 /*
131  * This vertex index maps a vertex id to the location of the vertex in a file.
132  */
133 template <class vertex_entry_type>
134 class vertex_index_temp: public vertex_index
135 {
136  vertex_entry_type vertices[1];
137 
138 protected:
139  vertex_index_temp(const graph_header &header): vertex_index(
140  sizeof(vertex_entry_type)) {
141  memcpy(this, &header, sizeof(h.data.header));
142  h.data.num_entries = 1;
143  vertices[0] = vertex_entry_type(sizeof(graph_header));
144  }
145 public:
146  typedef std::shared_ptr<vertex_index_temp<vertex_entry_type> > ptr;
147 
148  static typename vertex_index_temp<vertex_entry_type>::ptr cast(
149  vertex_index::ptr index) {
150  return std::static_pointer_cast<vertex_index_temp<vertex_entry_type>,
151  vertex_index>(index);
152  }
153 
154  static vertex_index::ptr create(const graph_header &header,
155  const std::vector<vertex_entry_type> &vertices) {
156  char *buf = (char *) malloc(vertex_index::get_header_size()
157  + vertices.size() * sizeof(vertices[0]));
158  vertex_index_temp<vertex_entry_type> *index
159  = new (buf) vertex_index_temp<vertex_entry_type>(header);
160  index->h.data.num_entries = vertices.size();
161  assert(header.get_num_vertices() + 1 == vertices.size());
162  memcpy(buf + vertex_index::get_header_size(), vertices.data(),
163  vertices.size() * sizeof(vertices[0]));
164  return vertex_index::ptr(index, destroy_index());
165  }
166 
167  static void dump(const std::string &file, const graph_header &header,
168  const std::vector<vertex_entry_type> &vertices) {
169  vertex_index_temp<vertex_entry_type> index(header);
170  index.h.data.num_entries = vertices.size();
171  assert(header.get_num_vertices() + 1 == vertices.size());
172  FILE *f = fopen(file.c_str(), "w");
173  if (f == NULL)
174  ABORT_MSG(boost::format("fail to open %1%: %2%")
175  % file % strerror(errno));
176 
177  BOOST_VERIFY(fwrite(&index, vertex_index::get_header_size(), 1, f));
178  BOOST_VERIFY(fwrite(vertices.data(),
179  vertices.size() * sizeof(vertices[0]), 1, f));
180 
181  fclose(f);
182  }
183 
184  const vertex_entry_type &get_vertex(vertex_id_t id) const {
185  assert(id < h.data.num_entries);
186  return vertices[id];
187  }
188 
189  vertex_entry_type *get_data() {
190  return vertices;
191  }
192 
193  const vertex_entry_type *get_data() const {
194  return vertices;
195  }
196 
197  size_t cal_index_size() const {
198  return sizeof(vertex_index)
199  + h.data.num_entries * h.data.entry_size;
200  }
201 
202  void verify() const {
203  assert(h.data.entry_size == sizeof(vertex_entry_type));
204  assert(h.data.header.num_vertices + 1 == h.data.num_entries);
205  }
206 };
207 
208 class vertex_offset
209 {
210  off_t off;
211 public:
212  vertex_offset() {
213  off = 0;
214  }
215 
216  vertex_offset(off_t off) {
217  this->off = off;
218  }
219 
220  void init(off_t off) {
221  this->off = off;
222  }
223 
224  void init(const vertex_offset &prev, const in_mem_vertex &v) {
225  this->off = prev.off + v.get_serialize_size(edge_type::IN_EDGE);
226  }
227 
228  off_t get_off() const {
229  return off;
230  }
231 };
232 
233 class undirected_vertex_index: public vertex_index_temp<vertex_offset>
234 {
235 public:
236  typedef std::shared_ptr<undirected_vertex_index> ptr;
237 
238  static undirected_vertex_index::ptr cast(vertex_index::ptr index) {
239  assert(!index->is_compressed());
240  assert(!index->get_graph_header().is_directed_graph());
241  return std::static_pointer_cast<undirected_vertex_index, vertex_index>(
242  index);
243  }
244 
245  static undirected_vertex_index::ptr load(const std::string &index_file) {
246  undirected_vertex_index::ptr ret
247  = cast(vertex_index_temp<vertex_offset>::load(index_file));
248  ret->verify();
249  return ret;
250  }
251 
252  void verify() const {
253  vertex_index_temp<vertex_offset>::verify();
254  // Right now all other types of graphs use the default vertex index.
255  assert(get_graph_header().get_graph_type() != graph_type::DIRECTED);
256  assert(get_vertex(0).get_off() == sizeof(graph_header));
257  assert(h.data.out_part_loc == 0);
258  }
259 
260  size_t get_graph_size() const {
261  off_t last_idx = h.data.num_entries - 1;
262  assert((size_t) last_idx == h.data.header.num_vertices);
263  return get_vertex(last_idx).get_off();
264  }
265 
266  ext_mem_vertex_info get_vertex_info(vertex_id_t id) const {
267  off_t next_off = get_vertex(id + 1).get_off();
268  off_t off = get_vertex(id).get_off();
269  return ext_mem_vertex_info(id, off, next_off - off);
270  }
271 };
272 
273 class directed_vertex_entry
274 {
275  off_t in_off;
276  off_t out_off;
277 public:
278  directed_vertex_entry() {
279  in_off = 0;
280  out_off = 0;
281  }
282 
283  // When the vertex index is constructed, we assume in-part and out-part
284  // of vertices are stored in two separate files.
285  directed_vertex_entry(off_t off) {
286  in_off = off;
287  out_off = off;
288  }
289 
290  directed_vertex_entry(off_t in_off, off_t out_off) {
291  this->in_off = in_off;
292  this->out_off = out_off;
293  }
294 
295  void init(const directed_vertex_entry &prev, const in_mem_vertex &v) {
296  this->in_off = prev.in_off + v.get_serialize_size(IN_EDGE);
297  this->out_off = prev.out_off + v.get_serialize_size(OUT_EDGE);
298  }
299 
300  off_t get_in_off() const {
301  return in_off;
302  }
303 
304  off_t get_out_off() const {
305  return out_off;
306  }
307 };
308 
309 class directed_vertex_index: public vertex_index_temp<directed_vertex_entry>
310 {
311  directed_vertex_index(const graph_header &header): vertex_index_temp<directed_vertex_entry>(header) {
312  }
313 public:
314  typedef std::shared_ptr<directed_vertex_index> ptr;
315 
316  static directed_vertex_index::ptr cast(vertex_index::ptr index) {
317  assert(!index->is_compressed());
318  assert(index->get_graph_header().is_directed_graph());
319  return std::static_pointer_cast<directed_vertex_index, vertex_index>(
320  index);
321  }
322 
323  static directed_vertex_index::ptr load(const std::string &index_file) {
324  directed_vertex_index::ptr ret
325  = cast(vertex_index_temp<directed_vertex_entry>::load(index_file));
326  ret->verify();
327  return ret;
328  }
329 
330  static vertex_index::ptr create(const graph_header &header,
331  const std::vector<directed_vertex_entry> &vertices) {
332  char *buf = (char *) malloc(vertex_index::get_header_size()
333  + vertices.size() * sizeof(vertices[0]));
334  directed_vertex_index *index = new (buf) directed_vertex_index(header);
335  index->h.data.num_entries = vertices.size();
336  index->h.data.out_part_loc = vertices.front().get_out_off();
337  assert(header.get_num_vertices() + 1 == vertices.size());
338  memcpy(buf + vertex_index::get_header_size(), vertices.data(),
339  vertices.size() * sizeof(vertices[0]));
340  return vertex_index::ptr(index, destroy_index());
341  }
342 
343  static void dump(const std::string &file, const graph_header &header,
344  const std::vector<directed_vertex_entry> &vertices) {
345  directed_vertex_index index(header);
346  index.h.data.num_entries = vertices.size();
347  index.h.data.out_part_loc = vertices.front().get_out_off();
348  assert(header.get_num_vertices() + 1 == vertices.size());
349  FILE *f = fopen(file.c_str(), "w");
350  if (f == NULL)
351  ABORT_MSG(boost::format("fail to open %1%: %2%")
352  % file % strerror(errno));
353 
354  BOOST_VERIFY(fwrite(&index, vertex_index::get_header_size(), 1, f));
355  BOOST_VERIFY(fwrite(vertices.data(),
356  vertices.size() * sizeof(vertices[0]), 1, f));
357 
358  fclose(f);
359  }
360 
361  void verify() const {
362  vertex_index_temp<directed_vertex_entry>::verify();
363  TEST(get_graph_header().get_graph_type() == graph_type::DIRECTED);
364  TEST(get_vertex(0).get_in_off() == sizeof(graph_header));
365  // All out-part of vertices are stored behind the in-part of vertices.
366  TEST(get_vertex(0).get_out_off()
367  == get_vertex(get_num_vertices()).get_in_off());
368  TEST(h.data.out_part_loc == get_vertex(0).get_out_off());
369  }
370 
371  size_t get_graph_size() const {
372  off_t last_idx = h.data.num_entries - 1;
373  assert((size_t) last_idx == h.data.header.num_vertices);
374  return get_vertex(last_idx).get_out_off();
375  }
376 
377  ext_mem_vertex_info get_vertex_info_in(vertex_id_t id) const {
378  off_t next_off = get_vertex(id + 1).get_in_off();
379  off_t off = get_vertex(id).get_in_off();
380  return ext_mem_vertex_info(id, off, next_off - off);
381  }
382 
383  ext_mem_vertex_info get_vertex_info_out(vertex_id_t id) const {
384  off_t next_off = get_vertex(id + 1).get_out_off();
385  off_t off = get_vertex(id).get_out_off();
386  return ext_mem_vertex_info(id, off, next_off - off);
387  }
388 };
389 
390 /*
391  * A compressed vertex index groups multiple vertices into a single data
392  * structure, called compressed vertex entry. Each entry contains the location
393  * of the first vertex in the entry and the number of edges of each vertex
394  * in the entry.
395  */
396 
397 class compressed_vertex_entry
398 {
399 public:
400  typedef unsigned char edge_size_t;
401  static const size_t ENTRY_SIZE = 32;
402  static const size_t LARGE_VERTEX_SIZE
403  = std::numeric_limits<edge_size_t>::max();
404 };
405 
406 /*
407  * This defines the compressed vertex entry for a directed graph.
408  */
409 class compressed_directed_vertex_entry: public compressed_vertex_entry
410 {
411  typedef std::pair<edge_size_t, edge_size_t> edge_pair_t;
412  directed_vertex_entry start_offs;
413  edge_pair_t edges[ENTRY_SIZE];
414 public:
415  compressed_directed_vertex_entry() {
416  memset(this, 0, sizeof(*this));
417  }
418 
419  compressed_directed_vertex_entry(const directed_vertex_entry offs[],
420  size_t edge_data_size, size_t num);
421  compressed_directed_vertex_entry(const directed_vertex_entry offs,
422  const vsize_t num_in_edges[], const vsize_t num_out_edges[],
423  size_t num);
424 
425  void reset_start_offs(off_t in_off, off_t out_off) {
426  start_offs = directed_vertex_entry(in_off, out_off);
427  }
428 
429  vsize_t get_num_in_edges(int idx) const {
430  return edges[idx].first;
431  }
432 
433  vsize_t get_num_out_edges(int idx) const {
434  return edges[idx].second;
435  }
436 
437  bool is_large_in_vertex(int idx) const {
438  return edges[idx].first == LARGE_VERTEX_SIZE;
439  }
440 
441  bool is_large_out_vertex(int idx) const {
442  return edges[idx].second == LARGE_VERTEX_SIZE;
443  }
444 
445  off_t get_start_in_off() const {
446  return start_offs.get_in_off();
447  }
448 
449  off_t get_start_out_off() const {
450  return start_offs.get_out_off();
451  }
452 
453  const directed_vertex_entry &get_start_offs() const {
454  return start_offs;
455  }
456 };
457 
458 /*
459  * This defines the compressed vertex entry for an undirected graph.
460  */
461 class compressed_undirected_vertex_entry: public compressed_vertex_entry
462 {
463  vertex_offset start_offs;
464  edge_size_t edges[ENTRY_SIZE];
465 public:
466  compressed_undirected_vertex_entry() {
467  memset(this, 0, sizeof(*this));
468  }
469 
470  compressed_undirected_vertex_entry(const vertex_offset offs[],
471  size_t edge_data_size, size_t num);
472  compressed_undirected_vertex_entry(const vertex_offset off,
473  const vsize_t num_edges[], size_t num);
474 
475  vsize_t get_num_edges(int idx) const {
476  return edges[idx];
477  }
478 
479  bool is_large_vertex(int idx) const {
480  return edges[idx] == LARGE_VERTEX_SIZE;
481  }
482 
483  off_t get_start_off() const {
484  return start_offs.get_off();
485  }
486 
487  const vertex_offset &get_start_offs() const {
488  return start_offs;
489  }
490 };
491 
492 /*
493  * This class defines the data layout of a compressed vertex index for
494  * an undirected graph in the external memory. It's not used to answer
495  * queries in memory. The data layout of the index is
496  * header,
497  * vertex entries,
498  * large vertices.
499  */
500 typedef std::pair<vertex_id_t, vsize_t> large_vertex_t;
501 class cundirected_vertex_index: public vertex_index
502 {
503 public:
504  typedef compressed_undirected_vertex_entry entry_type;
505 private:
506  static const size_t ENTRY_SIZE = entry_type::ENTRY_SIZE;
507  entry_type entries[0];
508 
509  large_vertex_t *get_large_vertices() {
510  return (large_vertex_t *) &entries[h.data.num_entries];
511  }
512 public:
513  typedef std::shared_ptr<cundirected_vertex_index> ptr;
514 
515  static typename cundirected_vertex_index::ptr cast(vertex_index::ptr index) {
516  assert(index->is_compressed());
517  assert(!index->get_graph_header().is_directed_graph());
518  return std::static_pointer_cast<cundirected_vertex_index, vertex_index>(
519  index);
520  }
521 
522  static ptr construct(undirected_vertex_index &index);
523  static ptr construct(const std::vector<entry_type> &entries,
524  const std::vector<large_vertex_t> &large_vertices,
525  const graph_header &header);
526  static ptr construct(size_t num_vertices, const vsize_t num_out_edges[],
527  const graph_header &header);
528 
529  const entry_type *get_entries() const {
530  return entries;
531  }
532 
533  const large_vertex_t *get_large_vertices() const {
534  return (const large_vertex_t *) &entries[h.data.num_entries];
535  }
536 
537  size_t cal_index_size() const {
538  return sizeof(cundirected_vertex_index)
539  + sizeof(entries[0]) * h.data.num_entries
540  // We use num_large_in_vertices to store the number of large
541  // undirected vertices.
542  + sizeof(large_vertex_t) * h.data.num_large_in_vertices;
543  }
544 
545  size_t get_num_large_vertices() const {
546  return h.data.num_large_in_vertices;
547  }
548 
549  void verify() const {
550  // We use num_large_in_vertices to store the number of large
551  // undirected vertices and set num_large_out_vertices to 0.
552  assert(h.data.num_large_out_vertices == 0);
553  assert(h.data.entry_size == sizeof(entry_type));
554  assert(ROUNDUP(h.data.header.num_vertices, ENTRY_SIZE) / ENTRY_SIZE
555  == h.data.num_entries);
556  }
557 };
558 
559 /*
560  * This class defines the data layout of a compressed vertex index for
561  * a directed graph in the external memory. It's not used to answer
562  * queries in memory. The data layout of the index is
563  * header,
564  * vertex entries,
565  * large in-vertices,
566  * large out-vertices.
567  */
568 class cdirected_vertex_index: public vertex_index
569 {
570 public:
571  typedef compressed_directed_vertex_entry entry_type;
572 private:
573  static const size_t ENTRY_SIZE = entry_type::ENTRY_SIZE;
574  entry_type entries[0];
575 
576  large_vertex_t *get_large_in_vertices() {
577  return (large_vertex_t *) &entries[h.data.num_entries];
578  }
579 
580  large_vertex_t *get_large_out_vertices() {
581  return get_large_in_vertices() + h.data.num_large_in_vertices;
582  }
583 public:
584  typedef std::shared_ptr<cdirected_vertex_index> ptr;
585 
586  static typename cdirected_vertex_index::ptr cast(vertex_index::ptr index) {
587  assert(index->is_compressed());
588  assert(index->get_graph_header().is_directed_graph());
589  return std::static_pointer_cast<cdirected_vertex_index, vertex_index>(
590  index);
591  }
592 
593  static ptr construct(directed_vertex_index &index);
594  static ptr construct(const std::vector<entry_type> &entries,
595  const std::vector<large_vertex_t> &large_in_vertices,
596  const std::vector<large_vertex_t> &large_out_vertices,
597  const graph_header &header);
598  static ptr construct(size_t num_vertices, const vsize_t num_in_edges[],
599  const vsize_t num_out_edges[], const graph_header &header);
600 
601  const entry_type *get_entries() const {
602  return entries;
603  }
604 
605  const large_vertex_t *get_large_in_vertices() const {
606  return (const large_vertex_t *) &entries[h.data.num_entries];
607  }
608 
609  const large_vertex_t *get_large_out_vertices() const {
610  return get_large_in_vertices() + h.data.num_large_in_vertices;
611  }
612 
613  size_t cal_index_size() const {
614  return sizeof(cdirected_vertex_index)
615  + sizeof(entries[0]) * h.data.num_entries
616  + sizeof(large_vertex_t) * h.data.num_large_in_vertices
617  + sizeof(large_vertex_t) * h.data.num_large_out_vertices;
618  }
619 
620  size_t get_num_large_in_vertices() const {
621  return h.data.num_large_in_vertices;
622  }
623 
624  size_t get_num_large_out_vertices() const {
625  return h.data.num_large_out_vertices;
626  }
627 
628  void verify() const {
629  assert(h.data.entry_size == sizeof(entry_type));
630  assert(ROUNDUP(h.data.header.num_vertices, ENTRY_SIZE) / ENTRY_SIZE
631  == h.data.num_entries);
632  }
633 };
634 
635 /*
636  * This defines the interface that other parts of FlashGraph can query
637  * the in-memory vertex index.
638  */
639 class in_mem_query_vertex_index
640 {
641  bool directed;
642  bool compressed;
643 protected:
644  in_mem_query_vertex_index(bool directed, bool compressed) {
645  this->directed = directed;
646  this->compressed = compressed;
647  }
648 public:
649  typedef std::shared_ptr<in_mem_query_vertex_index> ptr;
650  static ptr create(vertex_index::ptr index, bool compress);
651 
652  bool is_directed() const {
653  return directed;
654  }
655 
656  bool is_compressed() const {
657  return compressed;
658  }
659 
660  virtual vsize_t get_num_edges(vertex_id_t id, edge_type type) const = 0;
661  virtual vertex_index::ptr get_raw_index() const = 0;
662 };
663 
664 /*
665  * This is the in-memory compressed vertex index for an undirected graph.
666  * This is used to answer queries from other parts of FlashGraph.
667  */
668 class in_mem_cundirected_vertex_index: public in_mem_query_vertex_index
669 {
670  static const size_t ENTRY_SIZE = compressed_undirected_vertex_entry::ENTRY_SIZE;
671  static const size_t ENTRY_MASK = ENTRY_SIZE - 1;
672 
673  typedef std::unordered_map<vertex_id_t, vsize_t> vertex_map_t;
674 
675  size_t num_vertices;
676  size_t edge_data_size;
677  vertex_map_t large_vmap;
678  std::vector<compressed_undirected_vertex_entry> entries;
679 
680  in_mem_cundirected_vertex_index(vertex_index &index);
681 
682  void init(const undirected_vertex_index &index);
683  void init(const cundirected_vertex_index &index);
684 public:
685  typedef std::shared_ptr<in_mem_cundirected_vertex_index> ptr;
686 
687  static ptr cast(in_mem_query_vertex_index::ptr index) {
688  assert(index->is_compressed());
689  assert(!index->is_directed());
690  return std::static_pointer_cast<in_mem_cundirected_vertex_index,
691  in_mem_query_vertex_index>(index);
692  }
693 
694  static ptr create(vertex_index &index) {
695  return ptr(new in_mem_cundirected_vertex_index(index));
696  }
697 
698  size_t get_num_vertices() const {
699  return num_vertices;
700  }
701 
702  vsize_t get_num_edges(vertex_id_t id, edge_type type) const {
703  size_t entry_idx = id / ENTRY_SIZE;
704  vsize_t num_edges = entries[entry_idx].get_num_edges(id % ENTRY_SIZE);
705  if (num_edges >= compressed_undirected_vertex_entry::LARGE_VERTEX_SIZE) {
706  vertex_map_t::const_iterator it = large_vmap.find(id);
707  assert(it != large_vmap.end());
708  num_edges = it->second;
709  }
710  return num_edges;
711  }
712 
713  vertex_index::ptr get_raw_index() const {
714  return vertex_index::ptr();
715  }
716 
717  size_t get_size(vertex_id_t id) const {
718  vsize_t num_edges = get_num_edges(id, edge_type::IN_EDGE);
719  return ext_mem_undirected_vertex::num_edges2vsize(num_edges,
720  edge_data_size);
721  }
722 
723  vertex_offset get_vertex(vertex_id_t id) const;
724 
725  void verify_against(undirected_vertex_index &index);
726 };
727 
728 /*
729  * This is the in-memory compressed vertex index for a directed graph.
730  * This is used to answer queries from other parts of FlashGraph.
731  */
732 class in_mem_cdirected_vertex_index: public in_mem_query_vertex_index
733 {
734  static const size_t ENTRY_SIZE = compressed_directed_vertex_entry::ENTRY_SIZE;
735  static const size_t ENTRY_MASK = ENTRY_SIZE - 1;
736 
737  typedef std::unordered_map<vertex_id_t, vsize_t> vertex_map_t;
738 
739  size_t num_vertices;
740  size_t edge_data_size;
741  vertex_map_t large_in_vmap;
742  vertex_map_t large_out_vmap;
743  std::vector<compressed_directed_vertex_entry> entries;
744 
745  in_mem_cdirected_vertex_index(vertex_index &index);
746 
747  void init(const directed_vertex_index &index);
748  void init(const cdirected_vertex_index &index);
749 public:
750  typedef std::shared_ptr<in_mem_cdirected_vertex_index> ptr;
751 
752  static ptr cast(in_mem_query_vertex_index::ptr index) {
753  assert(index->is_compressed());
754  assert(index->is_directed());
755  return std::static_pointer_cast<in_mem_cdirected_vertex_index,
756  in_mem_query_vertex_index>(index);
757  }
758 
759  static ptr create(vertex_index &index) {
760  return ptr(new in_mem_cdirected_vertex_index(index));
761  }
762 
763  size_t get_num_vertices() const {
764  return num_vertices;
765  }
766 
767  vsize_t get_num_in_edges(vertex_id_t id) const {
768  size_t entry_idx = id / ENTRY_SIZE;
769  vsize_t num_edges = entries[entry_idx].get_num_in_edges(id % ENTRY_SIZE);
770  if (num_edges >= compressed_directed_vertex_entry::LARGE_VERTEX_SIZE) {
771  vertex_map_t::const_iterator it = large_in_vmap.find(id);
772  assert(it != large_in_vmap.end());
773  num_edges = it->second;
774  }
775  return num_edges;
776  }
777 
778  vsize_t get_num_out_edges(vertex_id_t id) const {
779  size_t entry_idx = id / ENTRY_SIZE;
780  vsize_t num_edges = entries[entry_idx].get_num_out_edges(id % ENTRY_SIZE);
781  if (num_edges >= compressed_directed_vertex_entry::LARGE_VERTEX_SIZE) {
782  vertex_map_t::const_iterator it = large_out_vmap.find(id);
783  assert(it != large_out_vmap.end());
784  num_edges = it->second;
785  }
786  return num_edges;
787  }
788 
789  virtual vsize_t get_num_edges(vertex_id_t id, edge_type type) const {
790  switch (type) {
791  case edge_type::IN_EDGE:
792  return get_num_in_edges(id);
793  case edge_type::OUT_EDGE:
794  return get_num_out_edges(id);
795  case edge_type::BOTH_EDGES:
796  return get_num_in_edges(id) + get_num_out_edges(id);
797  default:
798  ABORT_MSG("wrong edge type");
799  }
800  }
801 
802  vertex_index::ptr get_raw_index() const {
803  return vertex_index::ptr();
804  }
805 
806  size_t get_in_size(vertex_id_t id) const {
807  vsize_t num_edges = get_num_in_edges(id);
808  return ext_mem_undirected_vertex::num_edges2vsize(num_edges,
809  edge_data_size);
810  }
811 
812  size_t get_out_size(vertex_id_t id) const {
813  vsize_t num_edges = get_num_out_edges(id);
814  return ext_mem_undirected_vertex::num_edges2vsize(num_edges,
815  edge_data_size);
816  }
817 
818  directed_vertex_entry get_vertex(vertex_id_t id) const;
819 
820  void verify_against(directed_vertex_index &index);
821 };
822 
823 static inline int get_index_entry_size(graph_type type)
824 {
825  switch (type) {
826  case graph_type::UNDIRECTED:
827  return sizeof(vertex_offset);
828  case graph_type::DIRECTED:
829  return sizeof(directed_vertex_entry);
830  default:
831  ABORT_MSG("wrong graph type");
832  }
833 }
834 
835 }
836 
837 #endif
unsigned int vsize_t
Basic data types used in FlashGraph.
Definition: FG_basic_types.h:33
edge_type
Edge type of an edge in the graph.
Definition: vertex.h:43
Definition: vertex.h:45
Definition: vertex.h:46