FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
partitioner.h
1 #ifndef __GRAPH_PARTITIONER_H__
2 #define __GRAPH_PARTITIONER_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashGraph.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <math.h>
24 
25 #include <utility>
26 
27 #include "vertex.h"
28 
29 namespace fg
30 {
31 
37 {
38  vertex_id_t id;
39 
40  local_vid_t() {
41  id = INVALID_VERTEX_ID;
42  }
43 
44  explicit local_vid_t(vertex_id_t id) {
45  this->id = id;
46  }
47 };
48 
54 typedef std::pair<int, struct local_vid_t> vertex_loc_t;
55 
56 class graph_partitioner
57 {
58 public:
59  virtual int get_num_partitions() const = 0;
60  virtual int map(vertex_id_t id) const = 0;
61  virtual void map2loc(vertex_id_t id, int &part_id, off_t &off) const = 0;
62  virtual void map2loc(vertex_id_t ids[], int num,
63  std::vector<local_vid_t> locs[], int num_parts) const = 0;
64  virtual size_t map2loc(edge_seq_iterator &,
65  vertex_loc_t locs[], size_t size) const = 0;
66  virtual void map2loc(edge_seq_iterator &, std::vector<local_vid_t> locs[],
67  int num_parts) const = 0;
68  virtual void loc2map(int part_id, off_t off, vertex_id_t &id) const = 0;
69  virtual size_t get_all_vertices_in_part(int part_id,
70  size_t tot_num_vertices, std::vector<vertex_id_t> &ids) const = 0;
71  virtual size_t get_part_size(int part_id, size_t num_vertices) const = 0;
72 };
73 
74 class modulo_graph_partitioner: public graph_partitioner
75 {
76  int num_parts_log;
77  vertex_id_t mask;
78  int get_num_parts() const {
79  return 1 << num_parts_log;
80  }
81 public:
82  modulo_graph_partitioner(int num_parts) {
83  this->num_parts_log = log2(num_parts);
84  assert((1 << num_parts_log) == num_parts);
85  mask = (1 << num_parts_log) - 1;
86  }
87 
88  int get_num_partitions() const {
89  return 1 << num_parts_log;
90  }
91 
92  int map(vertex_id_t id) const {
93  return id & mask;
94  }
95 
96  void map2loc(vertex_id_t id, int &part_id, off_t &off) const {
97  part_id = id & mask;
98  off = id >> num_parts_log;
99  }
100  void map2loc(vertex_id_t ids[], int num,
101  std::vector<local_vid_t> locs[], int num_parts) const;
102  void map2loc(edge_seq_iterator &, std::vector<local_vid_t> locs[],
103  int num_parts) const;
104  size_t map2loc(edge_seq_iterator &,
105  vertex_loc_t locs[], size_t size) const;
106 
107  void loc2map(int part_id, off_t off, vertex_id_t &id) const {
108  id = (off << num_parts_log) + part_id;
109  }
110 
111  size_t get_all_vertices_in_part(int part_id, size_t tot_num_vertices,
112  std::vector<vertex_id_t> &ids) const;
113 
114  virtual size_t get_part_size(int part_id, size_t num_vertices) const {
115  int num_parts = 1 << num_parts_log;
116  return (num_vertices - part_id + num_parts - 1) / num_parts;
117  }
118 };
119 
120 class range_graph_partitioner: public graph_partitioner
121 {
122  const int RANGE_SIZE_LOG;
123  const int RANGE_SIZE;
124  const unsigned long RANGE_MASK;
125 
126  const int num_parts_log;
127  const vertex_id_t mask;
128 
129  int get_num_parts() const {
130  return 1 << num_parts_log;
131  }
132 
133  vertex_id_t get_part_end(int part_id, size_t num_vertices) const;
134 public:
135  range_graph_partitioner(int num_parts);
136 
137  int get_num_partitions() const {
138  return 1 << num_parts_log;
139  }
140 
141  virtual int map(vertex_id_t id) const {
142  return (id >> RANGE_SIZE_LOG) & mask;
143  }
144 
145  virtual void map2loc(vertex_id_t id, int &part_id, off_t &off) const {
146  vertex_id_t shifted_id = id >> RANGE_SIZE_LOG;
147  part_id = shifted_id & mask;
148  off = ((shifted_id >> num_parts_log) << RANGE_SIZE_LOG) + (id & RANGE_MASK);
149  }
150 
151  virtual void map2loc(vertex_id_t ids[], int num,
152  std::vector<local_vid_t> locs[], int num_parts) const;
153  virtual void map2loc(edge_seq_iterator &, std::vector<local_vid_t> locs[],
154  int num_parts) const;
155  virtual size_t map2loc(edge_seq_iterator &,
156  vertex_loc_t locs[], size_t num) const;
157 
158  virtual void loc2map(int part_id, off_t off, vertex_id_t &id) const {
159  off_t local_range_id = off >> RANGE_SIZE_LOG;
160  off_t in_range_off = off & RANGE_MASK;
161  id = (local_range_id << (num_parts_log + RANGE_SIZE_LOG))
162  + (part_id << RANGE_SIZE_LOG) + in_range_off;
163  }
164 
165  virtual size_t get_all_vertices_in_part(int part_id,
166  size_t tot_num_vertices, std::vector<vertex_id_t> &ids) const;
167 
168  virtual size_t get_part_size(int part_id, size_t num_vertices) const {
169  vertex_id_t vid_end = get_part_end(part_id, num_vertices);
170  size_t num_local_ranges = vid_end >> (RANGE_SIZE_LOG + num_parts_log);
171  size_t num_vertices_last_range = vid_end
172  - (num_local_ranges << (RANGE_SIZE_LOG + num_parts_log))
173  - (part_id << RANGE_SIZE_LOG);
174  num_vertices_last_range = max(0, num_vertices_last_range);
175  return (num_local_ranges << RANGE_SIZE_LOG) + num_vertices_last_range;
176  }
177 };
178 
179 }
180 
181 #endif
std::pair< int, struct local_vid_t > vertex_loc_t
Definition: partitioner.h:54
Definition: partitioner.h:36