FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
bitmap.h
1 /*
2  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
3  * Written by Da Zheng (zhengda1936@gmail.com)
4  *
5  * This file is part of FlashGraph.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  */
19 
20 #ifndef __MY_BITMAP_H__
21 #define __MY_BITMAP_H__
22 
23 #include <stdlib.h>
24 
25 #include <vector>
26 #include <atomic>
27 
28 #include "common.h"
29 
30 static const int NUM_BITS_LONG = sizeof(long) * 8;
31 
32 /*
33  * The functionality of this bitmap is very similar to std::vector<bool>
34  * in STL. But this one is optimized for extra operations such as merge
35  * and iterating on true values or on false values.
36  */
37 class bitmap
38 {
39  long num_set_bits;
40  size_t max_num_bits;
41  long *ptr;
42 
43  template<class T>
44  static void get_set_bits_long(long value, size_t idx, std::vector<T> &v) {
45  for (int i = 0; i < NUM_BITS_LONG; i++) {
46  if (value & (1L << i))
47  v.push_back(i + idx * NUM_BITS_LONG);
48  }
49  }
50 public:
51 #if 0
52  class const_iterator
53  {
54  size_t idx;
55  long *ptr;
56  public:
57  const_iterator(long *ptr) {
58  this->ptr = ptr;
59  idx = 0;
60  }
61 
62  bool operator*() const {
63  }
64 
65  const_iterator &operator++() {
66  }
67 
68  bool operator==(const const_iterator &it) const {
69  return this->idx == it.idx;
70  }
71 
72  bool operator!=(const const_iterator &it) const {
73  return this->idx != it.idx;
74  }
75 
76  const_iterator &operator+=(int num) {
77  idx += num;
78  return *this;
79  }
80  };
81 #endif
82 
83  bitmap(size_t max_num_bits, int node_id) {
84  this->max_num_bits = max_num_bits;
85  this->num_set_bits = 0;
86  if (max_num_bits > 0) {
87  size_t num_longs = get_num_longs();
88  ptr = (long *) malloc_large(num_longs * sizeof(ptr[0]));
89  memset(ptr, 0, sizeof(ptr[0]) * num_longs);
90  }
91  else
92  ptr = NULL;
93  }
94 
95  ~bitmap() {
96  if (ptr)
97  free_large(ptr, get_num_longs() * sizeof(ptr[0]));
98  }
99 
100  size_t get_num_longs() const {
101  return ROUNDUP(max_num_bits, NUM_BITS_LONG) / NUM_BITS_LONG;
102  }
103 
104  size_t get_num_bits() const {
105  return max_num_bits;
106  }
107 
108  size_t get_num_set_bits() const {
109  return num_set_bits;
110  }
111 
112  void set_all() {
113  if (max_num_bits > 0) {
114  memset(ptr, 0xff, sizeof(ptr[0]) * (get_num_longs() - 1));
115  num_set_bits = NUM_BITS_LONG * (get_num_longs() - 1);
116  for (size_t i = num_set_bits; i < max_num_bits; i++)
117  set(i);
118  }
119  }
120 
121  void reset(size_t idx) {
122  assert(idx < max_num_bits);
123  size_t arr_off = idx / NUM_BITS_LONG;
124  size_t inside_off = idx % NUM_BITS_LONG;
125  // If the bit has beeen set, we need to decrease the count.
126  if ((ptr[arr_off] & (1L << inside_off))) {
127  num_set_bits--;
128  ptr[arr_off] &= ~(1L << inside_off);
129  }
130  }
131 
132  void set(size_t idx) {
133  assert(idx < max_num_bits);
134  size_t arr_off = idx / NUM_BITS_LONG;
135  size_t inside_off = idx % NUM_BITS_LONG;
136  // If the bit hasn't been set, we now need to increase the count.
137  if (!(ptr[arr_off] & (1L << inside_off))) {
138  num_set_bits++;
139  ptr[arr_off] |= (1L << inside_off);
140  }
141  }
142 
143  bool get(size_t idx) const {
144  assert(idx < max_num_bits);
145  size_t arr_off = idx / NUM_BITS_LONG;
146  size_t inside_off = idx % NUM_BITS_LONG;
147  return ptr[arr_off] & (1L << inside_off);
148  }
149 
150  void clear() {
151  memset(ptr, 0, sizeof(ptr[0]) * get_num_longs());
152  num_set_bits = 0;
153  }
154 
155  /*
156  * This method collects all bits that have been set to 1.
157  */
158  template<class T>
159  size_t get_set_bits(std::vector<T> &v) const {
160  size_t size = get_num_longs();
161  for (size_t i = 0; i < size; i++) {
162  if (ptr[i])
163  get_set_bits_long(ptr[i], i, v);
164  }
165  assert(v.size() == (size_t) num_set_bits);
166  return v.size();
167  }
168 
169  template<class T>
170  size_t get_reset_set_bits(std::vector<T> &v) {
171  size_t size = get_num_longs();
172  for (size_t i = 0; i < size; i++) {
173  if (ptr[i]) {
174  get_set_bits_long(ptr[i], i, v);
175  ptr[i] = 0;
176  }
177  }
178  assert(v.size() == (size_t) num_set_bits);
179  num_set_bits = 0;
180  return v.size();
181  }
182 
183  /*
184  * This method collects a specified number of bits that have been
185  * set to 1.
186  */
187  template<class T>
188  size_t get_set_bits(size_t begin_idx, size_t end_idx, std::vector<T> &v) const {
189  // For simplicity, begin_index has to referent to the beginning
190  // of a long.
191  assert(begin_idx % NUM_BITS_LONG == 0);
192  if (end_idx == get_num_bits())
193  end_idx = ROUNDUP(end_idx, NUM_BITS_LONG);
194  assert(end_idx % NUM_BITS_LONG == 0);
195  // Find the last long we should visit (excluded).
196  size_t long_end = end_idx / NUM_BITS_LONG;
197  if (long_end > get_num_longs())
198  long_end = get_num_longs();
199  size_t orig_size = v.size();
200  for (size_t i = begin_idx / NUM_BITS_LONG; i < long_end; i++) {
201  if (ptr[i])
202  get_set_bits_long(ptr[i], i, v);
203  }
204  return v.size() - orig_size;
205  }
206 
207  template<class T>
208  size_t get_reset_set_bits(size_t begin_idx, size_t end_idx, std::vector<T> &v) {
209  // For simplicity, begin_index has to referent to the beginning
210  // of a long.
211  assert(begin_idx % NUM_BITS_LONG == 0);
212  if (end_idx == get_num_bits())
213  end_idx = ROUNDUP(end_idx, NUM_BITS_LONG);
214  assert(end_idx % NUM_BITS_LONG == 0);
215  // Find the last long we should visit (excluded).
216  size_t long_end = end_idx / NUM_BITS_LONG;
217  if (long_end > get_num_longs())
218  long_end = get_num_longs();
219  size_t orig_size = v.size();
220  for (size_t i = begin_idx / NUM_BITS_LONG; i < long_end; i++) {
221  if (ptr[i]) {
222  get_set_bits_long(ptr[i], i, v);
223  ptr[i] = 0;
224  }
225  }
226  num_set_bits -= (v.size() - orig_size);
227  assert(num_set_bits >= 0);
228  return v.size() - orig_size;
229  }
230 
231  void copy_to(bitmap &map) const {
232  assert(max_num_bits == map.max_num_bits);
233  map.num_set_bits = num_set_bits;
234  memcpy(map.ptr, ptr, map.get_num_longs() * sizeof(long));
235  }
236 };
237 
238 /*
239  * This is a thread-safe bitmap.
240  * All set/clear operations on the bitmap is atomic. However, users of
241  * the bitmap need to insert memory barrier themselves.
242  */
243 class thread_safe_bitmap
244 {
245  size_t max_num_bits;
246  std::atomic_ulong *ptr;
247 public:
248  thread_safe_bitmap(size_t max_num_bits, int node_id) {
249  this->max_num_bits = max_num_bits;
250  size_t num_longs = ROUNDUP(max_num_bits, NUM_BITS_LONG) / NUM_BITS_LONG;
251  ptr = (std::atomic_ulong *) numa_alloc_onnode(
252  num_longs * sizeof(*ptr), node_id);
253  clear();
254  }
255 
256  ~thread_safe_bitmap() {
257  size_t num_longs = ROUNDUP(max_num_bits, NUM_BITS_LONG) / NUM_BITS_LONG;
258  numa_free(ptr, num_longs * sizeof(*ptr));
259  }
260 
261  size_t get_num_bits() const {
262  return max_num_bits;
263  }
264 
265  void set(size_t idx) {
266  assert(idx < max_num_bits);
267  size_t arr_off = idx / NUM_BITS_LONG;
268  size_t inside_off = idx % NUM_BITS_LONG;
269  // We only want atomicity here.
270  ptr[arr_off].fetch_or(1L << inside_off, std::memory_order_relaxed);
271  }
272 
273  bool get(size_t idx) const {
274  assert(idx < max_num_bits);
275  size_t arr_off = idx / NUM_BITS_LONG;
276  size_t inside_off = idx % NUM_BITS_LONG;
277  return ptr[arr_off].load(std::memory_order_relaxed) & (1L << inside_off);
278  }
279 
280  void clear(size_t idx) {
281  assert(idx < max_num_bits);
282  size_t arr_off = idx / NUM_BITS_LONG;
283  size_t inside_off = idx % NUM_BITS_LONG;
284  ptr[arr_off].fetch_and(~(1L << inside_off), std::memory_order_relaxed);
285  }
286 
287  void clear() {
288  size_t num_longs = ROUNDUP(max_num_bits, NUM_BITS_LONG) / NUM_BITS_LONG;
289  for (size_t i = 0; i < num_longs; i++)
290  new (ptr + i) std::atomic_ulong();
291  }
292 };
293 
294 #endif