FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
FG_vector.h
1 #ifndef __FG_VECTOR_H__
2 #define __FG_VECTOR_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashGraph.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <memory>
24 #include <set>
25 #include <fstream>
26 
27 #include "graph_engine.h"
28 #include "stat.h"
29 
30 namespace fg
31 {
32 
40 template<class T>
41 class FG_vector
42 {
43  // TODO I might need to split the vector into partitions.
44  std::vector<T> eles;
45 
46  FG_vector(graph_engine::ptr graph) {
47  eles.resize(graph->get_num_vertices());
48  }
49 
50  FG_vector(size_t size) {
51  eles.resize(size);
52  }
53 
54  public:
55  typedef typename std::shared_ptr<FG_vector<T> > ptr;
65  static ptr create(graph_engine::ptr graph) {
66  return ptr(new FG_vector<T>(graph));
67  }
68 
75  static ptr create(size_t size) {
76  return ptr(new FG_vector<T>(size));
77  }
78 
85  void init(T v) {
86 #pragma omp parallel for
87  for (size_t i = 0; i < eles.size(); i++)
88  eles[i] = v;
89  }
90 
97  void plus_eq(FG_vector<T>::ptr other) {
98  assert(get_size() == other->get_size());
99  for (size_t i = 0; i < get_size(); i++) {
100  eles[i] += other->get(i);
101  }
102  }
103 
109  void assign(size_t num, T val) {
110  eles.assign(num, val);
111  }
112 
118  void shallow_copy(FG_vector<T>::ptr other) {
119  assert(this->get_size() == other->get_size());
120 #pragma omp parallel for
121  for (size_t i = 0; i < get_size(); i++) {
122  this->eles[i] = other->eles[i];
123  }
124  }
125 
126  template<class T1>
127  void copy_to(T1 *arr, size_t size) {
128  size_t num = std::min(size, eles.size());
129  for (size_t i = 0; i < num; i++)
130  arr[i] = eles[i];
131  }
132 
138  // TODO DM: Make parallel / smarter
139  bool eq_all(FG_vector<T>::ptr other) {
140  return std::equal(this->eles.begin(), this->eles.end(), other->eles.begin());
141  }
142 
143  void init_rand(long max = std::numeric_limits<T>::max(),
144  unsigned int seed = 0) {
145  if (seed > 0)
146  srandom(seed);
147  if (max >= std::numeric_limits<T>::max())
148  max = std::numeric_limits<T>::max();
149 #pragma omp parallel for
150  for (size_t i = 0; i < eles.size(); i++)
151  eles[i] = random() % max;
152  }
153 
161  void unique(std::set<T> &set) const {
162  // TODO we need a parallel implementation.
163 
164  assert(set.empty()); // FIXME: `new` a shared/unique ptr & remove param
165  BOOST_FOREACH(T v, eles) {
166  set.insert(v);
167  }
168  }
169 
177  void count_unique(count_map<T> &map) const {
178  // TODO we need a parallel implementation.
179 
180  assert(map.get_size() == 0); // FIXME: `new` a shared/unique ptr & remove param
181  BOOST_FOREACH(T v, eles) {
182  map.add(v);
183  }
184  }
185 
191  size_t get_size() const {
192  return eles.size();
193  }
194 
201  T *get_data() {
202  return eles.data();
203  }
204 
212  const T*get_data() const {
213  return eles.data();
214  }
215 
223  T dot_product(const FG_vector<T> &other) const {
224  assert(this->get_size() == other.get_size());
225  T ret = 0;
226  for (size_t i = 0; i < get_size(); i++)
227  ret += get(i) * other.get(i);
228  return ret;
229  }
230 
239  T norm2() const {
240  T ret = 0;
241  for (size_t i = 0; i < get_size(); i++)
242  ret += get(i) * get(i);
243  return sqrt(ret);
244  }
245 
254  T norm1() const {
255  T ret = 0;
256  for (size_t i = 0; i < get_size(); i++)
257  ret += fabs(get(i));
258  return ret;
259  }
260 
267  T sum() const {
268  return sum<T>();
269  }
270 
278  template<class ResType>
279  ResType sum() const {
280  struct identity_func {
281  ResType operator()(T v) {
282  return v;
283  }
284  };
285  return aggregate<identity_func, ResType>(identity_func());
286  }
287 
288  template<class Func, class ResType>
289  ResType aggregate(Func func) const {
290  ResType ret = 0;
291  for (size_t i = 0; i < get_size(); i++)
292  ret += func(eles[i]);
293  return ret;
294  }
295 
300  T max() const {
301  return max_val_loc().first;
302  }
303 
310  std::pair<T, off_t> max_val_loc() const {
311  T ret = std::numeric_limits<T>::min();
312  off_t idx = 0;
313  for (size_t i = 0; i < get_size(); i++) {
314  if (ret < get(i)) {
315  ret = get(i);
316  idx = i;
317  }
318  }
319  return std::pair<T, off_t>(ret, idx);
320  }
321 
322  void max_val_locs(size_t num, std::vector<std::pair<T, off_t> > &pairs) const {
323  typedef std::pair<T, off_t> val_loc_t;
324  struct comp_val {
325  bool operator()(const val_loc_t &v1, const val_loc_t &v2) {
326  return v1.first > v2.first;
327  }
328  };
329  std::priority_queue<val_loc_t, std::vector<val_loc_t>, comp_val> queue;
330  for (size_t i = 0; i < get_size(); i++) {
331  T val = get(i);
332  queue.push(val_loc_t(val, i));
333  if (queue.size() > num)
334  queue.pop();
335  }
336  while (!queue.empty()) {
337  val_loc_t pair = queue.top();
338  queue.pop();
339  pairs.push_back(pair);
340  }
341  }
342 
348  T min() const {
349  T ret = std::numeric_limits<T>::max();
350  for (size_t i = 0; i < get_size(); i++)
351  ret = std::min(get(i), ret);
352  return ret;
353  }
354 
360  size_t argmin() {
361  typename std::vector<T>::iterator res = std::min_element(eles.begin(), eles.end());
362  size_t ret = std::distance(eles.begin(), res);
363  return ret;
364  }
365 
370  void print(vsize_t max_print_size=100) {
371  vsize_t print_len = get_size() > max_print_size ? max_print_size : get_size();
372 
373  std::cout << "[";
374  for (vsize_t i=0; i < print_len; i++) {
375  std::cout << " " << get(i);
376  }
377 
378  if (print_len == max_print_size && print_len != get_size()) {
379  std::cout << "...";
380  }
381  std::cout << " ]\n\n";
382  }
383 
388  void to_file(std::string fn) {
389  std::ofstream f;
390  f.open(fn);
391  for (vsize_t i=0; i < get_size(); i++) {
392  f << get(i) << " ";
393  }
394  f.close();
395  }
396 
397  void neg_in_place() {
398  for (size_t i = 0; i < get_size(); i++)
399  eles[i] = -eles[i];
400  }
401 
407  void div_by_in_place(T v) {
408 #pragma omp parallel for
409  for (size_t i = 0; i < get_size(); i++)
410  eles[i] /= v;
411  }
412 
420  template<class MergeFunc, class VecType>
421  void merge_in_place(typename FG_vector<VecType>::ptr vec, MergeFunc func) {
422  assert(this->get_size() == vec->get_size());
423 #pragma omp parallel for
424  for (size_t i = 0; i < get_size(); i++)
425  eles[i] = func(eles[i], vec->get(i));
426  }
427 
433  template<class T2>
434  void add_in_place(typename FG_vector<T2>::ptr vec) {
435  struct add_func {
436  T operator()(const T &v1, const T2 &v2) {
437  return v1 + v2;
438  }
439  };
440  merge_in_place<add_func, T2>(vec, add_func());
441  }
442 
448  template<class T2>
449  void subtract_in_place(typename FG_vector<T2>::ptr &vec) {
450  struct sub_func {
451  T operator()(const T &v1, const T2 &v2) {
452  return v1 - v2;
453  }
454  };
455  merge_in_place<sub_func, T2>(vec, sub_func());
456  }
457 
458  template<class T2>
459  void multiply_in_place(T2 v) {
460  for (size_t i = 0; i < get_size(); i++)
461  eles[i] *= v;
462  }
463 
464  template<class IN_TYPE, class OUT_TYPE>
465  typename FG_vector<OUT_TYPE>::ptr multiply(IN_TYPE v) const {
466  typename FG_vector<OUT_TYPE>::ptr ret = FG_vector<OUT_TYPE>::create(get_size());
467  for (size_t i = 0; i < get_size(); i++)
468  ret->set(i, this->eles[i] * v);
469  return ret;
470  }
471 
472  template<class IN_TYPE, class OUT_TYPE>
473  typename FG_vector<OUT_TYPE>::ptr multiply(typename FG_vector<IN_TYPE>::ptr vec) const {
474  if (vec->get_size() != this->get_size())
475  return typename FG_vector<OUT_TYPE>::ptr();
476 
477  typename FG_vector<OUT_TYPE>::ptr ret = FG_vector<OUT_TYPE>::create(get_size());
478  for (size_t i = 0; i < get_size(); i++)
479  ret->eles[i] = this->eles[i] * vec->get(i);
480  return ret;
481  }
482 
487  void normalize(int type) {
488  T norm;
489  switch(type) {
490  case 2:
491  norm = norm2();
492  break;
493  case 1:
494  norm = norm1();
495  break;
496  default:
497  ABORT_MSG("normalize on wrong type");
498  }
499  div_by_in_place(norm);
500  }
501 
510  template<class ApplyFunc>
511  void apply(ApplyFunc func, FG_vector<T> &output) {
512 #pragma omp parallel for
513  for (size_t i = 0; i < get_size(); i++)
514  output.set(i, func(eles[i]));
515  }
516 
517  // TODO these interfaces assume shared memory.
518 
526  void set(vertex_id_t id, const T &v) {
527  eles[id] = v;
528  }
529 
536  const T &get(vertex_id_t id) const {
537  return eles[id];
538  }
539 
546  T &get(vertex_id_t id) {
547  return eles[id];
548  }
549 
550  log_histogram log_hist(int power) const {
551  T max_v = max();
552  int num_buckets = ceil(log(max_v) / log(power));
553  log_histogram hist(std::max(num_buckets, 1));
554  for (size_t i = 0; i < get_size(); i++) {
555  hist.add_value(eles[i]);
556  }
557  return hist;
558  }
559 };
560 
568  template<class T, class ApplyFunc>
569 void multi_vec_apply(const std::vector<typename FG_vector<T>::ptr> &inputs,
570  typename FG_vector<T>::ptr output, ApplyFunc apply)
571 {
572  for (size_t i = 0; i < inputs.size(); i++)
573  assert(output->get_size() == inputs[i]->get_size());
574 #pragma omp parallel for
575  for (size_t i = 0; i < output->get_size(); i++)
576  output->set(i, apply(i, inputs));
577 }
578 
579 }
580 
581 #endif
T norm1() const
Compute the L1 Norm#Taxicab_norm_or_Manhattan_norm) (also Taxicab norm) of an FG_vector. parallel
Definition: FG_vector.h:254
const T * get_data() const
Const method to get a pointer to the memory array used internally by the vector to store its owned el...
Definition: FG_vector.h:212
void shallow_copy(FG_vector< T >::ptr other)
Make a shallow copy of the vector.
Definition: FG_vector.h:118
unsigned int vsize_t
Basic data types used in FlashGraph.
Definition: FG_basic_types.h:33
void assign(size_t num, T val)
Assign a value num many times to the vector.
Definition: FG_vector.h:109
void to_file(std::string fn)
Write the space separated vector to file.
Definition: FG_vector.h:388
FlashGraph vector that provides several parallelized methods when compared to an STL-vector. NOTE: Not an STL-compatible data structure. This vector is also ideally used with numeric data types. Methods marked with the keyword parallel are parallelized implementations.
Definition: FG_vector.h:41
std::pair< T, off_t > max_val_loc() const
Find the maximal value in the vector and return its value and its location.
Definition: FG_vector.h:310
T dot_product(const FG_vector< T > &other) const
Compute the dot product of two FG vectors. parallel
Definition: FG_vector.h:223
ResType sum() const
Compute the sum of all elements in the vector. This sum() allows users to specify the type of the r...
Definition: FG_vector.h:279
T norm2() const
Compute the L2 Norm#Euclidean_norm) (also know as Euclidean distance) of a vector. parallel
Definition: FG_vector.h:239
void print(vsize_t max_print_size=100)
Serial element-wise print of the vector. Not intended for very large vectors
Definition: FG_vector.h:370
void set(vertex_id_t id, const T &v)
Definition: FG_vector.h:526
T min() const
Find the index with the minmimal value in the vector and return its value.
Definition: FG_vector.h:348
T max() const
Find the maximal value in the vector and return its value.
Definition: FG_vector.h:300
size_t get_size() const
Get the number of elements contained in the vector.
Definition: FG_vector.h:191
void plus_eq(FG_vector< T >::ptr other)
Equivalent to += operator. Element by element addition of one FG_vector to another.
Definition: FG_vector.h:97
void apply(ApplyFunc func, FG_vector< T > &output)
Apply a function to every element in an FG_vector.
Definition: FG_vector.h:511
void merge_in_place(typename FG_vector< VecType >::ptr vec, MergeFunc func)
element-wise merge with another vector and store the result in this vector.
Definition: FG_vector.h:421
void subtract_in_place(typename FG_vector< T2 >::ptr &vec)
In place subtraction of the vector by another vector.
Definition: FG_vector.h:449
void count_unique(count_map< T > &map) const
Count the number of unique items in the vector using a count map.
Definition: FG_vector.h:177
T * get_data()
Get a pointer to the memory array used internally by the vector to store its owned elements...
Definition: FG_vector.h:201
void normalize(int type)
Normalize vector using an Lx form. parallel
Definition: FG_vector.h:487
static ptr create(size_t size)
Create a vector of the specified length. An object of this class should be created using this or the ...
Definition: FG_vector.h:75
void add_in_place(typename FG_vector< T2 >::ptr vec)
In place element-wise addition by another vector.
Definition: FG_vector.h:434
void init(T v)
Initialize the vector a single value as specified by parameter 1.
Definition: FG_vector.h:85
T sum() const
Compute the sum of all elements in the vector. If the type is integer, the sum can overflow...
Definition: FG_vector.h:267
const T & get(vertex_id_t id) const
Const get the value of a particular index.
Definition: FG_vector.h:536
size_t argmin()
Find the index with the minimal value in the vector and return the index.
Definition: FG_vector.h:360
void div_by_in_place(T v)
In place division of vector by a single value.
Definition: FG_vector.h:407
void unique(std::set< T > &set) const
Populate an STL set with the unique elements in the vector. All duplicates are ignored.
Definition: FG_vector.h:161
bool eq_all(FG_vector< T >::ptr other)
Check for equality between two FG_vectors element by element.
Definition: FG_vector.h:139
static ptr create(graph_engine::ptr graph)
Create a vector of the length the same as the number of vertices in the graph. An object of this clas...
Definition: FG_vector.h:65
void multi_vec_apply(const std::vector< typename FG_vector< T >::ptr > &inputs, typename FG_vector< T >::ptr output, ApplyFunc apply)
Apply a user defined function to multipl FG_vectors. parallel
Definition: FG_vector.h:569