FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
bulk_operate.h
1 #ifndef __BULK_OPERATE_H__
2 #define __BULK_OPERATE_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashMatrix.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include <assert.h>
24 
25 #include <memory>
26 #include <vector>
27 #include <cmath>
28 
29 #include "generic_type.h"
30 
31 namespace fm
32 {
33 
40 {
41  struct empty_deleter {
42  void operator()(const bulk_uoperate *addr) {
43  }
44  };
45 public:
46  typedef std::shared_ptr<const bulk_uoperate> const_ptr;
47 
48  static const_ptr conv2ptr(const bulk_uoperate &op) {
49  return const_ptr(&op, empty_deleter());
50  }
51 
52  virtual void runA(size_t num_eles, const void *in_arr,
53  void *out_arr) const = 0;
54  virtual const scalar_type &get_input_type() const = 0;
55  virtual const scalar_type &get_output_type() const = 0;
56 
57  size_t input_entry_size() const {
58  return get_input_type().get_size();
59  }
60 
61  size_t output_entry_size() const {
62  return get_output_type().get_size();
63  }
64 };
65 
66 /*
67  * This template implements the interface of a bulk unary operator.
68  */
69 template<class OpType, class InType, class OutType>
70 class bulk_uoperate_impl: public bulk_uoperate
71 {
72  OpType op;
73 public:
74  bulk_uoperate_impl() {
75  }
76 
77  bulk_uoperate_impl(const OpType &_op): op(_op) {
78  }
79 
80  virtual void runA(size_t num_eles, const void *in_arr1,
81  void *output_arr1) const {
82  const InType *in_arr = (const InType *) in_arr1;
83  OutType *output_arr = (OutType *) output_arr1;
84  for (size_t i = 0; i < num_eles; i++)
85  output_arr[i] = op(in_arr[i]);
86  }
87 
88  virtual const scalar_type &get_input_type() const;
89  virtual const scalar_type &get_output_type() const;
90 };
91 
100 {
101  struct empty_deleter {
102  void operator()(const bulk_operate *addr) {
103  }
104  };
105 public:
106  typedef std::shared_ptr<const bulk_operate> const_ptr;
107 
108  static const_ptr conv2ptr(const bulk_operate &op) {
109  return const_ptr(&op, empty_deleter());
110  }
111 
112  /*
113  * This performs element-wise operation on two input arrays, and stores
114  * the result on the output array.
115  */
116  virtual void runAA(size_t num_eles, const void *left_arr,
117  const void *right_arr, void *output_arr) const = 0;
118  /*
119  * This performs operations on the left input array and the right element,
120  * and stores the result on the output array.
121  */
122  virtual void runAE(size_t num_eles, const void *left_arr,
123  const void *right, void *output_arr) const = 0;
124  /*
125  * This performs operations on the left element array and the right array,
126  * and stores the result on the output array.
127  */
128  virtual void runEA(size_t num_eles, const void *left,
129  const void *right_arr, void *output_arr) const = 0;
130 
131  /*
132  * This performs aggregation on the input array, combines the agg result
133  * with the original agg result and stores the result on output.
134  */
135  virtual void runAgg(size_t num_eles, const void *left_arr, const void *orig,
136  void *output) const = 0;
137 
138  virtual const scalar_type &get_left_type() const = 0;
139  virtual const scalar_type &get_right_type() const = 0;
140  virtual const scalar_type &get_output_type() const = 0;
141 
142  size_t left_entry_size() const {
143  return get_left_type().get_size();
144  }
145 
146  size_t right_entry_size() const {
147  return get_right_type().get_size();
148  }
149 
150  size_t output_entry_size() const {
151  return get_output_type().get_size();
152  }
153 };
154 
155 /*
156  * This template implements the interface of the bulk binary operator.
157  */
158 template<class OpType, class LeftType, class RightType, class ResType>
159 class bulk_operate_impl: public bulk_operate
160 {
161 public:
162  virtual void runAA(size_t num_eles, const void *left_arr1,
163  const void *right_arr1, void *output_arr1) const {
164  const LeftType *left_arr = (const LeftType *) left_arr1;
165  const RightType *right_arr = (const RightType *) right_arr1;
166  ResType *output_arr = (ResType *) output_arr1;
167  OpType op;
168  for (size_t i = 0; i < num_eles; i++)
169  output_arr[i] = op(left_arr[i], right_arr[i]);
170  }
171 
172  virtual void runAE(size_t num_eles, const void *left_arr1,
173  const void *right, void *output_arr1) const {
174  const LeftType *left_arr = (const LeftType *) left_arr1;
175  ResType *output_arr = (ResType *) output_arr1;
176  RightType entry = *(const RightType *) right;
177  OpType op;
178  for (size_t i = 0; i < num_eles; i++)
179  output_arr[i] = op(left_arr[i], entry);
180  }
181 
182  virtual void runEA(size_t num_eles, const void *left,
183  const void *right_arr1, void *output_arr1) const {
184  LeftType entry = *(const LeftType *) left;
185  const RightType *right_arr = (const RightType *) right_arr1;
186  ResType *output_arr = (ResType *) output_arr1;
187  OpType op;
188  for (size_t i = 0; i < num_eles; i++)
189  output_arr[i] = op(entry, right_arr[i]);
190  }
191 
192  virtual void runAgg(size_t num_eles, const void *left_arr1,
193  const void *orig, void *output) const {
194  const LeftType *left_arr = (const LeftType *) left_arr1;
195  if (num_eles == 0)
196  return;
197 
198  size_t i;
199  ResType res;
200  if (orig) {
201  i = 0;
202  res = *(const ResType *) orig;
203  }
204  else {
205  i = 1;
206  res = left_arr[0];
207  }
208  OpType op;
209  for (; i < num_eles; i++)
210  res = op(left_arr[i], res);
211  *(ResType *) output = res;
212  }
213 
214  virtual const scalar_type &get_left_type() const;
215  virtual const scalar_type &get_right_type() const;
216  virtual const scalar_type &get_output_type() const;
217 };
218 
219 /*
220  * This interface defines a collection of basic unary operators.
221  */
222 class basic_uops
223 {
224 public:
225  typedef std::shared_ptr<basic_uops> ptr;
226 
227  enum op_idx {
228  NEG,
229  SQRT,
230  ABS,
231  NOT,
232  SQ,
233  CEIL,
234  FLOOR,
235  ROUND,
236  LOG,
237  LOG2,
238  LOG10,
239  NUM_OPS,
240  };
241 
242  virtual const bulk_uoperate *get_op(op_idx idx) const = 0;
243 };
244 
245 /*
246  * This template implements all basic binary operators for different types.
247  */
248 template<class InType, class OutType>
249 class basic_uops_impl: public basic_uops
250 {
251  struct uop_neg {
252  OutType operator()(const InType &e) const {
253  return -e;
254  }
255  };
256 
257  struct uop_sqrt {
258  double operator()(const InType &e) const {
259  return std::sqrt(e);
260  }
261  };
262 
263  struct uop_abs {
264  OutType operator()(const InType &e) const {
265  return (OutType) std::abs(e);
266  }
267  };
268 
269  struct uop_not {
270  bool operator()(const bool &e) const {
271  return !e;
272  }
273  };
274 
275  struct sq {
276  OutType operator()(const InType &e) const {
277  return e * e;
278  }
279  };
280 
281  struct ceil {
282  OutType operator()(const InType &e) const {
283  return std::ceil(e);
284  }
285  };
286 
287  struct floor {
288  OutType operator()(const InType &e) const {
289  return std::floor(e);
290  }
291  };
292 
293  struct round {
294  OutType operator()(const InType &e) const {
295  return std::round(e);
296  }
297  };
298 
299  struct log {
300  OutType operator()(const InType &e) const {
301  return std::log(e);
302  }
303  };
304 
305  struct log2 {
306  OutType operator()(const InType &e) const {
307  return std::log2(e);
308  }
309  };
310 
311  struct log10 {
312  OutType operator()(const InType &e) const {
313  return std::log10(e);
314  }
315  };
316 
317  bulk_uoperate_impl<uop_neg, InType, OutType> neg_op;
318  bulk_uoperate_impl<uop_sqrt, InType, double> sqrt_op;
319  bulk_uoperate_impl<uop_abs, InType, OutType> abs_op;
320  bulk_uoperate_impl<uop_not, bool, bool> not_op;
321  bulk_uoperate_impl<sq, InType, OutType> sq_op;
322  bulk_uoperate_impl<ceil, InType, OutType> ceil_op;
323  bulk_uoperate_impl<floor, InType, OutType> floor_op;
324  bulk_uoperate_impl<round, InType, OutType> round_op;
325  bulk_uoperate_impl<log, InType, OutType> log_op;
326  bulk_uoperate_impl<log2, InType, OutType> log2_op;
327  bulk_uoperate_impl<log10, InType, OutType> log10_op;
328 
329  std::vector<bulk_uoperate *> ops;
330 public:
331  basic_uops_impl() {
332  ops.push_back(&neg_op);
333  ops.push_back(&sqrt_op);
334  ops.push_back(&abs_op);
335  ops.push_back(&not_op);
336  ops.push_back(&sq_op);
337  ops.push_back(&ceil_op);
338  ops.push_back(&floor_op);
339  ops.push_back(&round_op);
340  ops.push_back(&log_op);
341  ops.push_back(&log2_op);
342  ops.push_back(&log10_op);
343  }
344 
345  virtual const bulk_uoperate *get_op(op_idx idx) const {
346  if (idx >= op_idx::NUM_OPS)
347  return NULL;
348  return ops[idx];
349  }
350 };
351 
352 /*
353  * This interface defines a collection of basic binary operators.
354  */
355 class basic_ops
356 {
357 public:
358  typedef std::shared_ptr<basic_ops> ptr;
359 
360  enum op_idx {
361  ADD,
362  SUB,
363  MUL,
364  DIV,
365  MIN,
366  MAX,
367  POW,
368  EQ,
369  GT,
370  GE,
371  LT,
372  LE,
373  NUM_OPS,
374  };
375 
376  virtual const bulk_operate *get_op(op_idx idx) const = 0;
377  virtual const bulk_operate &get_add() const {
378  return *get_op(ADD);
379  }
380 
381  virtual const bulk_operate &get_sub() const {
382  return *get_op(SUB);
383  }
384 
385  virtual const bulk_operate &get_multiply() const {
386  return *get_op(MUL);
387  }
388 
389  virtual const bulk_operate &get_divide() const {
390  return *get_op(DIV);
391  }
392 };
393 
394 template<class LeftType1, class RightType1, class ResType1>
395 struct multiply
396 {
397  ResType1 operator()(const LeftType1 &e1, const RightType1 &e2) const {
398  return e1 * e2;
399  }
400 };
401 
402 template<>
403 struct multiply<double, double, double>
404 {
405  double operator()(const double &e1, const double &e2) const {
406  long double first = e1;
407  long double second = e2;
408  return first * second;
409  }
410 };
411 
412 /*
413  * This template implements all basic binary operators for different types.
414  */
415 template<class LeftType, class RightType, class ResType>
416 class basic_ops_impl: public basic_ops
417 {
418  struct add {
419  ResType operator()(const LeftType &e1, const RightType &e2) const {
420  return e1 + e2;
421  }
422  };
423 
424  struct sub {
425  ResType operator()(const LeftType &e1, const RightType &e2) const {
426  return e1 - e2;
427  }
428  };
429 
430  // Division is special. Its output should be float point.
431  // Therefore, we convert both input values to float point.
432  struct divide {
433  double operator()(const LeftType &e1, const RightType &e2) const {
434  double d1 = e1;
435  double d2 = e2;
436  return d1 / d2;
437  }
438  };
439 
440  struct min {
441  ResType operator()(const LeftType &e1, const RightType &e2) const {
442  if (e1 < e2)
443  return e1;
444  else
445  return e2;
446  }
447  };
448 
449  struct max {
450  ResType operator()(const LeftType &e1, const RightType &e2) const {
451  if (e1 > e2)
452  return e1;
453  else
454  return e2;
455  }
456  };
457 
458  struct pow {
459  ResType operator()(const LeftType &e1, const RightType &e2) const {
460  return (ResType) std::pow(e1, e2);
461  }
462  };
463 
464  struct eq {
465  bool operator()(const LeftType &e1, const RightType &e2) const {
466  return e1 == e2;
467  }
468  };
469 
470  struct gt {
471  bool operator()(const LeftType &e1, const RightType &e2) const {
472  return e1 > e2;
473  }
474  };
475 
476  struct ge {
477  bool operator()(const LeftType &e1, const RightType &e2) const {
478  return e1 >= e2;
479  }
480  };
481 
482  struct lt {
483  bool operator()(const LeftType &e1, const RightType &e2) const {
484  return e1 < e2;
485  }
486  };
487 
488  struct le {
489  bool operator()(const LeftType &e1, const RightType &e2) const {
490  return e1 <= e2;
491  }
492  };
493 
494  bulk_operate_impl<add, LeftType, RightType, ResType> add_op;
495  bulk_operate_impl<sub, LeftType, RightType, ResType> sub_op;
496  bulk_operate_impl<multiply<LeftType, RightType, ResType>,
497  LeftType, RightType, ResType> mul_op;
498  bulk_operate_impl<divide, LeftType, RightType, double> div_op;
499  bulk_operate_impl<min, LeftType, RightType, ResType> min_op;
500  bulk_operate_impl<max, LeftType, RightType, ResType> max_op;
501  bulk_operate_impl<pow, LeftType, RightType, ResType> pow_op;
502  bulk_operate_impl<eq, LeftType, RightType, bool> eq_op;
503  bulk_operate_impl<gt, LeftType, RightType, bool> gt_op;
504  bulk_operate_impl<ge, LeftType, RightType, bool> ge_op;
505  bulk_operate_impl<lt, LeftType, RightType, bool> lt_op;
506  bulk_operate_impl<le, LeftType, RightType, bool> le_op;
507 
508  std::vector<bulk_operate *> ops;
509 public:
510  basic_ops_impl() {
511  ops.resize(NUM_OPS);
512  ops[ADD] = &add_op;
513  ops[SUB] = &sub_op;
514  ops[MUL] = &mul_op;
515  ops[DIV] = &div_op;
516  ops[MIN] = &min_op;
517  ops[MAX] = &max_op;
518  ops[POW] = &pow_op;
519  ops[EQ] = &eq_op;
520  ops[GT] = &gt_op;
521  ops[GE] = &ge_op;
522  ops[LT] = &lt_op;
523  ops[LE] = &le_op;
524  }
525 
526  virtual const bulk_operate *get_op(op_idx idx) const {
527  if (idx >= op_idx::NUM_OPS)
528  return NULL;
529  return ops[idx];
530  }
531 };
532 
533 /*
534  * This operate is used to set values on a matrix.
535  */
536 class set_operate
537 {
538 public:
539  virtual void set(void *arr, size_t num_eles, off_t row_idx,
540  off_t col_idx) const = 0;
541  virtual const scalar_type &get_type() const = 0;
542 };
543 
544 template<class T>
545 class type_set_operate: public set_operate
546 {
547  virtual void set(void *arr, size_t num_eles, off_t row_idx,
548  off_t col_idx) const {
549  set((T *) arr, num_eles, row_idx, col_idx);
550  }
551 public:
552  virtual void set(T *arr, size_t num_eles, off_t row_idx,
553  off_t col_idx) const = 0;
554 
555  virtual const scalar_type &get_type() const {
556  return get_scalar_type<T>();
557  }
558 };
559 
560 template<class T>
561 class const_set_operate: public type_set_operate<T>
562 {
563  T val;
564 public:
565  const_set_operate(T val) {
566  this->val = val;
567  }
568 
569  virtual void set(T *arr, size_t num_eles, off_t row_idx,
570  off_t col_idx) const {
571  for (size_t i = 0; i < num_eles; i++)
572  arr[i] = val;
573  }
574 };
575 
576 /*
577  * This operate set values in a vector.
578  */
579 
580 class set_vec_operate
581 {
582 public:
583  typedef std::shared_ptr<const set_vec_operate> const_ptr;
584 
585  virtual void set(void *arr, size_t num_eles, off_t start_idx) const = 0;
586  virtual const scalar_type &get_type() const = 0;
587 };
588 
589 template<class T>
590 class type_set_vec_operate: public set_vec_operate
591 {
592  virtual void set(void *arr, size_t num_eles, off_t start_idx) const {
593  set((T *) arr, num_eles, start_idx);
594  }
595 public:
596  virtual void set(T *arr, size_t num_eles, off_t start_idx) const = 0;
597  virtual const scalar_type &get_type() const {
598  return get_scalar_type<T>();
599  }
600 };
601 
602 template<class T>
603 class const_set_vec_operate: public type_set_vec_operate<T>
604 {
605  T val;
606 public:
607  const_set_vec_operate(T val) {
608  this->val = val;
609  }
610  virtual void set(T *arr, size_t num_eles, off_t start_idx) const {
611  for (size_t i = 0; i < num_eles; i++)
612  arr[i] = val;
613  }
614 };
615 
616 class set_vv_operate
617 {
618 public:
619  typedef std::shared_ptr<const set_vv_operate> const_ptr;
620 
621  virtual void set(off_t arr_idx, void *arr, size_t num_eles) const = 0;
622  virtual const scalar_type &get_type() const = 0;
623 };
624 
625 template<class OpType, class InType, class OutType>
626 const scalar_type &bulk_uoperate_impl<OpType, InType, OutType>::get_input_type() const
627 {
628  return get_scalar_type<InType>();
629 }
630 
631 template<class OpType, class InType, class OutType>
632 const scalar_type &bulk_uoperate_impl<OpType, InType, OutType>::get_output_type() const
633 {
634  return get_scalar_type<OutType>();
635 }
636 
637 template<class OpType, class LeftType, class RightType, class ResType>
638 const scalar_type &bulk_operate_impl<OpType, LeftType, RightType, ResType>::get_left_type() const
639 {
640  return get_scalar_type<LeftType>();
641 }
642 
643 template<class OpType, class LeftType, class RightType, class ResType>
644 const scalar_type &bulk_operate_impl<OpType, LeftType, RightType, ResType>::get_right_type() const
645 {
646  return get_scalar_type<RightType>();
647 }
648 
649 template<class OpType, class LeftType, class RightType, class ResType>
650 const scalar_type &bulk_operate_impl<OpType, LeftType, RightType, ResType>::get_output_type() const
651 {
652  return get_scalar_type<ResType>();
653 }
654 
655 class local_vec_store;
656 
657 /*
658  * This operator is different from bulk_uoperate. It treats an array
659  * as a single input and outputs an array of potentially different length.
660  */
661 class arr_apply_operate
662 {
663  size_t num_out_eles;
664 public:
665  typedef std::shared_ptr<const arr_apply_operate> const_ptr;
666 
667  arr_apply_operate(size_t num_out_eles) {
668  this->num_out_eles = num_out_eles;
669  }
670  /*
671  * This virtual method accepts an input array and stores the result
672  * in an output array.
673  */
674  virtual void run(const local_vec_store &in,
675  local_vec_store &out) const = 0;
676 
677  virtual const scalar_type &get_input_type() const = 0;
678  virtual const scalar_type &get_output_type() const = 0;
679 
680  size_t input_entry_size() const {
681  return get_input_type().get_size();
682  }
683 
684  size_t output_entry_size() const {
685  return get_output_type().get_size();
686  }
687 
688  size_t get_num_out_eles() const {
689  return num_out_eles;
690  }
691 };
692 
693 /*
694  * This operator applies to an entry generated by groupby.
695  * It takes a key and a value as input and the type of the value varies.
696  * TODO let's not worry about the output yet.
697  */
698 template<class T>
699 class gr_apply_operate
700 {
701 public:
702  virtual void run(const void *key, const T &val,
703  local_vec_store &vec) const = 0;
704 
705  virtual const scalar_type &get_key_type() const = 0;
706  virtual const scalar_type &get_output_type() const = 0;
707  // This method tells how many elements output for a key. If it's unknown
708  // or isn't fixed, it should return 0.
709  virtual size_t get_num_out_eles() const = 0;
710 };
711 
712 class scatter_gather
713 {
714 public:
715  virtual void scatter(const char *arr, std::vector<char *> &arrs) const = 0;
716  virtual void gather(const std::vector<const char *> &arrs,
717  char *arr) const = 0;
718 };
719 
720 template<class T>
721 class type_scatter_gather: public scatter_gather
722 {
723 public:
724  virtual void scatter(const char *arr, std::vector<char *> &arrs) const {
725  const T *t_arr = (const T *) arr;
726  for (size_t i = 0; i < arrs.size(); i++)
727  *(T *) arrs[i] = t_arr[i];
728  }
729 
730  virtual void gather(const std::vector<const char *> &arrs,
731  char *arr) const {
732  T *t_arr = (T *) arr;
733  for (size_t i = 0; i < arrs.size(); i++)
734  t_arr[i] = *(const T *) arrs[i];
735  }
736 };
737 
738 }
739 
740 #endif
Definition: bulk_operate.h:99
Definition: bulk_operate.h:39
Definition: generic_type.h:138