FlashGraph-ng
A new frontier in large-scale graph analysis and data mining
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Pages
generic_hashtable.h
1 #ifndef __GENERIC_HASHTABLE_H__
2 #define __GENERIC_HASHTABLE_H__
3 
4 /*
5  * Copyright 2014 Open Connectome Project (http://openconnecto.me)
6  * Written by Da Zheng (zhengda1936@gmail.com)
7  *
8  * This file is part of FlashMatrix.
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  */
22 
23 #include<unordered_map>
24 
25 #include "data_frame.h"
26 #include "mem_vec_store.h"
27 
28 namespace fm
29 {
30 
31 class agg_operate;
32 
33 class generic_hashtable
34 {
35 public:
36  typedef std::shared_ptr<generic_hashtable> ptr;
37 
38  /*
39  * This method inserts an array of keys and values.
40  * If a key exists, merge the old value and the new value with
41  * the provided operator.
42  */
43  virtual void insert(size_t num, const void *keys, const void *vals,
44  const agg_operate &op) = 0;
45  /*
46  * Merge the input hashtable to this table.
47  * If a key exists in the original table, merge the corresponding values
48  * with the provided operator.
49  */
50  virtual void merge(const generic_hashtable &table, const agg_operate &op) = 0;
51  /*
52  * Convert the hashtable to a data frame.
53  */
54  virtual data_frame::ptr conv2df() const = 0;
55 };
56 
57 template<class KeyType, int ValSize>
58 class generic_hashtable_impl: public generic_hashtable
59 {
60  const scalar_type &real_val_type;
61 
62  /*
63  * This isn't the real value type of the hashtable, but it has the same
64  * size as the real value, so we can use it to create a C++ hashtable.
65  */
66  typedef struct {
67  char data[ValSize];
68  } ValType;
69  std::unordered_map<KeyType, ValType> table;
70 public:
71  generic_hashtable_impl(const scalar_type &type): real_val_type(type) {
72  }
73 
74  void insert(size_t num, const void *pkeys, const void *pvals,
75  const agg_operate &op) {
76  const KeyType *keys = (const KeyType *) pkeys;
77  const ValType *vals = (const ValType *) pvals;
78  assert(op.has_combine());
79  for (size_t i = 0; i < num; i++) {
80  auto ret = table.insert(std::pair<KeyType, ValType>(keys[i],
81  vals[i]));
82  // TODO this is going to be slow.
83  if (!ret.second)
84  op.runCombine(1, &vals[i], &ret.first->second, &ret.first->second);
85  }
86  }
87 
88  virtual void merge(const generic_hashtable &gtable, const agg_operate &op) {
89  const generic_hashtable_impl<KeyType, ValSize> &gtable1
90  = dynamic_cast<const generic_hashtable_impl<KeyType, ValSize> &>(
91  gtable);
92  assert(op.has_combine());
93  for (auto it = gtable1.table.begin(); it != gtable1.table.end(); it++) {
94  auto ret = table.insert(std::pair<KeyType, ValType>(it->first,
95  it->second));
96  if (!ret.second)
97  op.runCombine(1, &it->second, &ret.first->second,
98  &ret.first->second);
99  }
100  }
101 
102  virtual data_frame::ptr conv2df() const {
103  detail::smp_vec_store::ptr keys = detail::smp_vec_store::create(table.size(),
104  get_scalar_type<KeyType>());
105  detail::smp_vec_store::ptr vals = detail::smp_vec_store::create(table.size(),
106  real_val_type);
107  size_t vec_idx = 0;
108  for (auto it = table.begin(); it != table.end(); it++) {
109  keys->set<KeyType>(vec_idx, it->first);
110  vals->set<ValType>(vec_idx, it->second);
111  vec_idx++;
112  }
113  data_frame::ptr ret = data_frame::create();
114  ret->add_vec("key", keys);
115  ret->add_vec("val", vals);
116  return ret;
117  }
118 };
119 
120 }
121 
122 #endif