dense_hash_map.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. // Copyright (c) 2005, Google Inc.
  2. // All rights reserved.
  3. //
  4. // Redistribution and use in source and binary forms, with or without
  5. // modification, are permitted provided that the following conditions are
  6. // met:
  7. //
  8. // * Redistributions of source code must retain the above copyright
  9. // notice, this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above
  11. // copyright notice, this list of conditions and the following disclaimer
  12. // in the documentation and/or other materials provided with the
  13. // distribution.
  14. // * Neither the name of Google Inc. nor the names of its
  15. // contributors may be used to endorse or promote products derived from
  16. // this software without specific prior written permission.
  17. //
  18. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. // ----
  30. //
  31. // This is just a very thin wrapper over densehashtable.h, just
  32. // like sgi stl's stl_hash_map is a very thin wrapper over
  33. // stl_hashtable. The major thing we define is operator[], because
  34. // we have a concept of a data_type which stl_hashtable doesn't
  35. // (it only has a key and a value).
  36. //
  37. // NOTE: this is exactly like sparse_hash_map.h, with the word
  38. // "sparse" replaced by "dense", except for the addition of
  39. // set_empty_key().
  40. //
  41. // YOU MUST CALL SET_EMPTY_KEY() IMMEDIATELY AFTER CONSTRUCTION.
  42. //
  43. // Otherwise your program will die in mysterious ways. (Note if you
  44. // use the constructor that takes an InputIterator range, you pass in
  45. // the empty key in the constructor, rather than after. As a result,
  46. // this constructor differs from the standard STL version.)
  47. //
  48. // In other respects, we adhere mostly to the STL semantics for
  49. // hash-map. One important exception is that insert() may invalidate
  50. // iterators entirely -- STL semantics are that insert() may reorder
  51. // iterators, but they all still refer to something valid in the
  52. // hashtable. Not so for us. Likewise, insert() may invalidate
  53. // pointers into the hashtable. (Whether insert invalidates iterators
  54. // and pointers depends on whether it results in a hashtable resize).
  55. // On the plus side, erase() doesn't invalidate iterators or pointers
  56. // at all, or even change the ordering of elements.
  57. //
  58. // Here are a few "power user" tips:
  59. //
  60. // 1) set_deleted_key():
  61. // If you want to use erase() you *must* call set_deleted_key(),
  62. // in addition to set_empty_key(), after construction.
  63. // The deleted and empty keys must differ.
  64. //
  65. // 2) resize(0):
  66. // When an item is deleted, its memory isn't freed right
  67. // away. This allows you to iterate over a hashtable,
  68. // and call erase(), without invalidating the iterator.
  69. // To force the memory to be freed, call resize(0).
  70. // For tr1 compatibility, this can also be called as rehash(0).
  71. //
  72. // 3) min_load_factor(0.0)
  73. // Setting the minimum load factor to 0.0 guarantees that
  74. // the hash table will never shrink.
  75. //
  76. // Roughly speaking:
  77. // (1) dense_hash_map: fastest, uses the most memory unless entries are small
  78. // (2) sparse_hash_map: slowest, uses the least memory
  79. // (3) hash_map / unordered_map (STL): in the middle
  80. //
  81. // Typically I use sparse_hash_map when I care about space and/or when
  82. // I need to save the hashtable on disk. I use hash_map otherwise. I
  83. // don't personally use dense_hash_map ever; some people use it for
  84. // small maps with lots of lookups.
  85. //
  86. // - dense_hash_map has, typically, about 78% memory overhead (if your
  87. // data takes up X bytes, the hash_map uses .78X more bytes in overhead).
  88. // - sparse_hash_map has about 4 bits overhead per entry.
  89. // - sparse_hash_map can be 3-7 times slower than the others for lookup and,
  90. // especially, inserts. See time_hash_map.cc for details.
  91. //
  92. // See /usr/(local/)?doc/sparsehash-*/dense_hash_map.html
  93. // for information about how to use this class.
  94. #ifndef _DENSE_HASH_MAP_H_
  95. #define _DENSE_HASH_MAP_H_
  96. #include "internal/sparseconfig.h"
  97. #include <algorithm> // needed by stl_alloc
  98. #include <functional> // for equal_to<>, select1st<>, etc
  99. #include <memory> // for alloc
  100. #include <utility> // for pair<>
  101. #include "internal/densehashtable.h" // IWYU pragma: export
  102. #include "internal/libc_allocator_with_realloc.h"
  103. #include HASH_FUN_H // for hash<>
  104. _START_GOOGLE_NAMESPACE_
  105. template <class Key, class T,
  106. class HashFcn = SPARSEHASH_HASH<Key>, // defined in sparseconfig.h
  107. class EqualKey = std::equal_to<Key>,
  108. class Alloc = libc_allocator_with_realloc<std::pair<const Key, T> > >
  109. class dense_hash_map {
  110. private:
  111. // Apparently select1st is not stl-standard, so we define our own
  112. struct SelectKey {
  113. typedef const Key& result_type;
  114. const Key& operator()(const std::pair<const Key, T>& p) const {
  115. return p.first;
  116. }
  117. };
  118. struct SetKey {
  119. void operator()(std::pair<const Key, T>* value, const Key& new_key) const {
  120. *const_cast<Key*>(&value->first) = new_key;
  121. // It would be nice to clear the rest of value here as well, in
  122. // case it's taking up a lot of memory. We do this by clearing
  123. // the value. This assumes T has a zero-arg constructor!
  124. value->second = T();
  125. }
  126. };
  127. // For operator[].
  128. struct DefaultValue {
  129. std::pair<const Key, T> operator()(const Key& key) {
  130. return std::make_pair(key, T());
  131. }
  132. };
  133. // The actual data
  134. typedef dense_hashtable<std::pair<const Key, T>, Key, HashFcn, SelectKey,
  135. SetKey, EqualKey, Alloc> ht;
  136. ht rep;
  137. public:
  138. typedef typename ht::key_type key_type;
  139. typedef T data_type;
  140. typedef T mapped_type;
  141. typedef typename ht::value_type value_type;
  142. typedef typename ht::hasher hasher;
  143. typedef typename ht::key_equal key_equal;
  144. typedef Alloc allocator_type;
  145. typedef typename ht::size_type size_type;
  146. typedef typename ht::difference_type difference_type;
  147. typedef typename ht::pointer pointer;
  148. typedef typename ht::const_pointer const_pointer;
  149. typedef typename ht::reference reference;
  150. typedef typename ht::const_reference const_reference;
  151. typedef typename ht::iterator iterator;
  152. typedef typename ht::const_iterator const_iterator;
  153. typedef typename ht::local_iterator local_iterator;
  154. typedef typename ht::const_local_iterator const_local_iterator;
  155. // Iterator functions
  156. iterator begin() { return rep.begin(); }
  157. iterator end() { return rep.end(); }
  158. const_iterator begin() const { return rep.begin(); }
  159. const_iterator end() const { return rep.end(); }
  160. // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements.
  161. local_iterator begin(size_type i) { return rep.begin(i); }
  162. local_iterator end(size_type i) { return rep.end(i); }
  163. const_local_iterator begin(size_type i) const { return rep.begin(i); }
  164. const_local_iterator end(size_type i) const { return rep.end(i); }
  165. // Accessor functions
  166. allocator_type get_allocator() const { return rep.get_allocator(); }
  167. hasher hash_funct() const { return rep.hash_funct(); }
  168. hasher hash_function() const { return hash_funct(); }
  169. key_equal key_eq() const { return rep.key_eq(); }
  170. // Constructors
  171. explicit dense_hash_map(size_type expected_max_items_in_table = 0,
  172. const hasher& hf = hasher(),
  173. const key_equal& eql = key_equal(),
  174. const allocator_type& alloc = allocator_type())
  175. : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), alloc) {
  176. }
  177. template <class InputIterator>
  178. dense_hash_map(InputIterator f, InputIterator l,
  179. const key_type& empty_key_val,
  180. size_type expected_max_items_in_table = 0,
  181. const hasher& hf = hasher(),
  182. const key_equal& eql = key_equal(),
  183. const allocator_type& alloc = allocator_type())
  184. : rep(expected_max_items_in_table, hf, eql, SelectKey(), SetKey(), alloc) {
  185. set_empty_key(empty_key_val);
  186. rep.insert(f, l);
  187. }
  188. // We use the default copy constructor
  189. // We use the default operator=()
  190. // We use the default destructor
  191. void clear() { rep.clear(); }
  192. // This clears the hash map without resizing it down to the minimum
  193. // bucket count, but rather keeps the number of buckets constant
  194. void clear_no_resize() { rep.clear_no_resize(); }
  195. void swap(dense_hash_map& hs) { rep.swap(hs.rep); }
  196. // Functions concerning size
  197. size_type size() const { return rep.size(); }
  198. size_type max_size() const { return rep.max_size(); }
  199. bool empty() const { return rep.empty(); }
  200. size_type bucket_count() const { return rep.bucket_count(); }
  201. size_type max_bucket_count() const { return rep.max_bucket_count(); }
  202. // These are tr1 methods. bucket() is the bucket the key is or would be in.
  203. size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
  204. size_type bucket(const key_type& key) const { return rep.bucket(key); }
  205. float load_factor() const {
  206. return size() * 1.0f / bucket_count();
  207. }
  208. float max_load_factor() const {
  209. float shrink, grow;
  210. rep.get_resizing_parameters(&shrink, &grow);
  211. return grow;
  212. }
  213. void max_load_factor(float new_grow) {
  214. float shrink, grow;
  215. rep.get_resizing_parameters(&shrink, &grow);
  216. rep.set_resizing_parameters(shrink, new_grow);
  217. }
  218. // These aren't tr1 methods but perhaps ought to be.
  219. float min_load_factor() const {
  220. float shrink, grow;
  221. rep.get_resizing_parameters(&shrink, &grow);
  222. return shrink;
  223. }
  224. void min_load_factor(float new_shrink) {
  225. float shrink, grow;
  226. rep.get_resizing_parameters(&shrink, &grow);
  227. rep.set_resizing_parameters(new_shrink, grow);
  228. }
  229. // Deprecated; use min_load_factor() or max_load_factor() instead.
  230. void set_resizing_parameters(float shrink, float grow) {
  231. rep.set_resizing_parameters(shrink, grow);
  232. }
  233. void resize(size_type hint) { rep.resize(hint); }
  234. void rehash(size_type hint) { resize(hint); } // the tr1 name
  235. // Lookup routines
  236. iterator find(const key_type& key) { return rep.find(key); }
  237. const_iterator find(const key_type& key) const { return rep.find(key); }
  238. data_type& operator[](const key_type& key) { // This is our value-add!
  239. // If key is in the hashtable, returns find(key)->second,
  240. // otherwise returns insert(value_type(key, T()).first->second.
  241. // Note it does not create an empty T unless the find fails.
  242. return rep.template find_or_insert<DefaultValue>(key).second;
  243. }
  244. size_type count(const key_type& key) const { return rep.count(key); }
  245. std::pair<iterator, iterator> equal_range(const key_type& key) {
  246. return rep.equal_range(key);
  247. }
  248. std::pair<const_iterator, const_iterator> equal_range(const key_type& key)
  249. const {
  250. return rep.equal_range(key);
  251. }
  252. // Insertion routines
  253. std::pair<iterator, bool> insert(const value_type& obj) {
  254. return rep.insert(obj);
  255. }
  256. template <class InputIterator> void insert(InputIterator f, InputIterator l) {
  257. rep.insert(f, l);
  258. }
  259. void insert(const_iterator f, const_iterator l) {
  260. rep.insert(f, l);
  261. }
  262. // Required for std::insert_iterator; the passed-in iterator is ignored.
  263. iterator insert(iterator, const value_type& obj) {
  264. return insert(obj).first;
  265. }
  266. // Deletion and empty routines
  267. // THESE ARE NON-STANDARD! I make you specify an "impossible" key
  268. // value to identify deleted and empty buckets. You can change the
  269. // deleted key as time goes on, or get rid of it entirely to be insert-only.
  270. void set_empty_key(const key_type& key) { // YOU MUST CALL THIS!
  271. rep.set_empty_key(value_type(key, data_type())); // rep wants a value
  272. }
  273. key_type empty_key() const {
  274. return rep.empty_key().first; // rep returns a value
  275. }
  276. void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
  277. void clear_deleted_key() { rep.clear_deleted_key(); }
  278. key_type deleted_key() const { return rep.deleted_key(); }
  279. // These are standard
  280. size_type erase(const key_type& key) { return rep.erase(key); }
  281. void erase(iterator it) { rep.erase(it); }
  282. void erase(iterator f, iterator l) { rep.erase(f, l); }
  283. // Comparison
  284. bool operator==(const dense_hash_map& hs) const { return rep == hs.rep; }
  285. bool operator!=(const dense_hash_map& hs) const { return rep != hs.rep; }
  286. // I/O -- this is an add-on for writing hash map to disk
  287. //
  288. // For maximum flexibility, this does not assume a particular
  289. // file type (though it will probably be a FILE *). We just pass
  290. // the fp through to rep.
  291. // If your keys and values are simple enough, you can pass this
  292. // serializer to serialize()/unserialize(). "Simple enough" means
  293. // value_type is a POD type that contains no pointers. Note,
  294. // however, we don't try to normalize endianness.
  295. typedef typename ht::NopointerSerializer NopointerSerializer;
  296. // serializer: a class providing operator()(OUTPUT*, const value_type&)
  297. // (writing value_type to OUTPUT). You can specify a
  298. // NopointerSerializer object if appropriate (see above).
  299. // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
  300. // pointer to a class providing size_t Write(const void*, size_t),
  301. // which writes a buffer into a stream (which fp presumably
  302. // owns) and returns the number of bytes successfully written.
  303. // Note basic_ostream<not_char> is not currently supported.
  304. template <typename ValueSerializer, typename OUTPUT>
  305. bool serialize(ValueSerializer serializer, OUTPUT* fp) {
  306. return rep.serialize(serializer, fp);
  307. }
  308. // serializer: a functor providing operator()(INPUT*, value_type*)
  309. // (reading from INPUT and into value_type). You can specify a
  310. // NopointerSerializer object if appropriate (see above).
  311. // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
  312. // pointer to a class providing size_t Read(void*, size_t),
  313. // which reads into a buffer from a stream (which fp presumably
  314. // owns) and returns the number of bytes successfully read.
  315. // Note basic_istream<not_char> is not currently supported.
  316. // NOTE: Since value_type is std::pair<const Key, T>, ValueSerializer
  317. // may need to do a const cast in order to fill in the key.
  318. template <typename ValueSerializer, typename INPUT>
  319. bool unserialize(ValueSerializer serializer, INPUT* fp) {
  320. return rep.unserialize(serializer, fp);
  321. }
  322. };
  323. // We need a global swap as well
  324. template <class Key, class T, class HashFcn, class EqualKey, class Alloc>
  325. inline void swap(dense_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm1,
  326. dense_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm2) {
  327. hm1.swap(hm2);
  328. }
  329. _END_GOOGLE_NAMESPACE_
  330. #endif /* _DENSE_HASH_MAP_H_ */