// sparse_hash_set.h
  1. // Copyright (c) 2005, Google Inc.
  2. // All rights reserved.
  3. //
  4. // Redistribution and use in source and binary forms, with or without
  5. // modification, are permitted provided that the following conditions are
  6. // met:
  7. //
  8. // * Redistributions of source code must retain the above copyright
  9. // notice, this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above
  11. // copyright notice, this list of conditions and the following disclaimer
  12. // in the documentation and/or other materials provided with the
  13. // distribution.
  14. // * Neither the name of Google Inc. nor the names of its
  15. // contributors may be used to endorse or promote products derived from
  16. // this software without specific prior written permission.
  17. //
  18. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. // ---
  30. //
// This is just a very thin wrapper over sparsehashtable.h, just
// like sgi stl's stl_hash_set is a very thin wrapper over
// stl_hashtable. Unlike sparse_hash_map, there is no operator[]
// here: a set has no separate data_type, since each element is
// its own key.
  36. //
  37. // This is more different from sparse_hash_map than you might think,
  38. // because all iterators for sets are const (you obviously can't
  39. // change the key, and for sets there is no value).
  40. //
// We adhere mostly to the STL semantics for hash-set. One important
// exception is that insert() may invalidate iterators entirely -- STL
// semantics are that insert() may reorder iterators, but they all
// still refer to something valid in the hashtable. Not so for us.
// Likewise, insert() may invalidate pointers into the hashtable.
// (Whether insert() invalidates iterators and pointers depends on
// whether it results in a hashtable resize.) On the plus side,
// erase() doesn't invalidate iterators or pointers at all, or even
// change the ordering of elements.
  50. //
  51. // Here are a few "power user" tips:
  52. //
// 1) set_deleted_key():
//    Unlike STL's hash_set, if you want to use erase() you
//    *must* call set_deleted_key() after construction.
  56. //
  57. // 2) resize(0):
  58. // When an item is deleted, its memory isn't freed right
  59. // away. This allows you to iterate over a hashtable,
  60. // and call erase(), without invalidating the iterator.
  61. // To force the memory to be freed, call resize(0).
  62. // For tr1 compatibility, this can also be called as rehash(0).
  63. //
  64. // 3) min_load_factor(0.0)
  65. // Setting the minimum load factor to 0.0 guarantees that
  66. // the hash table will never shrink.
  67. //
  68. // Roughly speaking:
  69. // (1) dense_hash_set: fastest, uses the most memory unless entries are small
  70. // (2) sparse_hash_set: slowest, uses the least memory
  71. // (3) hash_set / unordered_set (STL): in the middle
  72. //
  73. // Typically I use sparse_hash_set when I care about space and/or when
  74. // I need to save the hashtable on disk. I use hash_set otherwise. I
  75. // don't personally use dense_hash_set ever; some people use it for
  76. // small sets with lots of lookups.
  77. //
  78. // - dense_hash_set has, typically, about 78% memory overhead (if your
  79. // data takes up X bytes, the hash_set uses .78X more bytes in overhead).
  80. // - sparse_hash_set has about 4 bits overhead per entry.
  81. // - sparse_hash_set can be 3-7 times slower than the others for lookup and,
  82. // especially, inserts. See time_hash_map.cc for details.
  83. //
  84. // See /usr/(local/)?doc/sparsehash-*/sparse_hash_set.html
  85. // for information about how to use this class.
  86. #ifndef _SPARSE_HASH_SET_H_
  87. #define _SPARSE_HASH_SET_H_
  88. #include "internal/sparseconfig.h"
  89. #include <algorithm> // needed by stl_alloc
  90. #include <functional> // for equal_to<>
  91. #include <memory> // for alloc (which we don't use)
  92. #include <utility> // for pair<>
  93. #include "internal/libc_allocator_with_realloc.h"
  94. #include "internal/sparsehashtable.h" // IWYU pragma: export
  95. #include HASH_FUN_H // for hash<>
  96. _START_GOOGLE_NAMESPACE_
  97. template <class Value,
  98. class HashFcn = SPARSEHASH_HASH<Value>, // defined in sparseconfig.h
  99. class EqualKey = std::equal_to<Value>,
  100. class Alloc = libc_allocator_with_realloc<Value> >
  101. class sparse_hash_set {
  102. private:
  103. // Apparently identity is not stl-standard, so we define our own
  104. struct Identity {
  105. typedef const Value& result_type;
  106. const Value& operator()(const Value& v) const { return v; }
  107. };
  108. struct SetKey {
  109. void operator()(Value* value, const Value& new_key) const {
  110. *value = new_key;
  111. }
  112. };
  113. typedef sparse_hashtable<Value, Value, HashFcn, Identity, SetKey,
  114. EqualKey, Alloc> ht;
  115. ht rep;
  116. public:
  117. typedef typename ht::key_type key_type;
  118. typedef typename ht::value_type value_type;
  119. typedef typename ht::hasher hasher;
  120. typedef typename ht::key_equal key_equal;
  121. typedef Alloc allocator_type;
  122. typedef typename ht::size_type size_type;
  123. typedef typename ht::difference_type difference_type;
  124. typedef typename ht::const_pointer pointer;
  125. typedef typename ht::const_pointer const_pointer;
  126. typedef typename ht::const_reference reference;
  127. typedef typename ht::const_reference const_reference;
  128. typedef typename ht::const_iterator iterator;
  129. typedef typename ht::const_iterator const_iterator;
  130. typedef typename ht::const_local_iterator local_iterator;
  131. typedef typename ht::const_local_iterator const_local_iterator;
  132. // Iterator functions -- recall all iterators are const
  133. iterator begin() const { return rep.begin(); }
  134. iterator end() const { return rep.end(); }
  135. // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements.
  136. local_iterator begin(size_type i) const { return rep.begin(i); }
  137. local_iterator end(size_type i) const { return rep.end(i); }
  138. // Accessor functions
  139. allocator_type get_allocator() const { return rep.get_allocator(); }
  140. hasher hash_funct() const { return rep.hash_funct(); }
  141. hasher hash_function() const { return hash_funct(); } // tr1 name
  142. key_equal key_eq() const { return rep.key_eq(); }
  143. // Constructors
  144. explicit sparse_hash_set(size_type expected_max_items_in_table = 0,
  145. const hasher& hf = hasher(),
  146. const key_equal& eql = key_equal(),
  147. const allocator_type& alloc = allocator_type())
  148. : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) {
  149. }
  150. template <class InputIterator>
  151. sparse_hash_set(InputIterator f, InputIterator l,
  152. size_type expected_max_items_in_table = 0,
  153. const hasher& hf = hasher(),
  154. const key_equal& eql = key_equal(),
  155. const allocator_type& alloc = allocator_type())
  156. : rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) {
  157. rep.insert(f, l);
  158. }
  159. // We use the default copy constructor
  160. // We use the default operator=()
  161. // We use the default destructor
  162. void clear() { rep.clear(); }
  163. void swap(sparse_hash_set& hs) { rep.swap(hs.rep); }
  164. // Functions concerning size
  165. size_type size() const { return rep.size(); }
  166. size_type max_size() const { return rep.max_size(); }
  167. bool empty() const { return rep.empty(); }
  168. size_type bucket_count() const { return rep.bucket_count(); }
  169. size_type max_bucket_count() const { return rep.max_bucket_count(); }
  170. // These are tr1 methods. bucket() is the bucket the key is or would be in.
  171. size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
  172. size_type bucket(const key_type& key) const { return rep.bucket(key); }
  173. float load_factor() const {
  174. return size() * 1.0f / bucket_count();
  175. }
  176. float max_load_factor() const {
  177. float shrink, grow;
  178. rep.get_resizing_parameters(&shrink, &grow);
  179. return grow;
  180. }
  181. void max_load_factor(float new_grow) {
  182. float shrink, grow;
  183. rep.get_resizing_parameters(&shrink, &grow);
  184. rep.set_resizing_parameters(shrink, new_grow);
  185. }
  186. // These aren't tr1 methods but perhaps ought to be.
  187. float min_load_factor() const {
  188. float shrink, grow;
  189. rep.get_resizing_parameters(&shrink, &grow);
  190. return shrink;
  191. }
  192. void min_load_factor(float new_shrink) {
  193. float shrink, grow;
  194. rep.get_resizing_parameters(&shrink, &grow);
  195. rep.set_resizing_parameters(new_shrink, grow);
  196. }
  197. // Deprecated; use min_load_factor() or max_load_factor() instead.
  198. void set_resizing_parameters(float shrink, float grow) {
  199. rep.set_resizing_parameters(shrink, grow);
  200. }
  201. void resize(size_type hint) { rep.resize(hint); }
  202. void rehash(size_type hint) { resize(hint); } // the tr1 name
  203. // Lookup routines
  204. iterator find(const key_type& key) const { return rep.find(key); }
  205. size_type count(const key_type& key) const { return rep.count(key); }
  206. std::pair<iterator, iterator> equal_range(const key_type& key) const {
  207. return rep.equal_range(key);
  208. }
  209. // Insertion routines
  210. std::pair<iterator, bool> insert(const value_type& obj) {
  211. std::pair<typename ht::iterator, bool> p = rep.insert(obj);
  212. return std::pair<iterator, bool>(p.first, p.second); // const to non-const
  213. }
  214. template <class InputIterator> void insert(InputIterator f, InputIterator l) {
  215. rep.insert(f, l);
  216. }
  217. void insert(const_iterator f, const_iterator l) {
  218. rep.insert(f, l);
  219. }
  220. // Required for std::insert_iterator; the passed-in iterator is ignored.
  221. iterator insert(iterator, const value_type& obj) {
  222. return insert(obj).first;
  223. }
  224. // Deletion routines
  225. // THESE ARE NON-STANDARD! I make you specify an "impossible" key
  226. // value to identify deleted buckets. You can change the key as
  227. // time goes on, or get rid of it entirely to be insert-only.
  228. void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
  229. void clear_deleted_key() { rep.clear_deleted_key(); }
  230. key_type deleted_key() const { return rep.deleted_key(); }
  231. // These are standard
  232. size_type erase(const key_type& key) { return rep.erase(key); }
  233. void erase(iterator it) { rep.erase(it); }
  234. void erase(iterator f, iterator l) { rep.erase(f, l); }
  235. // Comparison
  236. bool operator==(const sparse_hash_set& hs) const { return rep == hs.rep; }
  237. bool operator!=(const sparse_hash_set& hs) const { return rep != hs.rep; }
  238. // I/O -- this is an add-on for writing metainformation to disk
  239. //
  240. // For maximum flexibility, this does not assume a particular
  241. // file type (though it will probably be a FILE *). We just pass
  242. // the fp through to rep.
  243. // If your keys and values are simple enough, you can pass this
  244. // serializer to serialize()/unserialize(). "Simple enough" means
  245. // value_type is a POD type that contains no pointers. Note,
  246. // however, we don't try to normalize endianness.
  247. typedef typename ht::NopointerSerializer NopointerSerializer;
  248. // serializer: a class providing operator()(OUTPUT*, const value_type&)
  249. // (writing value_type to OUTPUT). You can specify a
  250. // NopointerSerializer object if appropriate (see above).
  251. // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
  252. // pointer to a class providing size_t Write(const void*, size_t),
  253. // which writes a buffer into a stream (which fp presumably
  254. // owns) and returns the number of bytes successfully written.
  255. // Note basic_ostream<not_char> is not currently supported.
  256. template <typename ValueSerializer, typename OUTPUT>
  257. bool serialize(ValueSerializer serializer, OUTPUT* fp) {
  258. return rep.serialize(serializer, fp);
  259. }
  260. // serializer: a functor providing operator()(INPUT*, value_type*)
  261. // (reading from INPUT and into value_type). You can specify a
  262. // NopointerSerializer object if appropriate (see above).
  263. // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
  264. // pointer to a class providing size_t Read(void*, size_t),
  265. // which reads into a buffer from a stream (which fp presumably
  266. // owns) and returns the number of bytes successfully read.
  267. // Note basic_istream<not_char> is not currently supported.
  268. // NOTE: Since value_type is const Key, ValueSerializer
  269. // may need to do a const cast in order to fill in the key.
  270. // NOTE: if Key is not a POD type, the serializer MUST use
  271. // placement-new to initialize its value, rather than a normal
  272. // equals-assignment or similar. (The value_type* passed into
  273. // the serializer points to garbage memory.)
  274. template <typename ValueSerializer, typename INPUT>
  275. bool unserialize(ValueSerializer serializer, INPUT* fp) {
  276. return rep.unserialize(serializer, fp);
  277. }
  278. // The four methods below are DEPRECATED.
  279. // Use serialize() and unserialize() for new code.
  280. template <typename OUTPUT>
  281. bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); }
  282. template <typename INPUT>
  283. bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); }
  284. template <typename OUTPUT>
  285. bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); }
  286. template <typename INPUT>
  287. bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); }
  288. };
  289. template <class Val, class HashFcn, class EqualKey, class Alloc>
  290. inline void swap(sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1,
  291. sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2) {
  292. hs1.swap(hs2);
  293. }
  294. _END_GOOGLE_NAMESPACE_
  295. #endif /* _SPARSE_HASH_SET_H_ */