1 #ifndef HASH_TABLE_CLASS
2 #define HASH_TABLE_CLASS
4 /*****************************************************************************\
7 * Author : Chris Koeritz *
9 *******************************************************************************
10 * Copyright (c) 2001-$now By Author. This program is free software; you can *
11 * redistribute it and/or modify it under the terms of the GNU General Public *
12 * License as published by the Free Software Foundation; either version 2 of *
13 * the License or (at your option) any later version. This is online at: *
14 * http://www.fsf.org/copyleft/gpl.html *
15 * Please send any updates to: fred@gruntose.com *
16 \*****************************************************************************/
20 #include <basis/array.h>
21 #include <basis/byte_array.h>
22 #include <basis/contracts.h>
23 #include <basis/enhance_cpp.h>
24 #include <basis/functions.h>
28 namespace structures {
31 template <class key, class contents> class internal_hash_array;
33 //! A hashing algorithm takes a key and derives a related integer from it.
35 The hashing_algorithm is used in hash_tables for placing objects into slots
36 that can be easily found again. The numerical hash should be faster than
37 actually doing a search on what might be unsorted data otherwise.
40 class hashing_algorithm : public virtual basis::clonable
43 virtual basis::un_int hash(const void *key_data, int key_length) const = 0;
44 //!< returns a hash value based on the "key_data" and "key_length".
45 /*!< the "key_length" is provided from the sizeof() of the key type used
46 in the hash_table (below). it is up to the implementor to create a hash
47 value that spreads the keys to be hashed appropriately. if similar keys
48 create same or similar hash values, then the efficiency of the hash_table
51 virtual hashing_algorithm *clone() const = 0;
52 //!< supports virtual construction of new algorithm objects.
57 //! Implements hashing into buckets for quick object access.
59 The buckets are currently simple lists, but if the hashing algorithm is well
60 chosen, then that's not a major problem. This makes lookups a lot faster
61 than a linear search, but no particular performance guarantees are made at
65 template <class key_type, class contents>
66 // the key_type must support valid copy constructor, assignment operator and
67 // equality operators. the contents need not support any special operations;
68 // it is always considered as just a pointer.
69 class hash_table : public virtual basis::nameable
72 hash_table(const hashing_algorithm &hasher, int estimated_elements);
73 //!< Creates a table using the "hasher" that is ready to store "estimated_elements".
74 /*!< the "hasher" must provide the hashing algorithm for the two types
75 specified. if the implementation is thread safe, then one object can
76 be constructed to use with all the same types of hash tables.
77 note that "hasher" must exist longer than any classes based on it; do
78 not let "hasher" go out of scope or the hash table will explode. */
80 virtual ~hash_table();
81 //!< destroys any objects left in the hash_table.
83 DEFINE_CLASS_NAME("hash_table");
85 void rehash(int estimated_elements);
86 //!< resizes the hashing structures to optimise for a new size of "estimated_elements".
87 /*!< this is a potentially expensive operation. */
90 IS_NEW = basis::common::IS_NEW,
91 EXISTING = basis::common::EXISTING
94 static int calculate_num_slots(int estimated_elements);
95 //!< a helper method that lets us know what n is for how many 2^n slots we should have.
98 //!< the number of valid items we found by traversing the hash table.
99 /*!< this is a very costly method. */
101 int estimated_elements() const { return c_estim_elements; }
102 //!< returns the size of table we're optimized for.
104 basis::outcome add(const key_type &key, contents *to_store);
105 //!< Stores "to_store" into the table given its "key" for hashing.
106 /*!< This places an entry in the hash table with the contents "to_store",
107 using the "key" structure to identify it. the return value reports whether
108 the "key" was already in the list or not. if the "key" was already in use,
109 then the contents for it get replaced with "to_store". note that the hash
110 table assumes ownership of the "to_store" object but just makes a copy of
111 the key. thus, if an item is replaced, the previous contents are
114 basis::outcome fast_dangerous_add(const key_type &key, contents *to_store);
115 //!< Like the add method above, but doesn't check for duplicates.
116 /*!< This should only ever be used when one has already guaranteed that
117 the table doesn't have a duplicate item for the "key" specified. */
119 bool find(const key_type &key, contents * &item_found) const;
120 //!< locates the item specified by the "key", if possible.
121 /*!< true is returned on success and the "item_found" is filled in. the
122 "item_found" is a pointer to the actual data held in the table, so do not
123 destroy or otherwise damage the "item_found". */
125 contents *find(const key_type &key) const
126 { contents *c = NULL_POINTER; return find(key, c)? c : NULL_POINTER; }
127 //!< simplified form of above find() method.
129 contents *acquire(const key_type &key);
130 //!< retrieves the contents held for "key" out of the table.
131 /*!< afterwards, the contents pointer is the caller's responsibility; it
132 is no longer in the table and must be destroyed externally. if no item
133 was found for the "key", then NULL_POINTER is returned. */
135 bool zap(const key_type &key);
136 //!< removes the entry with the "key" specified.
137 /*!< true is returned if the item was found and destroyed. */
140 //!< removes all entries in the table and returns it to a pristine state.
142 typedef bool apply_function(const key_type &key, contents ¤t,
144 //!< the "apply_function" is what a user of the "apply" method must supply.
145 /*!< the function will be called on every item in the table unless one of
146 the invocations returns false; this causes the apply process to stop.
147 the "data_link" provides a way for the function to refer back to an
148 parent class of some sort. */
150 void apply(apply_function *to_apply, void *data_link);
151 //!< Invokes the function "to_apply" on every entry in the table.
152 /*!< This calls the "to_apply" function on every item in the catalog
153 until the function returns false. The "data_link" pointer is available to
154 the applied method and can be used to communicate an object for use during
155 the iteration. NOTE: it is NOT safe to rearrange or manipulate the table
156 in any way from your "to_apply" function. */
158 basis::outcome add(key_type *key, contents *to_store, bool check_dupes = true);
159 //!< specialized add for a pre-existing pointer "key".
160 /*!< responsibility for the "key" is taken over by the hash table, as of
161 course it is for the "to_store" pointer. this just allows others to
162 generate new keys and hand them over to us when it might otherwise be
163 very costly to copy the key structure. if "check_dupes" is not true,
164 then that asserts that you have independently verified that there's no
165 need to check whether the key is already present. */
168 //!< returns true if the hash table is internally consistent.
169 /*!< this is mainly for debugging. */
172 int c_estim_elements; //!< expected running size for the hash table.
173 hashing_algorithm *_hasher; //!< algorithm for getting hash value.
174 internal_hash_array<key_type, contents> *_table; //!< storage area.
176 //!< tracks where we left off iterating. we restart just after that spot.
178 hash_table(const hash_table &to_copy);
179 //!< not allowed; use the copy_hash_table function below.
180 hash_table &operator =(const hash_table &to_copy);
181 //!< not allowed; use the copy_hash_table function below.
184 internal_hash_array<key_type, contents> &table_access() const;
185 //!< special accessor for the copy_hash_table method only.
190 //! Copies the entire hash table, given a contents type that supports copying.
192 Provides a copy operation on hash tables where the contents object supports
193 a copy constructor, which is not appropriate to require in general. The
194 hash_table held in "target" will be wiped out and replaced with items
195 equivalent to those in "source". */
197 template <class key_type, class contents>
198 void copy_hash_table(hash_table<key_type, contents> &target,
199 const hash_table<key_type, contents> &source);
203 // implementations for longer methods below....
205 // this is a very special micro-managed class. it holds two pointers which
206 // it neither creates nor destroys. thus all interaction with it must be
207 // careful about removing these objects at the appropriate time. we don't
208 // want automatic memory management since we want a cheap copy on the items
211 template <class key_type, class contents>
212 class hash_wrapper : public virtual basis::root_object
218 hash_wrapper(key_type *id = NULL_POINTER, contents *data = NULL_POINTER)
219 : _id(id), _data(data) {}
224 template <class key_type, class contents>
226 : public basis::array<hash_wrapper<key_type, contents> >,
227 public virtual basis::root_object
230 bucket() : basis::array<hash_wrapper<key_type, contents> >(0, NULL_POINTER,
231 basis::byte_array::SIMPLE_COPY | basis::byte_array::EXPONE
232 | basis::byte_array::FLUSH_INVISIBLE) {}
234 int find(const key_type &to_find) {
235 for (int i = 0; i < this->length(); i++) {
236 key_type *curr_key = this->get(i)._id;
237 //hmmm: if curr key is not set, is that a logic error? it seems like we
238 // are seeing the potential for this.
239 if (curr_key && (to_find == *curr_key))
242 return basis::common::NOT_FOUND;
248 template <class key_type, class contents>
249 class internal_hash_array : public amorph<bucket<key_type, contents> >
252 internal_hash_array(int estimated_elements)
253 : amorph<bucket<key_type, contents> >
254 (hash_table<key_type, contents>::calculate_num_slots(estimated_elements)) {}
259 template <class key_type, class contents>
260 hash_table<key_type, contents>::hash_table(const hashing_algorithm &hasher, int estimated_elements)
261 : c_estim_elements(estimated_elements),
262 _hasher(hasher.clone()),
263 _table(new internal_hash_array<key_type, contents>(estimated_elements)),
267 template <class key_type, class contents>
268 hash_table<key_type, contents>::~hash_table()
270 #ifdef EXTREME_CHECKING
271 FUNCDEF("destructor");
272 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
275 basis::WHACK(_table);
276 basis::WHACK(_hasher);
279 template <class key_type, class contents>
280 int hash_table<key_type, contents>::calculate_num_slots(int estimated_elements)
282 //printf("elems wanted = %d\n", estimated_elements);
283 int log_2_truncated = int(log(float(estimated_elements)) / log(2.0));
284 //printf("log 2 of elems, truncated = %d\n", log_2_truncated);
285 int slots_needed_for_elems = (int)pow(2.0, double(log_2_truncated + 1));
286 //printf("slots needed = %d\n", slots_needed_for_elems );
287 return slots_needed_for_elems;
290 // the specialized copy operation.
291 template <class key_type, class contents>
292 void copy_hash_table(hash_table<key_type, contents> &target,
293 const hash_table<key_type, contents> &source)
295 #ifdef EXTREME_CHECKING
296 FUNCDEF("copy_hash_table");
297 if (!source.verify())
298 deadly_error(class_name(), func, "source state did not verify.");
301 for (int i = 0; i < source.table_access().elements(); i++) {
302 bucket<key_type, contents> *buck = source.table_access().borrow(i);
304 for (int j = 0; j < buck->length(); j++) {
305 target.add(*((*buck)[j]._id), new contents(*((*buck)[j]._data)));
308 #ifdef EXTREME_CHECKING
309 if (!target.verify())
310 deadly_error(class_name(), func, "target state did not verify.");
315 template <class key_type, class contents>
316 void hash_table<key_type, contents>::reset()
318 #ifdef EXTREME_CHECKING
320 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
322 // iterate over the list whacking the content items in the buckets.
323 for (int i = 0; i < _table->elements(); i++) {
324 bucket<key_type, contents> *buck = _table->acquire(i);
326 for (int j = 0; j < buck->length(); j++) {
327 basis::WHACK((*buck)[j]._data); // eliminate the stored data.
328 basis::WHACK((*buck)[j]._id); // eliminate the stored key.
329 buck->zap(j, j); // remove the element.
330 j--; // skip back before whack happened.
334 #ifdef EXTREME_CHECKING
336 deadly_error(class_name(), func, "state did not verify afterwards.");
340 template <class key_type, class contents>
341 bool hash_table<key_type, contents>::verify() const
343 for (int i = 0; i < _table->elements(); i++) {
344 const bucket<key_type, contents> *buck = _table->borrow(i);
345 if (!buck) continue; // that's acceptable.
346 for (int j = 0; j < buck->length(); j++) {
347 const hash_wrapper<key_type, contents> &wrap = (*buck)[j];
349 // program_wide_logger::get().log(a_sprintf("hash table: no data segment at position %d.", j));
353 // program_wide_logger::get().log(a_sprintf("hash table: no identifier at position %d.", j));
361 template <class key_type, class contents>
362 internal_hash_array<key_type, contents> &hash_table<key_type, contents>
363 ::table_access() const
366 template <class key_type, class contents>
367 void hash_table<key_type, contents>::rehash(int estimated_elements)
369 #ifdef EXTREME_CHECKING
371 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
373 typedef bucket<key_type, contents> buckie;
374 hash_table<key_type, contents> new_hash(*_hasher, estimated_elements);
375 // this is the new table that will be used.
377 // scoot through the existing table so we can move items into the new one.
378 for (int i = 0; i < _table->elements(); i++) {
379 buckie *b = _table->borrow(i); // look at the current element.
380 if (!b) continue; // nothing here, keep going.
381 for (int j = b->length() - 1; j >= 0; j--) {
382 key_type *k = b->use(j)._id;
383 contents *c = b->use(j)._data;
387 // clean out any items in the buckets here now that they've moved.
390 // now flip the contents of the new guy into "this".
391 _table->reset(); // toss previous stuff.
392 _table->adjust(new_hash._table->elements());
393 for (int q = 0; q < new_hash._table->elements(); q++)
394 _table->put(q, new_hash._table->acquire(q));
395 // reset other data members.
396 c_estim_elements = new_hash.c_estim_elements;
398 #ifdef EXTREME_CHECKING
400 deadly_error(class_name(), func, "state did not verify afterwards.");
404 template <class key_type, class contents>
405 basis::outcome hash_table<key_type, contents>::add(const key_type &key,
407 { return add(new key_type(key), to_store); }
409 template <class key_type, class contents>
410 basis::outcome hash_table<key_type, contents>::add(key_type *key,
411 contents *to_store, bool check_dupes)
413 #ifdef EXTREME_CHECKING
415 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
418 basis::un_int hashed = _hasher->hash((const void *)key, sizeof(key_type));
419 // make the value appropriate for our table.
420 hashed = hashed % _table->elements();
421 // see if the key already exists there.
422 bucket<key_type, contents> *buck = _table->borrow(hashed);
424 // this slot doesn't exist yet.
425 buck = new bucket<key_type, contents>;
426 _table->put(hashed, buck);
429 // we aren't even going to check for its existence.
430 *buck += hash_wrapper<key_type, contents>(key, to_store);
433 int indy = buck->find(*key);
434 if (basis::negative(indy)) {
435 // that value was not seen yet, so we're adding a new entry.
436 *buck += hash_wrapper<key_type, contents>(key, to_store);
437 #ifdef EXTREME_CHECKING
439 deadly_error(class_name(), func, "state did not verify afterwards.");
443 // that key already existed, so we'll re-use its slot with the new data.
444 basis::WHACK((*buck)[indy]._data);
445 basis::WHACK(key); // toss since we're not using.
446 (*buck)[indy]._data = to_store;
447 #ifdef EXTREME_CHECKING
449 deadly_error(class_name(), func, "state did not verify afterwards.");
454 template <class key_type, class contents>
455 basis::outcome hash_table<key_type, contents>::fast_dangerous_add
456 (const key_type &key, contents *to_store)
457 { return add(new key_type(key), to_store, false); }
459 template <class key_type, class contents>
460 bool hash_table<key_type, contents>::find(const key_type &key,
461 contents * &item_found) const
463 #ifdef EXTREME_CHECKING
465 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
467 item_found = NULL_POINTER;
469 basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
470 // make the value appropriate for our table.
471 hashed = hashed % _table->elements();
472 // see if the key exists in the bucket.
473 bucket<key_type, contents> *buck = _table->borrow(hashed);
474 if (!buck) return false;
475 int indy = buck->find(key);
476 if (basis::negative(indy)) return false; // not there.
477 // was there, so retrieve the data.
478 item_found = (*buck)[indy]._data;
482 template <class key_type, class contents>
483 contents *hash_table<key_type, contents>::acquire(const key_type &key)
485 #ifdef EXTREME_CHECKING
487 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
490 basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
491 // make the value appropriate for our table.
492 hashed = hashed % _table->elements();
493 // see if the key exists in the bucket.
494 bucket<key_type, contents> *buck = _table->borrow(hashed);
495 if (!buck) return NULL_POINTER;
496 int indy = buck->find(key);
497 if (basis::negative(indy)) return NULL_POINTER; // nope, not there.
498 contents *to_return = (*buck)[indy]._data;
499 basis::WHACK((*buck)[indy]._id); // clean the id.
500 buck->zap(indy, indy); // toss the storage blob for the item.
501 #ifdef EXTREME_CHECKING
503 deadly_error(class_name(), func, "state did not verify afterwards.");
508 template <class key_type, class contents>
509 bool hash_table<key_type, contents>::zap(const key_type &key)
511 #ifdef EXTREME_CHECKING
513 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
516 basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
517 // make the value appropriate for our table.
518 hashed = hashed % _table->elements();
519 // see if the key exists in the bucket.
520 bucket<key_type, contents> *buck = _table->borrow(hashed);
521 if (!buck) return false;
522 int indy = buck->find(key);
523 if (basis::negative(indy)) {
527 basis::WHACK((*buck)[indy]._data); // delete the data held.
528 basis::WHACK((*buck)[indy]._id); // delete the data held.
529 buck->zap(indy, indy); // toss the storage blob for the item.
530 if (!buck->length()) {
531 // clean up this bucket now.
532 buck = _table->acquire(hashed);
535 #ifdef EXTREME_CHECKING
537 deadly_error(class_name(), func, "state did not verify afterwards.");
542 template <class key_type, class contents>
543 void hash_table<key_type, contents>::apply(apply_function *to_apply,
546 #ifdef EXTREME_CHECKING
548 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
550 typedef bucket<key_type, contents> buckie;
552 int posn = _last_iter; // start at the same place we left.
553 while (bucks_seen++ < _table->elements()) {
554 if ( (posn < 0) || (posn >= _table->elements()) )
556 buckie *b = _table->borrow(posn);
558 // record where the iteration last touched and increment next position.
559 // we must do this before we check if the bucket exists or we'll rotate
560 // on this same place forever.
561 if (!b) continue; // nothing here, keep going.
562 for (int j = 0; j < b->length(); j++) {
563 contents *c = b->use(j)._data;
565 #ifdef EXTREME_CHECKING
566 deadly_error("hash_table", "apply", "logic error--missing pointer");
570 if (!to_apply(*b->use(j)._id, *c, data_link)) return;
575 template <class key_type, class contents>
576 int hash_table<key_type, contents>::elements() const
578 #ifdef EXTREME_CHECKING
580 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
582 typedef bucket<key_type, contents> buckie;
584 for (int i = 0; i < _table->elements(); i++) {
585 bucket<key_type, contents> *buck = _table->borrow(i);
586 if (!buck) continue; // nothing to count.
587 to_return += buck->length();
592 #undef static_class_name
596 #endif // outer guard.