feisty meow concerns codebase  2.140
hash_table.h
Go to the documentation of this file.
1 #ifndef HASH_TABLE_CLASS
2 #define HASH_TABLE_CLASS
3 
4 /*****************************************************************************\
5 * *
6 * Name : hash_table *
7 * Author : Chris Koeritz *
8 * *
9 *******************************************************************************
10 * Copyright (c) 2001-$now By Author. This program is free software; you can *
11 * redistribute it and/or modify it under the terms of the GNU General Public *
12 * License as published by the Free Software Foundation; either version 2 of *
13 * the License or (at your option) any later version. This is online at: *
14 * http://www.fsf.org/copyleft/gpl.html *
15 * Please send any updates to: fred@gruntose.com *
16 \*****************************************************************************/
17 
18 #include "amorph.h"
19 
20 #include <basis/array.h>
21 #include <basis/byte_array.h>
22 #include <basis/contracts.h>
23 #include <basis/enhance_cpp.h>
24 #include <basis/functions.h>
25 
26 #include <math.h>
27 
28 namespace structures {
29 
30 // forward.
31 template <class key, class contents> class internal_hash_array;
32 
34 
40 class hashing_algorithm : public virtual basis::clonable
41 {
42 public:
43  virtual basis::un_int hash(const void *key_data, int key_length) const = 0;
45 
51  virtual hashing_algorithm *clone() const = 0;
53 };
54 
56 
58 
65 template <class key_type, class contents>
66  // the key_type must support valid copy constructor, assignment operator and
67  // equality operators. the contents need not support any special operations;
68  // it is always considered as just a pointer.
69 class hash_table : public virtual basis::nameable
70 {
71 public:
74 
80  virtual ~hash_table();
82 
83  DEFINE_CLASS_NAME("hash_table");
84 
87 
89  enum outcomes {
90  IS_NEW = basis::common::IS_NEW,
91  EXISTING = basis::common::EXISTING
92  };
93 
96 
97  int elements() const;
99 
101  int estimated_elements() const { return c_estim_elements; }
103 
104  basis::outcome add(const key_type &key, contents *to_store);
106 
114  basis::outcome fast_dangerous_add(const key_type &key, contents *to_store);
116 
119  bool find(const key_type &key, contents * &item_found) const;
121 
125  contents *find(const key_type &key) const
126  { contents *c = NULL_POINTER; return find(key, c)? c : NULL_POINTER; }
128 
129  contents *acquire(const key_type &key);
131 
135  bool zap(const key_type &key);
137 
139  void reset();
141 
142  typedef bool apply_function(const key_type &key, contents &current,
143  void *data_link);
145 
150  void apply(apply_function *to_apply, void *data_link);
152 
158  basis::outcome add(key_type *key, contents *to_store, bool check_dupes = true);
160 
167  bool verify() const;
169 
171 private:
172  int c_estim_elements;
173  hashing_algorithm *_hasher;
175  int _last_iter;
177 
178  hash_table(const hash_table &to_copy);
180  hash_table &operator =(const hash_table &to_copy);
182 
183 public:
186 };
187 
189 
191 
197 template <class key_type, class contents>
199  const hash_table<key_type, contents> &source);
200 
202 
203 // implementations for longer methods below....
204 
205 // this is a very special micro-managed class. it holds two pointers which
206 // it neither creates nor destroys. thus all interaction with it must be
207 // careful about removing these objects at the appropriate time. we don't
208 // want automatic memory management since we want a cheap copy on the items
209 // in the buckets.
210 
211 template <class key_type, class contents>
212 class hash_wrapper : public virtual basis::root_object
213 {
214 public:
215  key_type *_id;
216  contents *_data;
217 
218  hash_wrapper(key_type *id = NULL_POINTER, contents *data = NULL_POINTER)
219  : _id(id), _data(data) {}
220 };
221 
223 
224 template <class key_type, class contents>
225 class bucket
226  : public basis::array<hash_wrapper<key_type, contents> >,
227  public virtual basis::root_object
228 {
229 public:
230  bucket() : basis::array<hash_wrapper<key_type, contents> >(0, NULL_POINTER,
231  basis::byte_array::SIMPLE_COPY | basis::byte_array::EXPONE
232  | basis::byte_array::FLUSH_INVISIBLE) {}
233 
234  int find(const key_type &to_find) {
235  for (int i = 0; i < this->length(); i++) {
236  key_type *curr_key = this->get(i)._id;
237 //hmmm: if curr key is not set, is that a logic error? it seems like we
238 // are seeing the potential for this.
239  if (curr_key && (to_find == *curr_key))
240  return i;
241  }
242  return basis::common::NOT_FOUND;
243  }
244 };
245 
247 
248 template <class key_type, class contents>
249 class internal_hash_array : public amorph<bucket<key_type, contents> >
250 {
251 public:
252  internal_hash_array(int estimated_elements)
253  : amorph<bucket<key_type, contents> >
254  (hash_table<key_type, contents>::calculate_num_slots(estimated_elements)) {}
255 };
256 
258 
259 template <class key_type, class contents>
260 hash_table<key_type, contents>::hash_table(const hashing_algorithm &hasher, int estimated_elements)
261 : c_estim_elements(estimated_elements),
262  _hasher(hasher.clone()),
263  _table(new internal_hash_array<key_type, contents>(estimated_elements)),
264  _last_iter(0)
265 {}
266 
267 template <class key_type, class contents>
269 {
270 #ifdef EXTREME_CHECKING
271  FUNCDEF("destructor");
272  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
273 #endif
274  reset();
275  basis::WHACK(_table);
276  basis::WHACK(_hasher);
277 }
278 
279 template <class key_type, class contents>
281 {
282 //printf("elems wanted = %d\n", estimated_elements);
283  int log_2_truncated = int(log(float(estimated_elements)) / log(2.0));
284 //printf("log 2 of elems, truncated = %d\n", log_2_truncated);
285  int slots_needed_for_elems = (int)pow(2.0, double(log_2_truncated + 1));
286 //printf("slots needed = %d\n", slots_needed_for_elems );
287  return slots_needed_for_elems;
288 }
289 
290 // the specialized copy operation.
291 template <class key_type, class contents>
293  const hash_table<key_type, contents> &source)
294 {
295 #ifdef EXTREME_CHECKING
296  FUNCDEF("copy_hash_table");
297  if (!source.verify())
298  deadly_error(class_name(), func, "source state did not verify.");
299 #endif
300  target.reset();
301  for (int i = 0; i < source.table_access().elements(); i++) {
302  bucket<key_type, contents> *buck = source.table_access().borrow(i);
303  if (!buck) continue;
304  for (int j = 0; j < buck->length(); j++) {
305  target.add(*((*buck)[j]._id), new contents(*((*buck)[j]._data)));
306  }
307  }
308 #ifdef EXTREME_CHECKING
309  if (!target.verify())
310  deadly_error(class_name(), func, "target state did not verify.");
311 #endif
312  #undef class_name
313 }
314 
315 template <class key_type, class contents>
317 {
318 #ifdef EXTREME_CHECKING
319  FUNCDEF("reset");
320  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
321 #endif
322  // iterate over the list whacking the content items in the buckets.
323  for (int i = 0; i < _table->elements(); i++) {
324  bucket<key_type, contents> *buck = _table->acquire(i);
325  if (!buck) continue;
326  for (int j = 0; j < buck->length(); j++) {
327  basis::WHACK((*buck)[j]._data); // eliminate the stored data.
328  basis::WHACK((*buck)[j]._id); // eliminate the stored key.
329  buck->zap(j, j); // remove the element.
330  j--; // skip back before whack happened.
331  }
332  basis::WHACK(buck);
333  }
334 #ifdef EXTREME_CHECKING
335  if (!verify())
336  deadly_error(class_name(), func, "state did not verify afterwards.");
337 #endif
338 }
339 
340 template <class key_type, class contents>
342 {
343  for (int i = 0; i < _table->elements(); i++) {
344  const bucket<key_type, contents> *buck = _table->borrow(i);
345  if (!buck) continue; // that's acceptable.
346  for (int j = 0; j < buck->length(); j++) {
347  const hash_wrapper<key_type, contents> &wrap = (*buck)[j];
348  if (!wrap._data) {
349 // program_wide_logger::get().log(a_sprintf("hash table: no data segment at position %d.", j));
350  return false;
351  }
352  if (!wrap._id) {
353 // program_wide_logger::get().log(a_sprintf("hash table: no identifier at position %d.", j));
354  return false;
355  }
356  }
357  }
358  return true;
359 }
360 
361 template <class key_type, class contents>
364 { return *_table; }
365 
366 template <class key_type, class contents>
367 void hash_table<key_type, contents>::rehash(int estimated_elements)
368 {
369 #ifdef EXTREME_CHECKING
370  FUNCDEF("rehash");
371  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
372 #endif
373  typedef bucket<key_type, contents> buckie;
374  hash_table<key_type, contents> new_hash(*_hasher, estimated_elements);
375  // this is the new table that will be used.
376 
377  // scoot through the existing table so we can move items into the new one.
378  for (int i = 0; i < _table->elements(); i++) {
379  buckie *b = _table->borrow(i); // look at the current element.
380  if (!b) continue; // nothing here, keep going.
381  for (int j = b->length() - 1; j >= 0; j--) {
382  key_type *k = b->use(j)._id;
383  contents *c = b->use(j)._data;
384  new_hash.add(k, c);
385  }
386  b->reset();
387  // clean out any items in the buckets here now that they've moved.
388  }
389 
390  // now flip the contents of the new guy into "this".
391  _table->reset(); // toss previous stuff.
392  _table->adjust(new_hash._table->elements());
393  for (int q = 0; q < new_hash._table->elements(); q++)
394  _table->put(q, new_hash._table->acquire(q));
395  // reset other data members.
396  c_estim_elements = new_hash.c_estim_elements;
397  _last_iter = 0;
398 #ifdef EXTREME_CHECKING
399  if (!verify())
400  deadly_error(class_name(), func, "state did not verify afterwards.");
401 #endif
402 }
403 
404 template <class key_type, class contents>
406  contents *to_store)
407 { return add(new key_type(key), to_store); }
408 
409 template <class key_type, class contents>
411  contents *to_store, bool check_dupes)
412 {
413 #ifdef EXTREME_CHECKING
414  FUNCDEF("add");
415  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
416 #endif
417  // get a hash value.
418  basis::un_int hashed = _hasher->hash((const void *)key, sizeof(key_type));
419  // make the value appropriate for our table.
420  hashed = hashed % _table->elements();
421  // see if the key already exists there.
422  bucket<key_type, contents> *buck = _table->borrow(hashed);
423  if (!buck) {
424  // this slot doesn't exist yet.
425  buck = new bucket<key_type, contents>;
426  _table->put(hashed, buck);
427  }
428  if (!check_dupes) {
429  // we aren't even going to check for its existence.
430  *buck += hash_wrapper<key_type, contents>(key, to_store);
431  return IS_NEW;
432  }
433  int indy = buck->find(*key);
434  if (basis::negative(indy)) {
435  // that value was not seen yet, so we're adding a new entry.
436  *buck += hash_wrapper<key_type, contents>(key, to_store);
437 #ifdef EXTREME_CHECKING
438  if (!verify())
439  deadly_error(class_name(), func, "state did not verify afterwards.");
440 #endif
441  return IS_NEW;
442  }
443  // that key already existed, so we'll re-use its slot with the new data.
444  basis::WHACK((*buck)[indy]._data);
445  basis::WHACK(key); // toss since we're not using.
446  (*buck)[indy]._data = to_store;
447 #ifdef EXTREME_CHECKING
448  if (!verify())
449  deadly_error(class_name(), func, "state did not verify afterwards.");
450 #endif
451  return EXISTING;
452 }
453 
454 template <class key_type, class contents>
456  (const key_type &key, contents *to_store)
457 { return add(new key_type(key), to_store, false); }
458 
459 template <class key_type, class contents>
460 bool hash_table<key_type, contents>::find(const key_type &key,
461  contents * &item_found) const
462 {
463 #ifdef EXTREME_CHECKING
464  FUNCDEF("find");
465  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
466 #endif
467  item_found = NULL_POINTER;
468  // get a hash value.
469  basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
470  // make the value appropriate for our table.
471  hashed = hashed % _table->elements();
472  // see if the key exists in the bucket.
473  bucket<key_type, contents> *buck = _table->borrow(hashed);
474  if (!buck) return false;
475  int indy = buck->find(key);
476  if (basis::negative(indy)) return false; // not there.
477  // was there, so retrieve the data.
478  item_found = (*buck)[indy]._data;
479  return true;
480 }
481 
482 template <class key_type, class contents>
483 contents *hash_table<key_type, contents>::acquire(const key_type &key)
484 {
485 #ifdef EXTREME_CHECKING
486  FUNCDEF("acquire");
487  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
488 #endif
489  // get a hash value.
490  basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
491  // make the value appropriate for our table.
492  hashed = hashed % _table->elements();
493  // see if the key exists in the bucket.
494  bucket<key_type, contents> *buck = _table->borrow(hashed);
495  if (!buck) return NULL_POINTER;
496  int indy = buck->find(key);
497  if (basis::negative(indy)) return NULL_POINTER; // nope, not there.
498  contents *to_return = (*buck)[indy]._data;
499  basis::WHACK((*buck)[indy]._id); // clean the id.
500  buck->zap(indy, indy); // toss the storage blob for the item.
501 #ifdef EXTREME_CHECKING
502  if (!verify())
503  deadly_error(class_name(), func, "state did not verify afterwards.");
504 #endif
505  return to_return;
506 }
507 
508 template <class key_type, class contents>
509 bool hash_table<key_type, contents>::zap(const key_type &key)
510 {
511 #ifdef EXTREME_CHECKING
512  FUNCDEF("zap");
513  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
514 #endif
515  // get a hash value.
516  basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
517  // make the value appropriate for our table.
518  hashed = hashed % _table->elements();
519  // see if the key exists in the bucket.
520  bucket<key_type, contents> *buck = _table->borrow(hashed);
521  if (!buck) return false;
522  int indy = buck->find(key);
523  if (basis::negative(indy)) {
524  // nope, not there.
525  return false;
526  }
527  basis::WHACK((*buck)[indy]._data); // delete the data held.
528  basis::WHACK((*buck)[indy]._id); // delete the data held.
529  buck->zap(indy, indy); // toss the storage blob for the item.
530  if (!buck->length()) {
531  // clean up this bucket now.
532  buck = _table->acquire(hashed);
533  basis::WHACK(buck);
534  }
535 #ifdef EXTREME_CHECKING
536  if (!verify())
537  deadly_error(class_name(), func, "state did not verify afterwards.");
538 #endif
539  return true;
540 }
541 
542 template <class key_type, class contents>
543 void hash_table<key_type, contents>::apply(apply_function *to_apply,
544  void *data_link)
545 {
546 #ifdef EXTREME_CHECKING
547  FUNCDEF("apply");
548  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
549 #endif
550  typedef bucket<key_type, contents> buckie;
551  int bucks_seen = 0;
552  int posn = _last_iter; // start at the same place we left.
553  while (bucks_seen++ < _table->elements()) {
554  if ( (posn < 0) || (posn >= _table->elements()) )
555  posn = 0;
556  buckie *b = _table->borrow(posn);
557  _last_iter = posn++;
558  // record where the iteration last touched and increment next position.
559  // we must do this before we check if the bucket exists or we'll rotate
560  // on this same place forever.
561  if (!b) continue; // nothing here, keep going.
562  for (int j = 0; j < b->length(); j++) {
563  contents *c = b->use(j)._data;
564  if (!c) {
565 #ifdef EXTREME_CHECKING
566  deadly_error("hash_table", "apply", "logic error--missing pointer");
567 #endif
568  continue;
569  }
570  if (!to_apply(*b->use(j)._id, *c, data_link)) return;
571  }
572  }
573 }
574 
575 template <class key_type, class contents>
577 {
578 #ifdef EXTREME_CHECKING
579  FUNCDEF("elements");
580  if (!verify()) deadly_error(class_name(), func, "state did not verify.");
581 #endif
582  typedef bucket<key_type, contents> buckie;
583  int to_return = 0;
584  for (int i = 0; i < _table->elements(); i++) {
585  bucket<key_type, contents> *buck = _table->borrow(i);
586  if (!buck) continue; // nothing to count.
587  to_return += buck->length();
588  }
589  return to_return;
590 }
591 
592 #undef static_class_name
593 
594 } //namespace.
595 
596 #endif // outer guard.
597 
Represents a sequential, ordered, contiguous collection of objects.
Definition: array.h:54
@ EXPONE
synonym for EXPONENTIAL_GROWTH.
Definition: array.h:61
@ SIMPLE_COPY
the contents can be memcpy'd and are not deep.
Definition: array.h:59
@ FLUSH_INVISIBLE
blanks out allocated but inaccessible elements.
Definition: array.h:62
outcome put(int index, const contents &to_put)
Stores an object at the index "index" in the array.
Definition: array.h:834
const hash_wrapper< key_type, contents > & get(int index) const
Accesses individual objects stored in "this" at the "index" position.
Definition: array.h:372
int length() const
Returns the current reported length of the allocated C array.
Definition: array.h:115
outcome zap(int start, int end)
Deletes from "this" the objects inclusively between "start" and "end".
Definition: array.h:769
contents & use(int index)
A non-constant version of get(); the returned object can be modified.
Definition: array.h:365
array(int number=0, const hash_wrapper< key_type, contents > *init=NULL_POINTER, int flags=EXPONENTIAL_GROWTH|FLUSH_INVISIBLE)
Constructs an array with room for "number" objects.
Definition: array.h:315
A clonable object knows how to make copy of itself.
Definition: contracts.h:109
Root object for any class that knows its own name.
Definition: contracts.h:123
Outcomes describe the state of completion for an operation.
Definition: outcome.h:31
int find(const key_type &to_find)
Definition: hash_table.h:234
Implements hashing into buckets for quick object access.
Definition: hash_table.h:70
bool apply_function(const key_type &key, contents &current, void *data_link)
the "apply_function" is what a user of the "apply" method must supply.
Definition: hash_table.h:142
int elements() const
the number of valid items we found by traversing the hash table.
Definition: hash_table.h:576
basis::outcome add(key_type *key, contents *to_store, bool check_dupes=true)
specialized add for a pre-existing pointer "key".
Definition: hash_table.h:410
basis::outcome add(const key_type &key, contents *to_store)
Stores "to_store" into the table given its "key" for hashing.
Definition: hash_table.h:405
internal_hash_array< key_type, contents > & table_access() const
special accessor for the copy_hash_table method only.
Definition: hash_table.h:363
bool verify() const
returns true if the hash table is internally consistent.
Definition: hash_table.h:341
DEFINE_CLASS_NAME("hash_table")
static int calculate_num_slots(int estimated_elements)
a helper method that lets us know what n is for how many 2^n slots we should have.
Definition: hash_table.h:280
void apply(apply_function *to_apply, void *data_link)
Invokes the function "to_apply" on every entry in the table.
Definition: hash_table.h:543
void rehash(int estimated_elements)
resizes the hashing structures to optimise for a new size of "estimated_elements".
Definition: hash_table.h:367
contents * acquire(const key_type &key)
retrieves the contents held for "key" out of the table.
Definition: hash_table.h:483
void reset()
removes all entries in the table and returns it to a pristine state.
Definition: hash_table.h:316
bool find(const key_type &key, contents *&item_found) const
locates the item specified by the "key", if possible.
Definition: hash_table.h:460
basis::outcome fast_dangerous_add(const key_type &key, contents *to_store)
Like the add method above, but doesn't check for duplicates.
Definition: hash_table.h:456
hash_table(const hashing_algorithm &hasher, int estimated_elements)
Creates a table using the "hasher" that is ready to store "estimated_elements".
Definition: hash_table.h:260
virtual ~hash_table()
destroys any objects left in the hash_table.
Definition: hash_table.h:268
int estimated_elements() const
returns the size of table we're optimized for.
Definition: hash_table.h:101
contents * find(const key_type &key) const
simplified form of above find() method.
Definition: hash_table.h:125
bool zap(const key_type &key)
removes the entry with the "key" specified.
Definition: hash_table.h:509
hash_wrapper(key_type *id=NULL_POINTER, contents *data=NULL_POINTER)
Definition: hash_table.h:218
A hashing algorithm takes a key and derives a related integer from it.
Definition: hash_table.h:41
virtual hashing_algorithm * clone() const =0
supports virtual construction of new algorithm objects.
virtual basis::un_int hash(const void *key_data, int key_length) const =0
returns a hash value based on the "key_data" and "key_length".
internal_hash_array(int estimated_elements)
Definition: hash_table.h:252
#define deadly_error(c, f, i)
#define NULL_POINTER
The value representing a pointer to nothing.
Definition: definitions.h:32
#define FUNCDEF(func_in)
FUNCDEF sets the name of a function (and plugs it into the callstack).
Definition: enhance_cpp.h:57
The guards collection helps in testing preconditions and reporting errors.
Definition: array.h:30
void WHACK(contents *&ptr)
deletion with clearing of the pointer.
Definition: functions.h:121
unsigned int un_int
Abbreviated name for unsigned integers.
Definition: definitions.h:62
bool negative(const type &a)
negative returns true if "a" is less than zero.
Definition: functions.h:43
A dynamic container class that holds any kind of object via pointers.
Definition: amorph.h:55
void copy_hash_table(hash_table< key_type, contents > &target, const hash_table< key_type, contents > &source)
Copies the entire hash table, given a contents type that supports copying.
Definition: hash_table.h:292