feisty meow concerns codebase 2.140
hash_table.h
Go to the documentation of this file.
1#ifndef HASH_TABLE_CLASS
2#define HASH_TABLE_CLASS
3
4/*****************************************************************************\
5* *
6* Name : hash_table *
7* Author : Chris Koeritz *
8* *
9*******************************************************************************
10* Copyright (c) 2001-$now By Author. This program is free software; you can *
11* redistribute it and/or modify it under the terms of the GNU General Public *
12* License as published by the Free Software Foundation; either version 2 of *
13* the License or (at your option) any later version. This is online at: *
14* http://www.fsf.org/copyleft/gpl.html *
15* Please send any updates to: fred@gruntose.com *
16\*****************************************************************************/
17
18#include "amorph.h"
19
20#include <basis/array.h>
21#include <basis/byte_array.h>
22#include <basis/contracts.h>
23#include <basis/enhance_cpp.h>
24#include <basis/functions.h>
25
26#include <math.h>
27
28namespace structures {
29
30const int HASH_FUDGE_FACTOR = 10;
31 // we fudge the number of elements requested by adding a little chunk, just in case.
32 // since we're going to take the power of 2 that comes closest, we can afford to be
33 // a little generous, rather than being too miserly.
34
35// forward.
36template <class key, class contents> class internal_hash_array;
37
39
45class hashing_algorithm : public virtual basis::clonable
46{
47public:
48 virtual basis::un_int hash(const void *key_data, int key_length) const = 0;
50
56 virtual hashing_algorithm *clone() const = 0;
58};
59
61
63
70template <class key_type, class contents>
71 // the key_type must support valid copy constructor, assignment operator and
72 // equality operators. the contents need not support any special operations;
73 // it is always considered as just a pointer.
74class hash_table : public virtual basis::nameable
75{
76public:
79
85 virtual ~hash_table();
87
88 DEFINE_CLASS_NAME("hash_table");
89
92
94 enum outcomes {
95 IS_NEW = basis::common::IS_NEW,
96 EXISTING = basis::common::EXISTING
97 };
98
101
102 int elements() const;
104
106 int estimated_elements() const { return c_estim_elements; }
108
109 basis::outcome add(const key_type &key, contents *to_store);
111
119 basis::outcome fast_dangerous_add(const key_type &key, contents *to_store);
121
124 bool find(const key_type &key, contents * &item_found) const;
126
130 contents *find(const key_type &key) const
131 { contents *c = NULL_POINTER; return find(key, c)? c : NULL_POINTER; }
133
134 contents *acquire(const key_type &key);
136
140 bool zap(const key_type &key);
142
144 void reset();
146
147 typedef bool apply_function(const key_type &key, contents &current,
148 void *data_link);
150
155 void apply(apply_function *to_apply, void *data_link);
157
163 basis::outcome add(key_type *key, contents *to_store, bool check_dupes = true);
165
172 bool verify() const;
174
176private:
177 int c_estim_elements;
178 hashing_algorithm *_hasher;
180 int _last_iter;
182
183 hash_table(const hash_table &to_copy);
185 hash_table &operator =(const hash_table &to_copy);
187
188public:
191};
192
194
196
202template <class key_type, class contents>
204 const hash_table<key_type, contents> &source);
205
207
208// implementations for longer methods below....
209
210// this is a very special micro-managed class. it holds two pointers which
211// it neither creates nor destroys. thus all interaction with it must be
212// careful about removing these objects at the appropriate time. we don't
213// want automatic memory management since we want a cheap copy on the items
214// in the buckets.
215
216template <class key_type, class contents>
217class hash_wrapper : public virtual basis::root_object
218{
219public:
220 key_type *_id;
221 contents *_data;
222
223 hash_wrapper(key_type *id = NULL_POINTER, contents *data = NULL_POINTER)
224 : _id(id), _data(data) {}
225};
226
228
229template <class key_type, class contents>
231 : public basis::array<hash_wrapper<key_type, contents> >,
232 public virtual basis::root_object
233{
234public:
235 bucket() : basis::array<hash_wrapper<key_type, contents> >(0, NULL_POINTER,
236 basis::byte_array::SIMPLE_COPY | basis::byte_array::EXPONE
237 | basis::byte_array::FLUSH_INVISIBLE) {}
238
239 int find(const key_type &to_find) {
240 for (int i = 0; i < this->length(); i++) {
241 key_type *curr_key = this->get(i)._id;
242//hmmm: if curr key is not set, is that a logic error? it seems like we
243// are seeing the potential for this.
244 if (curr_key && (to_find == *curr_key))
245 return i;
246 }
247 return basis::common::NOT_FOUND;
248 }
249};
250
252
253template <class key_type, class contents>
254class internal_hash_array : public amorph<bucket<key_type, contents> >
255{
256public:
257 internal_hash_array(int estimated_elements)
258 : amorph<bucket<key_type, contents> >
259 (hash_table<key_type, contents>::calculate_num_slots(estimated_elements)) {}
260};
261
263
264template <class key_type, class contents>
266: c_estim_elements(estimated_elements),
267 _hasher(hasher.clone()),
268 _table(new internal_hash_array<key_type, contents>(estimated_elements)),
269 _last_iter(0)
270{}
271
272template <class key_type, class contents>
274{
275#ifdef EXTREME_CHECKING
276 FUNCDEF("destructor");
277 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
278#endif
279 reset();
280 basis::WHACK(_table);
281 basis::WHACK(_hasher);
282}
283
284template <class key_type, class contents>
286{
287//printf("[ elems wanted = %d, fudge adjusted = %d\n", estimated_elements, estimated_elements + HASH_FUDGE_FACTOR);
288 int log_2_truncated = int(log(float(estimated_elements + HASH_FUDGE_FACTOR)) / log(2.0));
289//printf(" log 2 of elems, truncated = %d\n", log_2_truncated);
290 int slots_needed_for_elems = (int)pow(2.0, double(log_2_truncated + 1));
291//printf(" slots needed = %d]\n", slots_needed_for_elems );
292 return slots_needed_for_elems;
293}
294
295// the specialized copy operation.
296template <class key_type, class contents>
298 const hash_table<key_type, contents> &source)
299{
300#ifdef EXTREME_CHECKING
301 FUNCDEF("copy_hash_table");
302 if (!source.verify())
303 deadly_error(class_name(), func, "source state did not verify.");
304#endif
305 target.reset();
306 for (int i = 0; i < source.table_access().elements(); i++) {
307 bucket<key_type, contents> *buck = source.table_access().borrow(i);
308 if (!buck) continue;
309 for (int j = 0; j < buck->length(); j++) {
310 target.add(*((*buck)[j]._id), new contents(*((*buck)[j]._data)));
311 }
312 }
313#ifdef EXTREME_CHECKING
314 if (!target.verify())
315 deadly_error(class_name(), func, "target state did not verify.");
316#endif
317 #undef class_name
318}
319
320template <class key_type, class contents>
322{
323#ifdef EXTREME_CHECKING
324 FUNCDEF("reset");
325 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
326#endif
327 // iterate over the list whacking the content items in the buckets.
328 for (int i = 0; i < _table->elements(); i++) {
329 bucket<key_type, contents> *buck = _table->acquire(i);
330 if (!buck) continue;
331 for (int j = 0; j < buck->length(); j++) {
332 basis::WHACK((*buck)[j]._data); // eliminate the stored data.
333 basis::WHACK((*buck)[j]._id); // eliminate the stored key.
334 buck->zap(j, j); // remove the element.
335 j--; // skip back before whack happened.
336 }
337 basis::WHACK(buck);
338 }
339#ifdef EXTREME_CHECKING
340 if (!verify())
341 deadly_error(class_name(), func, "state did not verify afterwards.");
342#endif
343}
344
345template <class key_type, class contents>
347{
348 for (int i = 0; i < _table->elements(); i++) {
349 const bucket<key_type, contents> *buck = _table->borrow(i);
350 if (!buck) continue; // that's acceptable.
351 for (int j = 0; j < buck->length(); j++) {
352 const hash_wrapper<key_type, contents> &wrap = (*buck)[j];
353 if (!wrap._data) {
354// program_wide_logger::get().log(a_sprintf("hash table: no data segment at position %d.", j));
355 return false;
356 }
357 if (!wrap._id) {
358// program_wide_logger::get().log(a_sprintf("hash table: no identifier at position %d.", j));
359 return false;
360 }
361 }
362 }
363 return true;
364}
365
366template <class key_type, class contents>
369{ return *_table; }
370
371template <class key_type, class contents>
372void hash_table<key_type, contents>::rehash(int estimated_elements)
373{
374#ifdef EXTREME_CHECKING
375 FUNCDEF("rehash");
376 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
377#endif
378 typedef bucket<key_type, contents> buckie;
379 hash_table<key_type, contents> new_hash(*_hasher, estimated_elements);
380 // this is the new table that will be used.
381
382 // scoot through the existing table so we can move items into the new one.
383 for (int i = 0; i < _table->elements(); i++) {
384 buckie *b = _table->borrow(i); // look at the current element.
385 if (!b) continue; // nothing here, keep going.
386 for (int j = b->length() - 1; j >= 0; j--) {
387 key_type *k = b->use(j)._id;
388 contents *c = b->use(j)._data;
389 new_hash.add(k, c);
390 }
391 b->reset();
392 // clean out any items in the buckets here now that they've moved.
393 }
394
395 // now flip the contents of the new guy into "this".
396 _table->reset(); // toss previous stuff.
397 _table->adjust(new_hash._table->elements());
398 for (int q = 0; q < new_hash._table->elements(); q++)
399 _table->put(q, new_hash._table->acquire(q));
400 // reset other data members.
401 c_estim_elements = new_hash.c_estim_elements;
402 _last_iter = 0;
403#ifdef EXTREME_CHECKING
404 if (!verify())
405 deadly_error(class_name(), func, "state did not verify afterwards.");
406#endif
407}
408
409template <class key_type, class contents>
411 contents *to_store)
412{ return add(new key_type(key), to_store); }
413
414template <class key_type, class contents>
416 contents *to_store, bool check_dupes)
417{
418#ifdef EXTREME_CHECKING
419 FUNCDEF("add");
420 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
421#endif
422 // get a hash value.
423 basis::un_int hashed = _hasher->hash((const void *)key, sizeof(key_type));
424 // make the value appropriate for our table.
425 hashed = hashed % _table->elements();
426 // see if the key already exists there.
427 bucket<key_type, contents> *buck = _table->borrow(hashed);
428 if (!buck) {
429 // this slot doesn't exist yet.
431 _table->put(hashed, buck);
432 }
433 if (!check_dupes) {
434 // we aren't even going to check for its existence.
435 *buck += hash_wrapper<key_type, contents>(key, to_store);
436 return IS_NEW;
437 }
438 int indy = buck->find(*key);
439 if (basis::negative(indy)) {
440 // that value was not seen yet, so we're adding a new entry.
441 *buck += hash_wrapper<key_type, contents>(key, to_store);
442#ifdef EXTREME_CHECKING
443 if (!verify())
444 deadly_error(class_name(), func, "state did not verify afterwards.");
445#endif
446 return IS_NEW;
447 }
448 // that key already existed, so we'll re-use its slot with the new data.
449 basis::WHACK((*buck)[indy]._data);
450 basis::WHACK(key); // toss since we're not using.
451 (*buck)[indy]._data = to_store;
452#ifdef EXTREME_CHECKING
453 if (!verify())
454 deadly_error(class_name(), func, "state did not verify afterwards.");
455#endif
456 return EXISTING;
457}
458
459template <class key_type, class contents>
461 (const key_type &key, contents *to_store)
462{ return add(new key_type(key), to_store, false); }
463
464template <class key_type, class contents>
466 contents * &item_found) const
467{
468#ifdef EXTREME_CHECKING
469 FUNCDEF("find");
470 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
471#endif
472 item_found = NULL_POINTER;
473 // get a hash value.
474 basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
475 // make the value appropriate for our table.
476 hashed = hashed % _table->elements();
477 // see if the key exists in the bucket.
478 bucket<key_type, contents> *buck = _table->borrow(hashed);
479 if (!buck) return false;
480 int indy = buck->find(key);
481 if (basis::negative(indy)) return false; // not there.
482 // was there, so retrieve the data.
483 item_found = (*buck)[indy]._data;
484 return true;
485}
486
487template <class key_type, class contents>
488contents *hash_table<key_type, contents>::acquire(const key_type &key)
489{
490#ifdef EXTREME_CHECKING
491 FUNCDEF("acquire");
492 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
493#endif
494 // get a hash value.
495 basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
496 // make the value appropriate for our table.
497 hashed = hashed % _table->elements();
498 // see if the key exists in the bucket.
499 bucket<key_type, contents> *buck = _table->borrow(hashed);
500 if (!buck) return NULL_POINTER;
501 int indy = buck->find(key);
502 if (basis::negative(indy)) return NULL_POINTER; // nope, not there.
503 contents *to_return = (*buck)[indy]._data;
504 basis::WHACK((*buck)[indy]._id); // clean the id.
505 buck->zap(indy, indy); // toss the storage blob for the item.
506#ifdef EXTREME_CHECKING
507 if (!verify())
508 deadly_error(class_name(), func, "state did not verify afterwards.");
509#endif
510 return to_return;
511}
512
513template <class key_type, class contents>
514bool hash_table<key_type, contents>::zap(const key_type &key)
515{
516#ifdef EXTREME_CHECKING
517 FUNCDEF("zap");
518 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
519#endif
520 // get a hash value.
521 basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
522 // make the value appropriate for our table.
523 hashed = hashed % _table->elements();
524 // see if the key exists in the bucket.
525 bucket<key_type, contents> *buck = _table->borrow(hashed);
526 if (!buck) return false;
527 int indy = buck->find(key);
528 if (basis::negative(indy)) {
529 // nope, not there.
530 return false;
531 }
532 basis::WHACK((*buck)[indy]._data); // delete the data held.
533 basis::WHACK((*buck)[indy]._id); // delete the data held.
534 buck->zap(indy, indy); // toss the storage blob for the item.
535 if (!buck->length()) {
536 // clean up this bucket now.
537 buck = _table->acquire(hashed);
538 basis::WHACK(buck);
539 }
540#ifdef EXTREME_CHECKING
541 if (!verify())
542 deadly_error(class_name(), func, "state did not verify afterwards.");
543#endif
544 return true;
545}
546
547template <class key_type, class contents>
548void hash_table<key_type, contents>::apply(apply_function *to_apply,
549 void *data_link)
550{
551#ifdef EXTREME_CHECKING
552 FUNCDEF("apply");
553 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
554#endif
555 typedef bucket<key_type, contents> buckie;
556 int bucks_seen = 0;
557 int posn = _last_iter; // start at the same place we left.
558 while (bucks_seen++ < _table->elements()) {
559 if ( (posn < 0) || (posn >= _table->elements()) )
560 posn = 0;
561 buckie *b = _table->borrow(posn);
562 _last_iter = posn++;
563 // record where the iteration last touched and increment next position.
564 // we must do this before we check if the bucket exists or we'll rotate
565 // on this same place forever.
566 if (!b) continue; // nothing here, keep going.
567 for (int j = 0; j < b->length(); j++) {
568 contents *c = b->use(j)._data;
569 if (!c) {
570#ifdef EXTREME_CHECKING
571 deadly_error("hash_table", "apply", "logic error--missing pointer");
572#endif
573 continue;
574 }
575 if (!to_apply(*b->use(j)._id, *c, data_link)) return;
576 }
577 }
578}
579
580template <class key_type, class contents>
582{
583#ifdef EXTREME_CHECKING
584 FUNCDEF("elements");
585 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
586#endif
587 typedef bucket<key_type, contents> buckie;
588 int to_return = 0;
589 for (int i = 0; i < _table->elements(); i++) {
590 bucket<key_type, contents> *buck = _table->borrow(i);
591 if (!buck) continue; // nothing to count.
592 to_return += buck->length();
593 }
594 return to_return;
595}
596
597#undef static_class_name
598
599} //namespace.
600
601#endif // outer guard.
602
Represents a sequential, ordered, contiguous collection of objects.
Definition array.h:54
@ EXPONE
synonym for EXPONENTIAL_GROWTH.
Definition array.h:61
@ SIMPLE_COPY
the contents can be memcpy'd and are not deep.
Definition array.h:59
@ FLUSH_INVISIBLE
blanks out allocated but inaccessible elements.
Definition array.h:62
outcome put(int index, const contents &to_put)
Stores an object at the index "index" in the array.
Definition array.h:834
const hash_wrapper< key_type, contents > & get(int index) const
Accesses individual objects stored in "this" at the "index" position.
Definition array.h:372
int length() const
Returns the current reported length of the allocated C array.
Definition array.h:115
outcome zap(int start, int end)
Deletes from "this" the objects inclusively between "start" and "end".
Definition array.h:769
contents & use(int index)
A non-constant version of get(); the returned object can be modified.
Definition array.h:365
array(int number=0, const hash_wrapper< key_type, contents > *init=NULL_POINTER, int flags=EXPONENTIAL_GROWTH|FLUSH_INVISIBLE)
Constructs an array with room for "number" objects.
Definition array.h:315
A clonable object knows how to make copy of itself.
Definition contracts.h:109
Root object for any class that knows its own name.
Definition contracts.h:123
Outcomes describe the state of completion for an operation.
Definition outcome.h:31
int find(const key_type &to_find)
Definition hash_table.h:239
Implements hashing into buckets for quick object access.
Definition hash_table.h:75
bool apply_function(const key_type &key, contents &current, void *data_link)
the "apply_function" is what a user of the "apply" method must supply.
Definition hash_table.h:147
int elements() const
the number of valid items we found by traversing the hash table.
Definition hash_table.h:581
basis::outcome add(key_type *key, contents *to_store, bool check_dupes=true)
specialized add for a pre-existing pointer "key".
Definition hash_table.h:415
basis::outcome add(const key_type &key, contents *to_store)
Stores "to_store" into the table given its "key" for hashing.
Definition hash_table.h:410
internal_hash_array< key_type, contents > & table_access() const
special accessor for the copy_hash_table method only.
Definition hash_table.h:368
bool verify() const
returns true if the hash table is internally consistent.
Definition hash_table.h:346
DEFINE_CLASS_NAME("hash_table")
static int calculate_num_slots(int estimated_elements)
a helper method that lets us know what n is for how many 2^n slots we should have.
Definition hash_table.h:285
void apply(apply_function *to_apply, void *data_link)
Invokes the function "to_apply" on every entry in the table.
Definition hash_table.h:548
void rehash(int estimated_elements)
resizes the hashing structures to optimise for a new size of "estimated_elements".
Definition hash_table.h:372
contents * acquire(const key_type &key)
retrieves the contents held for "key" out of the table.
Definition hash_table.h:488
void reset()
removes all entries in the table and returns it to a pristine state.
Definition hash_table.h:321
contents * find(const key_type &key) const
simplified form of above find() method.
Definition hash_table.h:130
bool find(const key_type &key, contents *&item_found) const
locates the item specified by the "key", if possible.
Definition hash_table.h:465
basis::outcome fast_dangerous_add(const key_type &key, contents *to_store)
Like the add method above, but doesn't check for duplicates.
Definition hash_table.h:461
hash_table(const hashing_algorithm &hasher, int estimated_elements)
Creates a table using the "hasher" that is ready to store "estimated_elements".
Definition hash_table.h:265
virtual ~hash_table()
destroys any objects left in the hash_table.
Definition hash_table.h:273
int estimated_elements() const
returns the size of table we're optimized for.
Definition hash_table.h:106
bool zap(const key_type &key)
removes the entry with the "key" specified.
Definition hash_table.h:514
hash_wrapper(key_type *id=NULL_POINTER, contents *data=NULL_POINTER)
Definition hash_table.h:223
A hashing algorithm takes a key and derives a related integer from it.
Definition hash_table.h:46
virtual basis::un_int hash(const void *key_data, int key_length) const =0
returns a hash value based on the "key_data" and "key_length".
virtual hashing_algorithm * clone() const =0
supports virtual construction of new algorithm objects.
internal_hash_array(int estimated_elements)
Definition hash_table.h:257
#define deadly_error(c, f, i)
#define NULL_POINTER
The value representing a pointer to nothing.
Definition definitions.h:32
#define FUNCDEF(func_in)
FUNCDEF sets the name of a function (and plugs it into the callstack).
Definition enhance_cpp.h:54
The guards collection helps in testing preconditions and reporting errors.
Definition array.h:30
void WHACK(contents *&ptr)
deletion with clearing of the pointer.
Definition functions.h:121
unsigned int un_int
Abbreviated name for unsigned integers.
Definition definitions.h:62
bool negative(const type &a)
negative returns true if "a" is less than zero.
Definition functions.h:43
A dynamic container class that holds any kind of object via pointers.
Definition amorph.h:55
void copy_hash_table(hash_table< key_type, contents > &target, const hash_table< key_type, contents > &source)
Copies the entire hash table, given a contents type that supports copying.
Definition hash_table.h:297
const int HASH_FUDGE_FACTOR
Definition hash_table.h:30