From 3a4838bed13b767132cbdf06364b2658da6cc356 Mon Sep 17 00:00:00 2001 From: chatty Date: Tue, 15 Dec 1992 10:55:33 +0000 Subject: Initial revision --- utils/HashTable.cc | 635 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 635 insertions(+) create mode 100644 utils/HashTable.cc (limited to 'utils/HashTable.cc') diff --git a/utils/HashTable.cc b/utils/HashTable.cc new file mode 100644 index 0000000..03bfd9a --- /dev/null +++ b/utils/HashTable.cc @@ -0,0 +1,635 @@ +/* + * CENA C++ Utilities + * + * by Stephane Chatty + * + * Copyright 1991, 1992 + * Laboratoire de Recherche en Informatique (LRI) + * Centre d'Etudes de la Navigation Aerienne (CENA) + * + * plain and generic hash tables and dictionnaries + * (Original C version by Michel Beaudouin-Lafon) + * + * $Id$ + * $CurLog$ + */ + +#include +#include "String.h" +#include "HashTable.h" +#include "Allocator.h" + +/*?class CcuHashTable +The class \typ{CcuHashTable} implements association tables based on a hashing algorithm. +Hash tables are useful to store information that needs to be retrieved +quickly, and which bears no efficient ordering. For instance, this is the +case for strings: +in a compiler, one may wish to maintain a hash table with identifiers +as hash keys and symbol table entries as information. + +A hash table is made of a set of hash entries (of class \typ{CcuHashCell}), +which can be modified during +the life of the table. An entry has two main fields: a hash key, and a user defined +information attached to that key. We will not give details about the structure of +a hash table, as they can be found in any book about algorithms. Figure~\ref{fig:hash} is only +here as a reminder. +\fig{hash}{Internals of a hash table} +The current implementation uses lists to store the synonyms so that entries can +be deleted efficiently. + +The class \typ{CcuHashTable} makes no assumption about the type of the keys, which it handles as +\typ{const void *}. + +The user can define several functions used when managing a hash table: the hash function, which +is determinant for the performances of the hash table, and the functions to copy, compare, and +destroy keys. Basic functions are provided by default. +?*/ + + + +CcuAllocator* CcuHashCell::HashCellMem = 0; + +/*?nodoc?*/ +void +CcuHashCell :: ClassInit () +{ + if (!HashCellMem) + HashCellMem = new CcuAllocator (sizeof (CcuHashCell)); +} + +/*?nodoc?*/ +void* +CcuHashCell :: operator new (unsigned int) +{ + return HashCellMem->Alloc (); +} + +/*?nodoc?*/ +void +CcuHashCell :: operator delete (void* that) +{ + HashCellMem->Free (that); +} + + + +/*? +Constructor for \typ{CcuHashTable}s. Build a hash table based on an array of +size \var{size}. +You can optionally provide your own functions for copying, destroying +and comparing keys, and the hashing function. If no hashing function is +provided, the identity function is used. +?*/ +CcuHashTable :: CcuHashTable (unsigned int size, HASH_F hash_fun, HCP_F cp_fun, HDEL_F del_fun, HCMP_F cmp_fun) +: Size (size), + Hash (hash_fun), + Copy (cp_fun), + Delete (del_fun), + Compare (cmp_fun) +{ + if (Size <= 0) + Size = 1; + Table = new CcuHashCell* [Size]; + memset (Table, 0, Size*sizeof (CcuHashCell*)); + + /* I know that HashCells can only be created from HashTables */ + if (!CcuHashCell::HashCellMem) + CcuHashCell::ClassInit (); +} + +/*? +Copy constructor for hash tables. The table and the cells are copied. The keys are +copied only if \var{ht} had a copy function. The new hash table will contain the +same informations as \var{ht}, but the synonyms will not necessarily be in the same order. +?*/ +CcuHashTable :: CcuHashTable (const CcuHashTable& ht) +: Size (ht.Size), + Hash (ht.Hash), + Copy (ht.Copy), + Delete (ht.Delete), + Compare (ht.Compare) +{ + Table = new CcuHashCell* [Size]; + register CcuHashCell** p = Table; + register CcuHashCell** q = ht.Table; + while (p < Table + Size) { + *p = 0; + register CcuHashCell* r = *q; + while (r) { + (*p) = new CcuHashCell (Copy ? (*Copy) (r->GetKey ()) : r->GetKey (), *p); + (*p)->SetInfo (r->Info); + r = r->Next; + } + ++p; + ++q; + } +} + +/*? +Destructor for hash tables. All keys are destroyed if a destruction function was provided. +?*/ +CcuHashTable :: ~CcuHashTable () +{ + Clear (); + +#ifdef CPLUS_BUG4 + delete [Size] Table; +#else + delete [] Table; +#endif +} + + +/*? +Remove all data from the hash table. All keys are destroyed if a destruction function was provided. +?*/ +void +CcuHashTable :: Clear () +{ + /* destroy keys */ + if (Delete) { + CcuHashCellIter hi (*this); + while (++hi) + (*Delete) ((*hi)->Key); + } + + /* destroy cells */ + int s = Size; + CcuHashCell** entry = Table; + + /* for each slot of the array ... */ + for ( ; s--; ++entry ) { + if (! *entry) + continue; + + /* ... destroy all cells linked from that slot ...*/ + for (CcuHashCell* h = *entry; h;) { + CcuHashCell* del = h; + h = h->Next; + delete del; + } + + /* ... and clear it */ + *entry = 0; + } +} + + +/*?hidden?*/ +CcuHashCell** +CcuHashTable :: HashKey (const void* key) const +{ + register unsigned int h = (unsigned int) key; + + if (Hash) + h = (*Hash) (key, Size); + + if (h >= Size) + h %= Size; + + return (Table + h); +} + +/*?hidden?*/ +CcuHashCell* +CcuHashTable :: FindCell (CcuHashCell** entry, const void* key) const +{ + CcuHashCell* h; + for (h = *entry; h; h = h -> Next) + if (Compare ? (*Compare) (key, h->Key) : (key == h->Key)) + break; + return h; +} + + +/*?hidden?*/ +CcuHashCell* +CcuHashTable :: AddCell (CcuHashCell** entry, const void* key) const +{ + if (Copy) + key = (* Copy) (key); + + CcuHashCell* h = new CcuHashCell (key, *entry); + *entry = h; + return h; +} + +/*?class CcuHashCell +\typ{CcuHashCell}s hold the entries of an hash table. +A \typ{CcuHashCell} contains a key and a user defined information. +?*/ + + +#ifdef DOC + +/*?hidden?*/ +CcuHashCellOf :: CcuHashCell (const void*, CcuHashCell*) +{ +} + +/*?nextdoc?*/ +const void* +CcuHashCell :: GetKey () const +{ +} + +/*? +Retrieve the key, or the information from a \typ{CcuHashCell}. +?*/ +CcuHashItem* +CcuHashCell :: GetInfo () const +{ +} + +/*? +Set the cell information to \var{p}. +?*/ +void +CcuHashCell :: SetInfo (CcuHashItem* p) +{ +} + +#endif + +/*? +Add the key \var{key} into the table. If +\var{found} is not 0, it is a return parameter which contains TRUE if the +key was already in the table, or FALSE if it was new. In both cases, the +function returns the address of the hash entry corresponding to the key. +\fun{Add} uses the hash function of the table. +If the key is new and a copy function was provided, the key is copied. +?*/ +CcuHashCell* +CcuHashTable :: Add (const void* key, int* found) +{ + /* est-il dans la table ? */ + CcuHashCell** entry = HashKey (key); + CcuHashCell* h = FindCell (entry, key); + + if (found) + *found = (h != 0); + + if (h) + return h; + + return AddCell (entry, key); +} + +/*? +Retrieve the key \var{key} from the table. If it exists, return +the address of its hash entry, thus allowing to get its associated info. If the key +does not exist in the table, return 0. +?*/ +CcuHashCell* +CcuHashTable :: Get (const void* key) +{ + CcuHashCell **entry = HashKey (key); + return FindCell (entry, key); +} + +#if 0 +/*? +Get a reference to the data stored in the table for the key \var{key}. That +reference can be used for reading as for writing. If the key was not present +in the table, a new entry is created. +?*/ +CcuHashItem*& +CcuHashTable :: operator [] (const void* key) +{ + CcuHashCell **entry = HashKey (key); + CcuHashCell *h = FindCell (entry, key); + + if (!h) + h = AddCell (entry, key); + + return h->Info; +} +#else +CcuHashCellRef +CcuHashTable :: operator [] (const void* key) const +{ + return CcuHashCellRef (*this, key); +} +#endif + + +/*? +Remove the key \var{key} from the table. If it exists, return its +associated data. If the key does not exist, return 0. \var{found} may +be used to test whether the key was found. +?*/ +CcuHashItem* +CcuHashTable :: Remove (const void* key, int* found) +{ + CcuHashCell **entry; + register CcuHashCell *h, *hp = 0; + CcuHashItem* info = 0; + + entry = HashKey (key); + + for (h = *entry; h; h = h->Next) { + if (Compare ? (*Compare) (key, h->Key) : (key == h->Key)) { + /* remove it from the list */ + if (hp) + hp->Next = h->Next; + else + *entry = h->Next; + + /* free it, sparing its info */ + if (Delete) + (*Delete) (h->Key); + info = h->Info; + delete h; + break; + } + hp = h; + } + if (found) + *found = (h != 0); + return info; +} + + +/*? +Change the size of the table, which is re-hashed. +?*/ +void +CcuHashTable :: Resize (int size) +{ + int s = Size; + if (size <= 0) + size = 1; + Size = size; + + CcuHashCell** entry = Table; + CcuHashCell** old_table = Table; + Table = new CcuHashCell* [size]; + memset (Table, 0, size*sizeof (CcuHashCell*)); + + /* rehash */ + for (; s--; ++entry) { + + /* scan the list */ + register CcuHashCell *cur, *next; + for (cur = *entry; cur; cur = next) { + next = cur->Next; + + register CcuHashCell** slot = HashKey (cur->Key); + + /* prepend the cell to the corresponding list */ + cur->Next = *slot; + *slot = cur; + } + } +#ifdef CPLUS_BUG4 + delete [Size] old_table; +#else + delete [] old_table; +#endif +} + + +#ifdef TUNE +/*?nextdoc?*/ +void +CcuHashTable :: HashStats (int fd) +{ + CcuHashCell * h; + CcuHashCell **entry; + char msg[80]; + int s, coll = 0, empty = 0; + + for (s = Size, entry = Table; s--; entry++) { + if (h = *entry) { + sprintf (msg, "%s(%d)", h->Key, h->Info); + write (fd, msg, strlen (msg)); + while (h = h->Next) { + sprintf (msg, "\t%s(%d)", h->Key, h->Info); + write (fd, msg, strlen (msg)); + coll++; + } + write (fd, "\n", 1); + } else { + write (fd, "\n", 8); + empty++; + } + } + sprintf (msg, "size : %d; %d empty; %d collisions\n", Size, empty, coll); + write (fd, msg, strlen (msg)); +} + +/*? +Print some statistics about this hash table on file descriptor \var{fd} +(default is standard error). +?*/ +void +CcuHashTable :: CollStats (int fd) +{ + CcuHashCell * h; + CcuHashCell **entry; + char msg[80]; + int s, lcoll, coll = 0, empty = 0; + int min = 999999, max = 0, moy = 0, moy2 = 0; + float fmoy, fvar; + + for (s = Size, entry = (CcuHashCell**) Data; s--; entry++) { + lcoll = 0; + if (h = *entry) { + while (h = h->Next) { + coll++, lcoll++; + } +/* sprintf (msg, "%d colls\n", lcoll); + write (fd, msg, strlen (msg)); +*/ moy += lcoll; + moy2 += lcoll * lcoll; + if (lcoll < min) + min = lcoll; + if (lcoll > max) + max = lcoll; + } else { +/* write (fd, "\n", 8); +*/ empty++; + } + } + fmoy = ((float) moy) / ((float) Size); + fvar = fmoy * fmoy - (((float) moy2) / ((float) Size)); + sprintf (msg, "size : %d; %d empty; %d collisions\n", Size, empty, coll); + write (fd, msg, strlen (msg)); + sprintf (msg, "min : %d; max %d; moy %f; variance %f\n----\n\n", min, max, fmoy, fvar); + write (fd, msg, strlen (msg)); +} + +#endif /* TUNE */ + +CcuHashItem* +CcuHashCellRef :: operator = (CcuHashItem* info) +{ + CcuHashCell **entry = Table.HashKey (Key); + CcuHashCell *h = Table.FindCell (entry, Key); + + if (!h) + h = Table.AddCell (entry, Key); + + h->SetInfo (info); + + return info; +} + +CcuHashCellRef :: operator CcuHashItem* () +{ + CcuHashCell **entry = Table.HashKey (Key); + CcuHashCell *h = Table.FindCell (entry, Key); + return h ? h->GetInfo () : 0; +} + + +/*?class CcuDictionnary +For the common case when character strings are used as keys, +the class \typ{CcuDictionnary} was derived from the class \typ{CcuHashTable}. +It is the class that should be used for dictionaries, symbol tables,~\ldots\, +For this special kind of hash tables, keys are compared as strings, and are copied with the +function \fun{NewString} (see page~\pageref{newstring}). If users do not provide a hash function, +a default one is used. + +All the functions of class \typ{CcuHashTable} are available in class \typ{CcuDictionnary}. +The iterator \typ{Hashiter} can be used on \typ{CcuDictionnary} as well. +?*/ + + +/*?hidden?*/ +int +CcuDictionnary :: HashKey (const void* key, int) +{ + register int h = 0; + register const char *p = (const char *) key; + while (*p) + h ^= *p++; + return h; +} + +/*?hidden?*/ +int +CcuDictionnary :: CompareKey (const void* a, const void* b) +{ + register const char *p, *q; + /* on fait le strcmp a la main : ca va + vite */ + for (p = (const char *) a, q = (const char *) b; *p == *q++; p++) + if (*p == '\0') + return 1; + return 0; +} + +/*?hidden?*/ +void* +CcuDictionnary :: CopyKey (const void* k) +{ + return NewString ((const char*) k); +} + +/*?hidden?*/ +void +CcuDictionnary :: DeleteKey (void* k) +{ + FreeString ((char*) k); +} + +/*? +Constructor for \typ{CcuDictionnary}s. Initialize a \typ{CcuDictionnary} with an array of size \var{size}, +and \var{hash\_fun} as the hash function. If \var{hash\_fun} is not provided, the default +function is used. +?*/ +CcuDictionnary :: CcuDictionnary (int size, HASH_F f) +: CcuHashTable (size, (f ? f : HashKey), (HCP_F) (CopyKey), (HDEL_F) (DeleteKey), (HCMP_F) (CompareKey)) +{ +} + +/*?nodoc?*/ +CcuDictionnary :: ~CcuDictionnary () +{ +} + +/*?nodoc?*/ +void +CcuDictionnary :: KeyCopy (int flag) +{ + if (flag) { + Copy = (HCP_F) (&CopyKey); + Delete = (HDEL_F) &DeleteKey; + } else { + Copy = 0; + Delete = 0; + } +} + +/*?class Hashiter +The class \typ{Hashiter} makes it possible to iterate through the entries of an hash table. +?*/ + +#ifdef DOC +/*? +Constructor for \typ{CcuHashCellIter}s. Initialize an iterator on the hash table \var{t}. +?*/ +CcuHashCellIterOf :: CcuHashCellIter (const CcuHashTable& t) +{ +} + +/*? +Get the current cell pointed by an interator. +?*/ +CcuHashCell* +CcuHashCellIter :: operator * () const +{ +} +#endif + +/*? +Take one step of iteration. This operator returns a pointer on the next +hash entry of the table it is iterating on. When it is finished, it returns 0. +?*/ +CcuHashCellIter& +CcuHashCellIter :: operator ++ () +{ + register CcuHashCell* h = 0; + register int size = TheTable.GetSize (); + + + if (!CurCell || !(h = CurCell->Next)) + while ((++CurIndex < size) && !(h = (TheTable.Table [CurIndex]))) + ; + + CurCell = h; + return *this; +} + +#ifndef CPLUS_BUG16 +/*? +Post-increment equivalent of the previous operator. +?*/ +CcuHashCellIter +CcuHashCellIter :: operator ++ (int) +{ + CcuHashCellIter result = *this; + ++(*this); + return result; +} +#endif + +#ifndef CPLUS_BUG13 +CcuHashCellIter:: +#endif +hashiter_status +CcuHashCellIter :: Status () const +{ + if (CurCell) + return Normal; + if (CurIndex < 0) + return StartOfTable; + return EndOfTable; +} + +#ifdef DOC +CcuHashCellIter :: operator int () const +{ +} +#endif + -- cgit v1.1