summaryrefslogtreecommitdiff
path: root/utils/HashTable.cc
diff options
context:
space:
mode:
Diffstat (limited to 'utils/HashTable.cc')
-rw-r--r--utils/HashTable.cc635
1 files changed, 635 insertions, 0 deletions
diff --git a/utils/HashTable.cc b/utils/HashTable.cc
new file mode 100644
index 0000000..03bfd9a
--- /dev/null
+++ b/utils/HashTable.cc
@@ -0,0 +1,635 @@
+/*
+ * CENA C++ Utilities
+ *
+ * by Stephane Chatty
+ *
+ * Copyright 1991, 1992
+ * Laboratoire de Recherche en Informatique (LRI)
+ * Centre d'Etudes de la Navigation Aerienne (CENA)
+ *
+ * plain and generic hash tables and dictionnaries
+ * (Original C version by Michel Beaudouin-Lafon)
+ *
+ * $Id$
+ * $CurLog$
+ */
+
+#include <string.h>
+#include "String.h"
+#include "HashTable.h"
+#include "Allocator.h"
+
+/*?class CcuHashTable
+The class \typ{CcuHashTable} implements association tables based on a hashing algorithm.
+Hash tables are useful to store information that needs to be retrieved
+quickly, and which bears no efficient ordering. For instance, this is the
+case for strings:
+in a compiler, one may wish to maintain a hash table with identifiers
+as hash keys and symbol table entries as information.
+
+A hash table is made of a set of hash entries (of class \typ{CcuHashCell}),
+which can be modified during
+the life of the table. An entry has two main fields: a hash key, and a user defined
+information attached to that key. We will not give details about the structure of
+a hash table, as they can be found in any book about algorithms. Figure~\ref{fig:hash} is only
+here as a reminder.
+\fig{hash}{Internals of a hash table}
+The current implementation uses lists to store the synonyms so that entries can
+be deleted efficiently.
+
+The class \typ{CcuHashTable} makes no assumption about the type of the keys, which it handles as
+\typ{const void *}.
+
+The user can define several functions used when managing a hash table: the hash function, which
+is determinant for the performances of the hash table, and the functions to copy, compare, and
+destroy keys. Basic functions are provided by default.
+?*/
+
+
+
+CcuAllocator* CcuHashCell::HashCellMem = 0;
+
+/*?nodoc?*/
+void
+CcuHashCell :: ClassInit ()
+{
+ if (!HashCellMem)
+ HashCellMem = new CcuAllocator (sizeof (CcuHashCell));
+}
+
+/*?nodoc?*/
+void*
+CcuHashCell :: operator new (unsigned int)
+{
+ return HashCellMem->Alloc ();
+}
+
+/*?nodoc?*/
+void
+CcuHashCell :: operator delete (void* that)
+{
+ HashCellMem->Free (that);
+}
+
+
+
+/*?
+Constructor for \typ{CcuHashTable}s. Build a hash table based on an array of
+size \var{size}.
+You can optionally provide your own functions for copying, destroying
+and comparing keys, and the hashing function. If no hashing function is
+provided, the identity function is used.
+?*/
+CcuHashTable :: CcuHashTable (unsigned int size, HASH_F hash_fun, HCP_F cp_fun, HDEL_F del_fun, HCMP_F cmp_fun)
+: Size (size),
+ Hash (hash_fun),
+ Copy (cp_fun),
+ Delete (del_fun),
+ Compare (cmp_fun)
+{
+ if (Size <= 0)
+ Size = 1;
+ Table = new CcuHashCell* [Size];
+ memset (Table, 0, Size*sizeof (CcuHashCell*));
+
+ /* I know that HashCells can only be created from HashTables */
+ if (!CcuHashCell::HashCellMem)
+ CcuHashCell::ClassInit ();
+}
+
+/*?
+Copy constructor for hash tables. The table and the cells are copied. The keys are
+copied only if \var{ht} had a copy function. The new hash table will contain the
+same informations as \var{ht}, but the synonyms will not necessarily be in the same order.
+?*/
+CcuHashTable :: CcuHashTable (const CcuHashTable& ht)
+: Size (ht.Size),
+ Hash (ht.Hash),
+ Copy (ht.Copy),
+ Delete (ht.Delete),
+ Compare (ht.Compare)
+{
+ Table = new CcuHashCell* [Size];
+ register CcuHashCell** p = Table;
+ register CcuHashCell** q = ht.Table;
+ while (p < Table + Size) {
+ *p = 0;
+ register CcuHashCell* r = *q;
+ while (r) {
+ (*p) = new CcuHashCell (Copy ? (*Copy) (r->GetKey ()) : r->GetKey (), *p);
+ (*p)->SetInfo (r->Info);
+ r = r->Next;
+ }
+ ++p;
+ ++q;
+ }
+}
+
+/*?
+Destructor for hash tables. All keys are destroyed if a destruction function was provided.
+?*/
+CcuHashTable :: ~CcuHashTable ()
+{
+ Clear ();
+
+#ifdef CPLUS_BUG4
+ delete [Size] Table;
+#else
+ delete [] Table;
+#endif
+}
+
+
+/*?
+Remove all data from the hash table. All keys are destroyed if a destruction function was provided.
+?*/
+void
+CcuHashTable :: Clear ()
+{
+ /* destroy keys */
+ if (Delete) {
+ CcuHashCellIter hi (*this);
+ while (++hi)
+ (*Delete) ((*hi)->Key);
+ }
+
+ /* destroy cells */
+ int s = Size;
+ CcuHashCell** entry = Table;
+
+ /* for each slot of the array ... */
+ for ( ; s--; ++entry ) {
+ if (! *entry)
+ continue;
+
+ /* ... destroy all cells linked from that slot ...*/
+ for (CcuHashCell* h = *entry; h;) {
+ CcuHashCell* del = h;
+ h = h->Next;
+ delete del;
+ }
+
+ /* ... and clear it */
+ *entry = 0;
+ }
+}
+
+
+/*?hidden?*/
+CcuHashCell**
+CcuHashTable :: HashKey (const void* key) const
+{
+ register unsigned int h = (unsigned int) key;
+
+ if (Hash)
+ h = (*Hash) (key, Size);
+
+ if (h >= Size)
+ h %= Size;
+
+ return (Table + h);
+}
+
+/*?hidden?*/
+CcuHashCell*
+CcuHashTable :: FindCell (CcuHashCell** entry, const void* key) const
+{
+ CcuHashCell* h;
+ for (h = *entry; h; h = h -> Next)
+ if (Compare ? (*Compare) (key, h->Key) : (key == h->Key))
+ break;
+ return h;
+}
+
+
+/*?hidden?*/
+CcuHashCell*
+CcuHashTable :: AddCell (CcuHashCell** entry, const void* key) const
+{
+ if (Copy)
+ key = (* Copy) (key);
+
+ CcuHashCell* h = new CcuHashCell (key, *entry);
+ *entry = h;
+ return h;
+}
+
+/*?class CcuHashCell
+\typ{CcuHashCell}s hold the entries of an hash table.
+A \typ{CcuHashCell} contains a key and a user defined information.
+?*/
+
+
+#ifdef DOC
+
+/*?hidden?*/
+CcuHashCellOf :: CcuHashCell (const void*, CcuHashCell*)
+{
+}
+
+/*?nextdoc?*/
+const void*
+CcuHashCell :: GetKey () const
+{
+}
+
+/*?
+Retrieve the key, or the information from a \typ{CcuHashCell}.
+?*/
+CcuHashItem*
+CcuHashCell :: GetInfo () const
+{
+}
+
+/*?
+Set the cell information to \var{p}.
+?*/
+void
+CcuHashCell :: SetInfo (CcuHashItem* p)
+{
+}
+
+#endif
+
+/*?
+Add the key \var{key} into the table. If
+\var{found} is not 0, it is a return parameter which contains TRUE if the
+key was already in the table, or FALSE if it was new. In both cases, the
+function returns the address of the hash entry corresponding to the key.
+\fun{Add} uses the hash function of the table.
+If the key is new and a copy function was provided, the key is copied.
+?*/
+CcuHashCell*
+CcuHashTable :: Add (const void* key, int* found)
+{
+ /* est-il dans la table ? */
+ CcuHashCell** entry = HashKey (key);
+ CcuHashCell* h = FindCell (entry, key);
+
+ if (found)
+ *found = (h != 0);
+
+ if (h)
+ return h;
+
+ return AddCell (entry, key);
+}
+
+/*?
+Retrieve the key \var{key} from the table. If it exists, return
+the address of its hash entry, thus allowing to get its associated info. If the key
+does not exist in the table, return 0.
+?*/
+CcuHashCell*
+CcuHashTable :: Get (const void* key)
+{
+ CcuHashCell **entry = HashKey (key);
+ return FindCell (entry, key);
+}
+
+#if 0
+/*?
+Get a reference to the data stored in the table for the key \var{key}. That
+reference can be used for reading as for writing. If the key was not present
+in the table, a new entry is created.
+?*/
+CcuHashItem*&
+CcuHashTable :: operator [] (const void* key)
+{
+ CcuHashCell **entry = HashKey (key);
+ CcuHashCell *h = FindCell (entry, key);
+
+ if (!h)
+ h = AddCell (entry, key);
+
+ return h->Info;
+}
+#else
+CcuHashCellRef
+CcuHashTable :: operator [] (const void* key) const
+{
+ return CcuHashCellRef (*this, key);
+}
+#endif
+
+
+/*?
+Remove the key \var{key} from the table. If it exists, return its
+associated data. If the key does not exist, return 0. \var{found} may
+be used to test whether the key was found.
+?*/
+CcuHashItem*
+CcuHashTable :: Remove (const void* key, int* found)
+{
+ CcuHashCell **entry;
+ register CcuHashCell *h, *hp = 0;
+ CcuHashItem* info = 0;
+
+ entry = HashKey (key);
+
+ for (h = *entry; h; h = h->Next) {
+ if (Compare ? (*Compare) (key, h->Key) : (key == h->Key)) {
+ /* remove it from the list */
+ if (hp)
+ hp->Next = h->Next;
+ else
+ *entry = h->Next;
+
+ /* free it, sparing its info */
+ if (Delete)
+ (*Delete) (h->Key);
+ info = h->Info;
+ delete h;
+ break;
+ }
+ hp = h;
+ }
+ if (found)
+ *found = (h != 0);
+ return info;
+}
+
+
+/*?
+Change the size of the table, which is re-hashed.
+?*/
+void
+CcuHashTable :: Resize (int size)
+{
+ int s = Size;
+ if (size <= 0)
+ size = 1;
+ Size = size;
+
+ CcuHashCell** entry = Table;
+ CcuHashCell** old_table = Table;
+ Table = new CcuHashCell* [size];
+ memset (Table, 0, size*sizeof (CcuHashCell*));
+
+ /* rehash */
+ for (; s--; ++entry) {
+
+ /* scan the list */
+ register CcuHashCell *cur, *next;
+ for (cur = *entry; cur; cur = next) {
+ next = cur->Next;
+
+ register CcuHashCell** slot = HashKey (cur->Key);
+
+ /* prepend the cell to the corresponding list */
+ cur->Next = *slot;
+ *slot = cur;
+ }
+ }
+#ifdef CPLUS_BUG4
+ delete [Size] old_table;
+#else
+ delete [] old_table;
+#endif
+}
+
+
+#ifdef TUNE
+/*?nextdoc?*/
+void
+CcuHashTable :: HashStats (int fd)
+{
+ CcuHashCell * h;
+ CcuHashCell **entry;
+ char msg[80];
+ int s, coll = 0, empty = 0;
+
+ for (s = Size, entry = Table; s--; entry++) {
+ if (h = *entry) {
+ sprintf (msg, "%s(%d)", h->Key, h->Info);
+ write (fd, msg, strlen (msg));
+ while (h = h->Next) {
+ sprintf (msg, "\t%s(%d)", h->Key, h->Info);
+ write (fd, msg, strlen (msg));
+ coll++;
+ }
+ write (fd, "\n", 1);
+ } else {
+ write (fd, "<EMPTY>\n", 8);
+ empty++;
+ }
+ }
+ sprintf (msg, "size : %d; %d empty; %d collisions\n", Size, empty, coll);
+ write (fd, msg, strlen (msg));
+}
+
+/*?
+Print some statistics about this hash table on file descriptor \var{fd}
+(default is standard error).
+?*/
+void
+CcuHashTable :: CollStats (int fd)
+{
+ CcuHashCell * h;
+ CcuHashCell **entry;
+ char msg[80];
+ int s, lcoll, coll = 0, empty = 0;
+ int min = 999999, max = 0, moy = 0, moy2 = 0;
+ float fmoy, fvar;
+
+ for (s = Size, entry = (CcuHashCell**) Data; s--; entry++) {
+ lcoll = 0;
+ if (h = *entry) {
+ while (h = h->Next) {
+ coll++, lcoll++;
+ }
+/* sprintf (msg, "%d colls\n", lcoll);
+ write (fd, msg, strlen (msg));
+*/ moy += lcoll;
+ moy2 += lcoll * lcoll;
+ if (lcoll < min)
+ min = lcoll;
+ if (lcoll > max)
+ max = lcoll;
+ } else {
+/* write (fd, "<EMPTY>\n", 8);
+*/ empty++;
+ }
+ }
+ fmoy = ((float) moy) / ((float) Size);
+ fvar = fmoy * fmoy - (((float) moy2) / ((float) Size));
+ sprintf (msg, "size : %d; %d empty; %d collisions\n", Size, empty, coll);
+ write (fd, msg, strlen (msg));
+ sprintf (msg, "min : %d; max %d; moy %f; variance %f\n----\n\n", min, max, fmoy, fvar);
+ write (fd, msg, strlen (msg));
+}
+
+#endif /* TUNE */
+
+CcuHashItem*
+CcuHashCellRef :: operator = (CcuHashItem* info)
+{
+ CcuHashCell **entry = Table.HashKey (Key);
+ CcuHashCell *h = Table.FindCell (entry, Key);
+
+ if (!h)
+ h = Table.AddCell (entry, Key);
+
+ h->SetInfo (info);
+
+ return info;
+}
+
+CcuHashCellRef :: operator CcuHashItem* ()
+{
+ CcuHashCell **entry = Table.HashKey (Key);
+ CcuHashCell *h = Table.FindCell (entry, Key);
+ return h ? h->GetInfo () : 0;
+}
+
+
+/*?class CcuDictionnary
+For the common case when character strings are used as keys,
+the class \typ{CcuDictionnary} was derived from the class \typ{CcuHashTable}.
+It is the class that should be used for dictionaries, symbol tables,~\ldots\,
+For this special kind of hash tables, keys are compared as strings, and are copied with the
+function \fun{NewString} (see page~\pageref{newstring}). If users do not provide a hash function,
+a default one is used.
+
+All the functions of class \typ{CcuHashTable} are available in class \typ{CcuDictionnary}.
+The iterator \typ{Hashiter} can be used on \typ{CcuDictionnary} as well.
+?*/
+
+
+/*?hidden?*/
+int
+CcuDictionnary :: HashKey (const void* key, int)
+{
+ register int h = 0;
+ register const char *p = (const char *) key;
+ while (*p)
+ h ^= *p++;
+ return h;
+}
+
+/*?hidden?*/
+int
+CcuDictionnary :: CompareKey (const void* a, const void* b)
+{
+ register const char *p, *q;
+ /* on fait le strcmp a la main : ca va + vite */
+ for (p = (const char *) a, q = (const char *) b; *p == *q++; p++)
+ if (*p == '\0')
+ return 1;
+ return 0;
+}
+
+/*?hidden?*/
+void*
+CcuDictionnary :: CopyKey (const void* k)
+{
+ return NewString ((const char*) k);
+}
+
+/*?hidden?*/
+void
+CcuDictionnary :: DeleteKey (void* k)
+{
+ FreeString ((char*) k);
+}
+
+/*?
+Constructor for \typ{CcuDictionnary}s. Initialize a \typ{CcuDictionnary} with an array of size \var{size},
+and \var{hash\_fun} as the hash function. If \var{hash\_fun} is not provided, the default
+function is used.
+?*/
+CcuDictionnary :: CcuDictionnary (int size, HASH_F f)
+: CcuHashTable (size, (f ? f : HashKey), (HCP_F) (CopyKey), (HDEL_F) (DeleteKey), (HCMP_F) (CompareKey))
+{
+}
+
+/*?nodoc?*/
+CcuDictionnary :: ~CcuDictionnary ()
+{
+}
+
+/*?nodoc?*/
+void
+CcuDictionnary :: KeyCopy (int flag)
+{
+ if (flag) {
+ Copy = (HCP_F) (&CopyKey);
+ Delete = (HDEL_F) &DeleteKey;
+ } else {
+ Copy = 0;
+ Delete = 0;
+ }
+}
+
+/*?class Hashiter
+The class \typ{Hashiter} makes it possible to iterate through the entries of an hash table.
+?*/
+
+#ifdef DOC
+/*?
+Constructor for \typ{CcuHashCellIter}s. Initialize an iterator on the hash table \var{t}.
+?*/
+CcuHashCellIterOf :: CcuHashCellIter (const CcuHashTable& t)
+{
+}
+
+/*?
+Get the current cell pointed by an interator.
+?*/
+CcuHashCell*
+CcuHashCellIter :: operator * () const
+{
+}
+#endif
+
+/*?
+Take one step of iteration. This operator returns a pointer on the next
+hash entry of the table it is iterating on. When it is finished, it returns 0.
+?*/
+CcuHashCellIter&
+CcuHashCellIter :: operator ++ ()
+{
+ register CcuHashCell* h = 0;
+ register int size = TheTable.GetSize ();
+
+
+ if (!CurCell || !(h = CurCell->Next))
+ while ((++CurIndex < size) && !(h = (TheTable.Table [CurIndex])))
+ ;
+
+ CurCell = h;
+ return *this;
+}
+
+#ifndef CPLUS_BUG16
+/*?
+Post-increment equivalent of the previous operator.
+?*/
+CcuHashCellIter
+CcuHashCellIter :: operator ++ (int)
+{
+ CcuHashCellIter result = *this;
+ ++(*this);
+ return result;
+}
+#endif
+
+#ifndef CPLUS_BUG13
+CcuHashCellIter::
+#endif
+hashiter_status
+CcuHashCellIter :: Status () const
+{
+ if (CurCell)
+ return Normal;
+ if (CurIndex < 0)
+ return StartOfTable;
+ return EndOfTable;
+}
+
+#ifdef DOC
+CcuHashCellIter :: operator int () const
+{
+}
+#endif
+