From 39c586807a09f9c7cfe2ca59ffcf7aae016b5859 Mon Sep 17 00:00:00 2001 From: fcolin Date: Wed, 27 Jul 2005 14:40:22 +0000 Subject: hash table to store regexp --- src/hash.c | 659 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/hash.h | 29 +++ 2 files changed, 688 insertions(+) create mode 100755 src/hash.c create mode 100755 src/hash.h (limited to 'src') diff --git a/src/hash.c b/src/hash.c new file mode 100755 index 0000000..50a9acd --- /dev/null +++ b/src/hash.c @@ -0,0 +1,659 @@ +/************************************************************************ + hash.c - This module maintains a hash table of key/value pairs. + Keys can be strings of any size, or numbers up to size + unsigned long (HASHKEYTYPE). + Values should be a pointer to some data you wish to store. + See hash_usage() for an example of use. + ************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include "hash.h" + +// Enums +typedef enum {FindEmpty = 0, FindExisting = 1} enumFind; + +// Structs +struct ROW +{ + HASHKEYTYPE Key; + int Type; + void * Data; +}; + +struct HASHTABLE_T +{ + int KeyIsString; + unsigned long MaxLoad; + unsigned long MaxRows; + unsigned long ItemCount; + struct ROW *Rows; +}; + +// Function prototypes +static void hash_expand(HASHTABLE OldTable); + +// Current module name (used by error processing) +static char *module = "hash"; + +// Error list +static const char *ERR_MUST_CREATE_HASH_TABLE_FIRST = "You must create the hash table before using it!"; +static const char *ERR_NOT_STRING_TABLE = "Not a string hash table!"; + +// Deleted entry indicator +static const int Empty = 0x00000000; +static const int InUse = 0x11111111; +static const int Deleted = 0xffffffff; + +// Expand the table when the usage exceeds this amount (80%) +static double LoadFactor = 0.8; + + +/************************************************************************ + ExitEarly - Quit the program early + ************************************************************************/ +static void ExitEarly(const char *strModule, const char *strFunction, const char *errmsg, ...) +{ + va_list args; + fprintf(stderr, "Error in module %s, function %s: ", strModule, strFunction); + va_start(args, errmsg); + vfprintf(stderr, errmsg, args); + va_end(args); + exit(1); +} + +/************************************************************************ + hash1 - Calculate a hash value for the desired key + ************************************************************************/ +static HASHKEYTYPE hash1(HASHKEYTYPE Key, BOOL KeyIsString) +{ + + if (!KeyIsString) + { + return Key; + } + else + { + unsigned char *s = (unsigned char *)Key; + HASHKEYTYPE h = 0; + while (*s) + h = h * 31UL + *s++; + return h ; + } +} + +/************************************************************************ + hash2 - Calculate a secondary hash value for the desired key + ************************************************************************/ +static HASHKEYTYPE hash2(HASHKEYTYPE Key, BOOL KeyIsString) +{ + return hash1(Key, KeyIsString) >> 3; +} + +/************************************************************************ + NextPrime - Return the next prime number past a certain number. + ************************************************************************/ +static unsigned long NextPrime(unsigned long NumberDesired) +{ + unsigned long i; + unsigned long HalfwayPoint; + int IsDivisible ; + + do + { + NumberDesired++; + IsDivisible = FALSE; + HalfwayPoint = NumberDesired / 2; + for (i = 2; i <= HalfwayPoint; i++) + { + if (NumberDesired % i == 0) + { + IsDivisible = TRUE; + break; + } + } + } while (IsDivisible); + + return NumberDesired; + +}; + +/************************************************************************ + hash_create - Create a new hash table + ************************************************************************/ +HASHTABLE hash_create(unsigned long InitialSize, BOOL KeyIsString) +{ + HASHTABLE table; + + // Allocate space for the hash table. + table = malloc(sizeof(*table)); + + // Minimum size of hash table is 8. + if (InitialSize < 8 ) + InitialSize = 8; + + // Allocate it large enough so that it's not more than 80% full. + InitialSize = (unsigned long)(InitialSize * 1.25); + + // Allocate space for the rows in the hash table + InitialSize = NextPrime(InitialSize); + table->MaxLoad = (unsigned long)(InitialSize * LoadFactor); + table->MaxRows = InitialSize; + table->Rows = malloc(InitialSize * sizeof(struct ROW)); + table->KeyIsString = KeyIsString; + + + return table; + +} + + +/************************************************************************ + hash_destroy - Destroy a hash table. + ************************************************************************/ +HASHTABLE hash_destroy(HASHTABLE table) +{ + char *function = "hash_destroy"; + + // Make sure the table is valid + if (table == NULL) + ExitEarly(module, function, ERR_MUST_CREATE_HASH_TABLE_FIRST); + + // Free the rows in the table + free(table->Rows); + + // Free the table itself + free(table); + + // Return NULL + return NULL; +} + + +/************************************************************************ + FindSlot - Find a slot in the hash table, either empty or existing. + ************************************************************************/ +static int FindSlot(HASHTABLE table, HASHKEYTYPE Key, int FindMethod, HASHKEYTYPE *Slot) +{ + char *function = "FindSlot"; + HASHKEYTYPE hash = 0; + unsigned long i; + HASHKEYTYPE AddlAmt = 0; + + // Hash the key. + hash = (HASHKEYTYPE)(hash1(Key, table->KeyIsString) % table->MaxRows); + + // Perform the lookup a maximum of MaxRows times. + for (i = 0; i < table->MaxRows; i++) + { + // Are we supposed to find an empty slot or look for an existing key? + if (FindMethod == FindExisting) + { + // Look for an existing key + if (table->Rows[hash].Type == Empty) + { + // Couldn't find the key + return FALSE; + } + else if (table->Rows[hash].Type == InUse && + ( (table->KeyIsString && strcmp((char *)table->Rows[hash].Key, (char *)Key) == 0) || + (!table->KeyIsString && table->Rows[hash].Key == Key) ) ) + + { + // Found the key + *Slot = hash; + return TRUE; + } + } + else + { + // Look for an empty slot to insert the new key into + if (table->Rows[hash].Type != InUse) + { + // Found a spot + *Slot = hash; + return TRUE; + } + else if ( (table->KeyIsString && strcmp((char *)table->Rows[hash].Key, (char *)Key) == 0) || + (!table->KeyIsString && table->Rows[hash].Key == Key) ) + { + // Key already exists! + return FALSE; + } + } + + // Rehash the hash and try again. + if (AddlAmt == 0) + AddlAmt = (HASHKEYTYPE)(hash2(Key, table->KeyIsString) % (table->MaxRows >> 3) + 1); + hash = (hash + AddlAmt) % table->MaxRows; + + } + + // Searched the whole table unsuccessfully! :-O + // Should never hit this point, because the table expands when it gets too full. + ExitEarly(module, function, "Findslot failed!"); + return FALSE; +} + + +/************************************************************************ + hash_add - Add an item to a hash table. + ************************************************************************/ +BOOL hash_add(HASHTABLE table, HASHKEYTYPE Key, const void *Data) +{ + char *function = "hash_add"; + HASHKEYTYPE position; + + if (table == NULL) + ExitEarly(module, function, ERR_MUST_CREATE_HASH_TABLE_FIRST); + + // Do not let the table become overloaded + if (table->ItemCount > table->MaxLoad) + hash_expand(table); + + // Search for an empty slot + if (FindSlot(table, Key, FindEmpty, &position)) + { + // Insert the data into the table. + table->Rows[position].Key = Key; + table->Rows[position].Type = InUse; + table->Rows[position].Data = (void *)Data; + table->ItemCount++; + return TRUE; + } + else + { + // The entry already exists! + return FALSE; + } +} + + +/************************************************************************ + hash_addstring - Add an string to a hash table. + ************************************************************************/ +BOOL hash_addstring(HASHTABLE table, char * Key, const void * Data) +{ + char *function = "hash_addstring"; + + if (table == NULL) + ExitEarly(module, function, ERR_MUST_CREATE_HASH_TABLE_FIRST); + if (table->KeyIsString == FALSE) + ExitEarly(module, function, ERR_NOT_STRING_TABLE); + return hash_add(table, (HASHKEYTYPE)Key, Data); +} + +/************************************************************************ + hash_remove - Remove an item from a hash table. + ************************************************************************/ +void *hash_remove(HASHTABLE table, HASHKEYTYPE Key) +{ + char *function = "hash_remove"; + void *Data; + unsigned int position; + + if (table == NULL) + ExitEarly(module, function, ERR_MUST_CREATE_HASH_TABLE_FIRST); + + if (FindSlot(table, Key, FindExisting, &position)) + { + // Found the item, now mark it as deleted. + Data = table->Rows[position].Data; + table->Rows[position].Key = 0; + table->Rows[position].Type = Deleted; + table->Rows[position].Data = 0; + table->ItemCount--; + return Data; + } + else + { + // Couldn't find the item to remove it. + return NULL; + } +} + +/************************************************************************ + hash_lookup - Lookup an item from a hash table. + ************************************************************************/ +void *hash_lookup(HASHTABLE table, HASHKEYTYPE Key) +{ + char *function = "hash_lookup"; + HASHKEYTYPE position; + + if (table == NULL) + ExitEarly(module, function, ERR_MUST_CREATE_HASH_TABLE_FIRST); + + if (FindSlot(table, Key, FindExisting, &position)) + return table->Rows[position].Data; + else + return NULL; +} + +/************************************************************************ + hash_exists - Returns whether or not a key exists in the table. + ************************************************************************/ +BOOL hash_exists(HASHTABLE table, HASHKEYTYPE Key) +{ + char *function = "hash_lookup"; + HASHKEYTYPE position; + + if (table == NULL) + ExitEarly(module, function, ERR_MUST_CREATE_HASH_TABLE_FIRST); + + if (FindSlot(table, Key, FindExisting, &position)) + return TRUE; + else + return FALSE; +} + +/************************************************************************ + hash_count - Return a count of all items in the hash table + ************************************************************************/ +extern size_t hash_count(HASHTABLE table) +{ + char *function = "hash_count"; + if (table == NULL) + ExitEarly(module, function, ERR_MUST_CREATE_HASH_TABLE_FIRST); + + return table->ItemCount; +} + +/************************************************************************ + hash_expand - Expand the hash table to accommodate more entries. + ************************************************************************/ +static void hash_expand(HASHTABLE OldTable) +{ + HASHTABLE NewTable; + unsigned long i; + + // Create a new temporary table + NewTable = hash_create(OldTable->MaxRows * 2, OldTable->KeyIsString); + + // Add the data from the old table into the new table + for (i = 0; i < OldTable->MaxRows; i++) + { + if (OldTable->Rows[i].Type == InUse) + hash_add(NewTable, OldTable->Rows[i].Key, OldTable->Rows[i].Data); + } + + // Free the old table rows + free(OldTable->Rows); + + // Overlay the old table values with the temporary table values. + OldTable->MaxRows = NewTable->MaxRows; + OldTable->MaxLoad = NewTable->MaxLoad; + OldTable->ItemCount = NewTable->ItemCount; + OldTable->Rows = NewTable->Rows; + + // Destroy the temporary table + NewTable->Rows = NULL; + free(NewTable); +} + +/************************************************************************ + hash_search - Search for an item in the hashtable. + ************************************************************************/ +extern void *hash_search(HASHTABLE table, BOOL (*Search)(HASHKEYTYPE key, void *, va_list), ...) +{ + va_list args; + unsigned int i; + void *p = NULL; + + va_start(args, Search); + for (i = 0; i < table->MaxRows; i++) + { + if (table->Rows[i].Type != Deleted && + table->Rows[i].Type != Empty) + { + if (Search(table->Rows[i].Key, table->Rows[i].Data, args) != FALSE) + { + p = table->Rows[i].Data; + break; + } + } + } + va_end(args); + return p; +} + +/************************************************************************ + hash_searchwithvalist - Search for an item in the hashtable. + A va_list has already been passed in, no need to extract one. + ************************************************************************/ +extern void *hash_searchwithvalist(HASHTABLE table, BOOL (*Search)(HASHKEYTYPE key, void *, va_list), va_list args) +{ + unsigned int i; + void *p = NULL; + + for (i = 0; i < table->MaxRows; i++) + { + if (table->Rows[i].Type != Deleted && + table->Rows[i].Type != Empty) + { + if (Search(table->Rows[i].Key, table->Rows[i].Data, args) != FALSE) + { + p = table->Rows[i].Data; + break; + } + } + } + return p; +} + +/************************************************************************ + * hash_usage - Example of how to use this module. + ************************************************************************/ +struct person_t +{ + int ID; + char Name[32]; + int Age; +}; + +extern void hash_usage(void) +{ + void *table; + struct person_t *p; + struct person_t *lookup; + char *StringKey = "Lloyd"; + int NumberKey = 1234567; + + // Create some data to store + p = malloc(sizeof(*p)); + p->ID = NumberKey; + p->Age = 20; + sprintf(p->Name, StringKey); + + // Numeric hash example + + // Create the hash table + table = hash_create(10, FALSE); + + // Add a key/value pair to the table + if (hash_add(table, p->ID, p) == FALSE) + printf("Entry already exists!"); + + // Lookup a key/value pair in the table + if ((lookup = hash_lookup(table, NumberKey)) == FALSE) + printf("Entry not found!"); + else + printf("Person found by ID: %d. Name is: %s\n", lookup->ID, lookup->Name); + + // Remove a key/value pair from the table + if ((lookup = hash_remove(table, NumberKey)) == FALSE) + printf("Entry not found!"); + else + // you COULD free lookup here, but we're not done with it. + ; + + // Free the table. + table = hash_destroy(table); + + + // String hash example + + // Create the hash table + table = hash_create(10, TRUE); + + // Add a key/value pair to the table + if (hash_add(table, (HASHKEYTYPE)p->Name, p) == FALSE) + printf("Entry already exists!"); + + // Lookup a key/value pair in the table + if ((lookup = hash_lookup(table, (HASHKEYTYPE)StringKey)) == FALSE) + printf("Entry not found!"); + else + printf("Person found by name: %s. ID is: %d\n", lookup->Name, lookup->ID); + + // Remove a key/value pair from the table + if ((lookup = hash_remove(table, (HASHKEYTYPE)StringKey)) == FALSE) + printf("Entry not found!"); + else + free(lookup); + + // Free the table. + table = hash_destroy(table); +} + +/************************************************************************ + hash_test - Test the hash table. + ************************************************************************/ +static BOOL Search(HASHKEYTYPE key, void *p, va_list args) +{ + char *s = p; + char *find = va_arg(args, char *); + if (strcmp(s, find) == 0) + return TRUE; + else + return FALSE; +} + +static BOOL Iterate(HASHKEYTYPE key, void *p, va_list args) +{ + // Just iterate a counter. + unsigned int *i = va_arg(args, unsigned int *); + *i = *i + 1; + // Return false to keep searching. + return FALSE; +} + +extern BOOL hash_test(void) +{ + HASHTABLE table; + HASHTABLE strings; + int i; + int max = 1000000; + clock_t start; + char **s; + char temp[80]; + char *fmt = "\t%-8s %4d milliseconds, %8d per second.\n"; + unsigned int ctr = 0; + + // Logical tests + table = hash_create(8, FALSE); + if (hash_add(table, 0, &table) == FALSE) return FALSE; + if (hash_add(table, 1, &table) == FALSE) return FALSE; + if (hash_add(table, 0xffffffff, &table) == FALSE) return FALSE; + + strings = hash_create(8, TRUE); + if (hash_add(strings, (HASHKEYTYPE)"test1", &strings) == FALSE) return FALSE; + if (hash_add(strings, (HASHKEYTYPE)"test2", &strings) == FALSE) return FALSE; + if (hash_add(strings, (HASHKEYTYPE)"test3", &strings) == FALSE) return FALSE; + + if (hash_lookup(table, 0) != &table) return FALSE; + if (hash_lookup(table, 1) != &table) return FALSE; + if (hash_lookup(table, 0xffffffff) != &table) return FALSE; + + if (hash_lookup(strings, (HASHKEYTYPE)"test1") != &strings) return FALSE; + if (hash_lookup(strings, (HASHKEYTYPE)"test2") != &strings) return FALSE; + if (hash_lookup(strings, (HASHKEYTYPE)"test3") != &strings) return FALSE; + + if (hash_remove(strings, (HASHKEYTYPE)"test1") != &strings) return FALSE; + if (hash_lookup(strings, (HASHKEYTYPE)"test1") != FALSE) return FALSE; + + if (hash_destroy(table) != NULL) return FALSE; + if (hash_destroy(strings) != NULL) return FALSE; + + + // Stress tests for numbers + printf("HASH Numeric Tests for %d items\n", max); + table = hash_create(max, FALSE); + + start = clock(); + for (i = 0; i < max; i++) + hash_add(table, i, &table); + printf(fmt, "Adds:", clock() - start, (max * CLOCKS_PER_SEC)/(clock() - start)); + + start = clock(); + for (i = 0; i < max; i++) + if (hash_lookup(table, i) != &table) + break; + printf(fmt, "Lookups:", clock() - start, (max * CLOCKS_PER_SEC)/(clock() - start)); + + start = clock(); + for (i = 0; i < max; i++) + if (hash_remove(table, i) != &table) + break; + printf(fmt, "Deletes:", clock() - start, (max * CLOCKS_PER_SEC)/(clock() - start)); + if (hash_count(table) != 0) + return FALSE; + table = hash_destroy(table); + + // Stress tests for strings + printf("HASH strings Tests for %d items\n", max); + table = hash_create(max, TRUE); + s = malloc(sizeof(*s) * max); + for (i = 0; i < max; i++) + { + sprintf(temp, "Item %d", i); + s[i] = strdup(temp); + } + + start = clock(); + for (i = 0; i < max; i++) + hash_add(table, (HASHKEYTYPE)s[i], s[i]); + printf(fmt, "Adds:", clock() - start, (max * CLOCKS_PER_SEC)/(clock() - start)); + + start = clock(); + for (i = 0; i < max; i++) + if (hash_lookup(table, (HASHKEYTYPE)s[i]) != s[i]) + break; + printf(fmt, "Lookups:", clock() - start, (max * CLOCKS_PER_SEC)/(clock() - start)); + + // Test searching + if (hash_search(table, Search, "Item 271") != s[271]) + return FALSE; + if (hash_search(table, Search, "Not Found") != FALSE) + return FALSE; + + // Test iterating + ctr = 0; + start = clock(); + hash_search(table, Iterate, &ctr); + printf(fmt, "Iterate:", clock() - start, (max * CLOCKS_PER_SEC)/(clock() - start)); + printf("\t\t(Ctr is %u)\n", ctr); + + // Test deleting + start = clock(); + for (i = 0; i < max; i++) + if (hash_remove(table, (HASHKEYTYPE)s[i]) != s[i]) + break; + printf(fmt, "Deletes:", clock() - start, (max * CLOCKS_PER_SEC)/(clock() - start)); + if (hash_count(table) != 0) + return FALSE; + table = hash_destroy(table); + + // Free the test strings + for (i = 0; i < max; i++) + free(s[i]); + + + return TRUE; +} + + diff --git a/src/hash.h b/src/hash.h new file mode 100755 index 0000000..4ea7731 --- /dev/null +++ b/src/hash.h @@ -0,0 +1,29 @@ +/************************************************************************ + hash.h + ************************************************************************/ +#ifndef HASH_H +#define HASH_H + + +typedef unsigned int HASHKEYTYPE; +typedef struct HASHTABLE_T *HASHTABLE; +typedef int BOOL; + +#define FALSE 0 +#define TRUE 1 + +extern void hash_usage (void); +extern BOOL hash_test (void); +extern HASHTABLE hash_create (unsigned long InitialSize, BOOL KeyIsString); +extern HASHTABLE hash_destroy (HASHTABLE table); +extern BOOL hash_add (HASHTABLE table, HASHKEYTYPE Key, const void *Data); +extern BOOL hash_addstring (HASHTABLE table, char * Key, const void * Data); +extern void * hash_remove (HASHTABLE table, HASHKEYTYPE Key); +extern void * hash_lookup (HASHTABLE table, HASHKEYTYPE Key); +extern size_t hash_count (HASHTABLE table); +extern void * hash_search (HASHTABLE table, BOOL (*Search)(HASHKEYTYPE key, void *, va_list), ...); +extern void * hash_searchwithvalist(HASHTABLE table, BOOL (*Search)(HASHKEYTYPE key, void *, va_list), va_list args); +extern BOOL hash_exists (HASHTABLE table, HASHKEYTYPE Key); + + +#endif -- cgit v1.1