Project 8: Hash Table
Project 8: Hash Table¶
Learning Objectives¶
What you will learn through this project: - Hash function principles - Hash table structure - Collision handling (chaining, open addressing) - Practical application: Simple dictionary program
What Is a Hash Table?¶
Concept¶
A hash table transforms a Key into an index using a hash function, then stores the Value at that position.
Key: "apple"
|
Hash Function: hash("apple") = 3
|
+---+---+---+---+---+---+---+
| | | | X | | | | -> Stored at Index 3
+---+---+---+---+---+---+---+
0 1 2 3 4 5 6
Time Complexity¶
| Operation | Average | Worst |
|---|---|---|
| Insert | O(1) | O(n) |
| Search | O(1) | O(n) |
| Delete | O(1) | O(n) |
Worst case: When all keys collide to the same index
Step 1: Understanding Hash Functions¶
Qualities of a Good Hash Function¶
- Deterministic: Same input -> Always same output
- Uniform distribution: Output is evenly distributed
- Fast computation: O(1) time
String Hash Functions¶
// hash_functions.c
#include <stdio.h>
#include <string.h>
#define TABLE_SIZE 10
// 1. Simple sum (bad example)
unsigned int hash_simple(const char *key) {
unsigned int hash = 0;
while (*key) {
hash += *key++;
}
return hash % TABLE_SIZE;
}
// 2. djb2 (Daniel J. Bernstein) - Recommended
unsigned int hash_djb2(const char *key) {
unsigned int hash = 5381;
int c;
while ((c = *key++)) {
hash = ((hash << 5) + hash) + c; // hash * 33 + c
}
return hash % TABLE_SIZE;
}
// 3. sdbm
unsigned int hash_sdbm(const char *key) {
unsigned int hash = 0;
int c;
while ((c = *key++)) {
hash = c + (hash << 6) + (hash << 16) - hash;
}
return hash % TABLE_SIZE;
}
// 4. FNV-1a
unsigned int hash_fnv1a(const char *key) {
unsigned int hash = 2166136261u;
while (*key) {
hash ^= (unsigned char)*key++;
hash *= 16777619;
}
return hash % TABLE_SIZE;
}
int main(void) {
const char *keys[] = {"apple", "banana", "cherry", "date", "elderberry"};
int n = sizeof(keys) / sizeof(keys[0]);
printf("=== Hash Function Comparison ===\n\n");
printf("%-12s | simple | djb2 | sdbm | fnv1a\n", "Key");
printf("-------------|--------|------|------|------\n");
for (int i = 0; i < n; i++) {
printf("%-12s | %6u | %4u | %4u | %5u\n",
keys[i],
hash_simple(keys[i]),
hash_djb2(keys[i]),
hash_sdbm(keys[i]),
hash_fnv1a(keys[i]));
}
return 0;
}
Step 2: Chaining (Separate Chaining)¶
Store collisions at the same index using a linked list.
Collision at Index 3:
+---+
| 0 | -> NULL
+---+
| 1 | -> NULL
+---+
| 2 | -> NULL
+---+
| 3 | -> [apple] -> [apricot] -> NULL (chain)
+---+
| 4 | -> NULL
+---+
Implementation¶
// hash_chaining.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define TABLE_SIZE 10
#define KEY_SIZE 50
#define VALUE_SIZE 100
// Node (key-value pair)
typedef struct Node {
char key[KEY_SIZE];
char value[VALUE_SIZE];
struct Node *next;
} Node;
// Hash table
typedef struct {
Node *buckets[TABLE_SIZE];
int count;
} HashTable;
// Hash function (djb2)
unsigned int hash(const char *key) {
unsigned int hash = 5381;
int c;
while ((c = *key++)) {
hash = ((hash << 5) + hash) + c;
}
return hash % TABLE_SIZE;
}
// Create
HashTable* ht_create(void) {
HashTable *ht = malloc(sizeof(HashTable));
if (ht) {
for (int i = 0; i < TABLE_SIZE; i++) {
ht->buckets[i] = NULL;
}
ht->count = 0;
}
return ht;
}
// Destroy
void ht_destroy(HashTable *ht) {
for (int i = 0; i < TABLE_SIZE; i++) {
Node *current = ht->buckets[i];
while (current) {
Node *next = current->next;
free(current);
current = next;
}
}
free(ht);
}
// Insert/Update
bool ht_set(HashTable *ht, const char *key, const char *value) {
unsigned int index = hash(key);
// Find existing key
Node *current = ht->buckets[index];
while (current) {
if (strcmp(current->key, key) == 0) {
// Existing key -> update value
strncpy(current->value, value, VALUE_SIZE - 1);
return true;
}
current = current->next;
}
// Create new node
Node *node = malloc(sizeof(Node));
if (!node) return false;
strncpy(node->key, key, KEY_SIZE - 1);
strncpy(node->value, value, VALUE_SIZE - 1);
node->next = ht->buckets[index];
ht->buckets[index] = node;
ht->count++;
return true;
}
// Search
char* ht_get(HashTable *ht, const char *key) {
unsigned int index = hash(key);
Node *current = ht->buckets[index];
while (current) {
if (strcmp(current->key, key) == 0) {
return current->value;
}
current = current->next;
}
return NULL; // Not found
}
// Delete
bool ht_delete(HashTable *ht, const char *key) {
unsigned int index = hash(key);
Node *current = ht->buckets[index];
Node *prev = NULL;
while (current) {
if (strcmp(current->key, key) == 0) {
if (prev) {
prev->next = current->next;
} else {
ht->buckets[index] = current->next;
}
free(current);
ht->count--;
return true;
}
prev = current;
current = current->next;
}
return false; // Not found
}
// Print
void ht_print(HashTable *ht) {
printf("\n=== Hash Table (count=%d) ===\n", ht->count);
for (int i = 0; i < TABLE_SIZE; i++) {
printf("[%d]: ", i);
Node *current = ht->buckets[i];
if (!current) {
printf("(empty)");
}
while (current) {
printf("(\"%s\": \"%s\")", current->key, current->value);
if (current->next) printf(" -> ");
current = current->next;
}
printf("\n");
}
}
// Test
int main(void) {
HashTable *ht = ht_create();
printf("=== Chaining Hash Table ===\n");
// Insert
ht_set(ht, "apple", "a fruit");
ht_set(ht, "banana", "a tropical fruit");
ht_set(ht, "cherry", "a small red fruit");
ht_set(ht, "date", "a sweet fruit");
ht_set(ht, "elderberry", "a berry");
ht_print(ht);
// Search
printf("\nSearch test:\n");
printf("apple: %s\n", ht_get(ht, "apple") ?: "(not found)");
printf("grape: %s\n", ht_get(ht, "grape") ?: "(not found)");
// Update
printf("\nUpdate test:\n");
ht_set(ht, "apple", "a delicious fruit");
printf("apple: %s\n", ht_get(ht, "apple"));
// Delete
printf("\nDelete test:\n");
ht_delete(ht, "banana");
ht_print(ht);
ht_destroy(ht);
return 0;
}
Step 3: Open Addressing¶
When collision occurs, find another empty slot to store.
Linear Probing¶
hash("apple") = 3, hash("apricot") = 3 (collision!)
Insert "apple":
+---+---+---+---+---+---+---+
| | | | X | | | |
+---+---+---+---+---+---+---+
0 1 2 3 4 5 6
Insert "apricot" (collision -> next slot):
+---+---+---+---+---+---+---+
| | | | X | Y | | | <- Stored at Index 4
+---+---+---+---+---+---+---+
0 1 2 3 4 5 6
Implementation¶
// hash_linear_probing.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#define TABLE_SIZE 10
#define KEY_SIZE 50
#define VALUE_SIZE 100
// Slot status
typedef enum {
EMPTY, // Empty
OCCUPIED, // In use
DELETED // Deleted (continue probing on search)
} SlotStatus;
// Slot
typedef struct {
char key[KEY_SIZE];
char value[VALUE_SIZE];
SlotStatus status;
} Slot;
// Hash table
typedef struct {
Slot slots[TABLE_SIZE];
int count;
} HashTable;
unsigned int hash(const char *key) {
unsigned int hash = 5381;
int c;
while ((c = *key++)) {
hash = ((hash << 5) + hash) + c;
}
return hash % TABLE_SIZE;
}
HashTable* ht_create(void) {
HashTable *ht = malloc(sizeof(HashTable));
if (ht) {
for (int i = 0; i < TABLE_SIZE; i++) {
ht->slots[i].status = EMPTY;
}
ht->count = 0;
}
return ht;
}
void ht_destroy(HashTable *ht) {
free(ht);
}
// Insert
bool ht_set(HashTable *ht, const char *key, const char *value) {
if (ht->count >= TABLE_SIZE) {
printf("Hash table is full!\n");
return false;
}
unsigned int index = hash(key);
unsigned int original_index = index;
// Linear probing
do {
// Empty slot or same key
if (ht->slots[index].status != OCCUPIED ||
strcmp(ht->slots[index].key, key) == 0) {
if (ht->slots[index].status != OCCUPIED) {
ht->count++;
}
strncpy(ht->slots[index].key, key, KEY_SIZE - 1);
strncpy(ht->slots[index].value, value, VALUE_SIZE - 1);
ht->slots[index].status = OCCUPIED;
return true;
}
index = (index + 1) % TABLE_SIZE; // Next slot
} while (index != original_index);
return false;
}
// Search
char* ht_get(HashTable *ht, const char *key) {
unsigned int index = hash(key);
unsigned int original_index = index;
do {
if (ht->slots[index].status == EMPTY) {
return NULL; // Not found
}
if (ht->slots[index].status == OCCUPIED &&
strcmp(ht->slots[index].key, key) == 0) {
return ht->slots[index].value;
}
index = (index + 1) % TABLE_SIZE;
} while (index != original_index);
return NULL;
}
// Delete
bool ht_delete(HashTable *ht, const char *key) {
unsigned int index = hash(key);
unsigned int original_index = index;
do {
if (ht->slots[index].status == EMPTY) {
return false;
}
if (ht->slots[index].status == OCCUPIED &&
strcmp(ht->slots[index].key, key) == 0) {
ht->slots[index].status = DELETED; // DELETED, not EMPTY
ht->count--;
return true;
}
index = (index + 1) % TABLE_SIZE;
} while (index != original_index);
return false;
}
void ht_print(HashTable *ht) {
printf("\n=== Hash Table (count=%d) ===\n", ht->count);
for (int i = 0; i < TABLE_SIZE; i++) {
printf("[%d]: ", i);
switch (ht->slots[i].status) {
case EMPTY:
printf("(empty)\n");
break;
case DELETED:
printf("(deleted)\n");
break;
case OCCUPIED:
printf("\"%s\": \"%s\"\n",
ht->slots[i].key, ht->slots[i].value);
break;
}
}
}
int main(void) {
HashTable *ht = ht_create();
printf("=== Linear Probing Hash Table ===\n");
ht_set(ht, "apple", "a fruit");
ht_set(ht, "banana", "a tropical fruit");
ht_set(ht, "cherry", "a small red fruit");
ht_print(ht);
printf("\nSearch: apple = %s\n", ht_get(ht, "apple") ?: "(not found)");
printf("\nDelete: banana\n");
ht_delete(ht, "banana");
ht_print(ht);
ht_destroy(ht);
return 0;
}
Step 4: Practical - Simple Dictionary Program¶
// dictionary.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define TABLE_SIZE 1000
#define KEY_SIZE 100
#define VALUE_SIZE 500
typedef struct Node {
char word[KEY_SIZE];
char meaning[VALUE_SIZE];
struct Node *next;
} Node;
typedef struct {
Node *buckets[TABLE_SIZE];
int count;
} Dictionary;
unsigned int hash(const char *key) {
unsigned int hash = 5381;
while (*key) {
hash = ((hash << 5) + hash) + tolower(*key++);
}
return hash % TABLE_SIZE;
}
Dictionary* dict_create(void) {
Dictionary *dict = calloc(1, sizeof(Dictionary));
return dict;
}
void dict_destroy(Dictionary *dict) {
for (int i = 0; i < TABLE_SIZE; i++) {
Node *current = dict->buckets[i];
while (current) {
Node *next = current->next;
free(current);
current = next;
}
}
free(dict);
}
void dict_add(Dictionary *dict, const char *word, const char *meaning) {
unsigned int index = hash(word);
// Check existing word
Node *current = dict->buckets[index];
while (current) {
if (strcasecmp(current->word, word) == 0) {
strncpy(current->meaning, meaning, VALUE_SIZE - 1);
printf("'%s' updated\n", word);
return;
}
current = current->next;
}
// Add new word
Node *node = malloc(sizeof(Node));
strncpy(node->word, word, KEY_SIZE - 1);
strncpy(node->meaning, meaning, VALUE_SIZE - 1);
node->next = dict->buckets[index];
dict->buckets[index] = node;
dict->count++;
printf("'%s' added\n", word);
}
char* dict_search(Dictionary *dict, const char *word) {
unsigned int index = hash(word);
Node *current = dict->buckets[index];
while (current) {
if (strcasecmp(current->word, word) == 0) {
return current->meaning;
}
current = current->next;
}
return NULL;
}
void dict_delete(Dictionary *dict, const char *word) {
unsigned int index = hash(word);
Node *current = dict->buckets[index];
Node *prev = NULL;
while (current) {
if (strcasecmp(current->word, word) == 0) {
if (prev) {
prev->next = current->next;
} else {
dict->buckets[index] = current->next;
}
free(current);
dict->count--;
printf("'%s' deleted\n", word);
return;
}
prev = current;
current = current->next;
}
printf("'%s' not found\n", word);
}
void dict_list(Dictionary *dict) {
printf("\n=== Dictionary List (Total: %d) ===\n", dict->count);
for (int i = 0; i < TABLE_SIZE; i++) {
Node *current = dict->buckets[i];
while (current) {
printf(" %s: %s\n", current->word, current->meaning);
current = current->next;
}
}
}
void print_menu(void) {
printf("\n============================\n");
printf("| Simple Dictionary |\n");
printf("|==========================|\n");
printf("| 1. Add word |\n");
printf("| 2. Search word |\n");
printf("| 3. Delete word |\n");
printf("| 4. Show all |\n");
printf("| 0. Exit |\n");
printf("============================\n");
}
void clear_input(void) {
int c;
while ((c = getchar()) != '\n' && c != EOF);
}
int main(void) {
Dictionary *dict = dict_create();
int choice;
char word[KEY_SIZE];
char meaning[VALUE_SIZE];
// Sample data
dict_add(dict, "apple", "a fruit; round and sweet");
dict_add(dict, "book", "printed pages bound together");
dict_add(dict, "computer", "electronic computing device");
while (1) {
print_menu();
printf("Choice: ");
scanf("%d", &choice);
clear_input();
switch (choice) {
case 1:
printf("Word: ");
fgets(word, KEY_SIZE, stdin);
word[strcspn(word, "\n")] = '\0';
printf("Meaning: ");
fgets(meaning, VALUE_SIZE, stdin);
meaning[strcspn(meaning, "\n")] = '\0';
dict_add(dict, word, meaning);
break;
case 2:
printf("Word to search: ");
fgets(word, KEY_SIZE, stdin);
word[strcspn(word, "\n")] = '\0';
char *result = dict_search(dict, word);
if (result) {
printf("\n %s: %s\n", word, result);
} else {
printf("\n '%s' not found\n", word);
}
break;
case 3:
printf("Word to delete: ");
fgets(word, KEY_SIZE, stdin);
word[strcspn(word, "\n")] = '\0';
dict_delete(dict, word);
break;
case 4:
dict_list(dict);
break;
case 0:
printf("Exiting dictionary.\n");
dict_destroy(dict);
return 0;
default:
printf("Invalid choice.\n");
}
}
return 0;
}
Compile and Run¶
gcc -Wall -std=c11 hash_chaining.c -o hash_chaining
gcc -Wall -std=c11 dictionary.c -o dictionary
./dictionary
Summary¶
| Concept | Description |
|---|---|
| Hash function | Convert key to index |
| Collision | Different keys map to same index |
| Chaining | Handle collisions with linked list |
| Open addressing | Handle collisions by probing empty slots |
| Load factor | count / table_size (0.7 or less recommended) |
Chaining vs Open Addressing¶
| Comparison | Chaining | Open Addressing |
|---|---|---|
| Memory | Dynamic allocation | Fixed size |
| Delete | Simple | Requires DELETED marker |
| Cache | Unfavorable | Favorable |
| Load factor | Can exceed 1 | Must be less than 1 |
Exercises¶
-
Resize: Double table size when load factor exceeds 0.7
-
File save: Save/load dictionary data to/from file
-
Double hashing: Use second hash function for probe interval on collision
Next Step¶
11_Project_Snake_Game.md -> Let's make a terminal game!