1/*
2 * hash_functions.c
3 * ๋ค์ํ ํด์ ํจ์ ๋น๊ต ๋ฐ ์ถฉ๋ ๋ถ์
4 *
5 * ๊ตฌํ๋ ํด์ ํจ์:
6 * 1. hash_simple - ๋จ์ ํฉ์ฐ (์ถฉ๋ ๋ง์)
7 * 2. hash_djb2 - Daniel J. Bernstein (์ฐ์ํ ๋ถํฌ)
8 * 3. hash_sdbm - sdbm ๋ฐ์ดํฐ๋ฒ ์ด์ค ํด์
9 * 4. hash_fnv1a - Fowler-Noll-Vo 1a (๋น ๋ฅด๊ณ ์ข์ ๋ถํฌ)
10 */
11
12#include <stdio.h>
13#include <string.h>
14#include <stdlib.h>
15
16#define TABLE_SIZE 100
17#define MAX_TEST_WORDS 50
18
19// 1. ๋จ์ ํฉ์ฐ ํด์ (๋์ ์)
20// ๋ฌธ์ ์ : ๊ฐ์ ๋ฌธ์์ ์์ด์ด ๊ฐ์ ๊ฐ ์์ฑ (์: "abc"์ "bca")
21unsigned int hash_simple(const char *key) {
22 unsigned int hash = 0;
23 while (*key) {
24 hash += (unsigned char)*key++;
25 }
26 return hash % TABLE_SIZE;
27}
28
29// 2. djb2 ํด์ (Daniel J. Bernstein) - ์ถ์ฒ
30// ์ฅ์ : ๋จ์ํ์ง๋ง ์ฐ์ํ ๋ถํฌ ํน์ฑ
31unsigned int hash_djb2(const char *key) {
32 unsigned int hash = 5381;
33 int c;
34 while ((c = *key++)) {
35 hash = ((hash << 5) + hash) + c; // hash * 33 + c
36 }
37 return hash % TABLE_SIZE;
38}
39
40// 3. sdbm ํด์
41// ์ฅ์ : ๋ง์ ๋ฐ์ดํฐ๋ฒ ์ด์ค์์ ๊ฒ์ฆ๋ ์ฑ๋ฅ
42unsigned int hash_sdbm(const char *key) {
43 unsigned int hash = 0;
44 int c;
45 while ((c = *key++)) {
46 hash = c + (hash << 6) + (hash << 16) - hash;
47 }
48 return hash % TABLE_SIZE;
49}
50
51// 4. FNV-1a ํด์ (Fowler-Noll-Vo)
52// ์ฅ์ : ๋น ๋ฅธ ์๋์ ์ข์ ๋ถํฌ
53unsigned int hash_fnv1a(const char *key) {
54 unsigned int hash = 2166136261u; // FNV offset basis
55 while (*key) {
56 hash ^= (unsigned char)*key++;
57 hash *= 16777619; // FNV prime
58 }
59 return hash % TABLE_SIZE;
60}
61
62// ์ถฉ๋ ์นด์ดํฐ ๊ตฌ์กฐ์ฒด
63typedef struct {
64 int simple;
65 int djb2;
66 int sdbm;
67 int fnv1a;
68} CollisionStats;
69
70// ์ถฉ๋ ๋ถ์ ํจ์
71CollisionStats analyze_collisions(const char **keys, int n) {
72 CollisionStats stats = {0, 0, 0, 0};
73
74 // ๊ฐ ํด์ ํจ์๋ณ๋ก ์ฌ์ฉ๋ ๋ฒํท ์ถ์
75 int buckets_simple[TABLE_SIZE] = {0};
76 int buckets_djb2[TABLE_SIZE] = {0};
77 int buckets_sdbm[TABLE_SIZE] = {0};
78 int buckets_fnv1a[TABLE_SIZE] = {0};
79
80 for (int i = 0; i < n; i++) {
81 unsigned int idx;
82
83 // simple
84 idx = hash_simple(keys[i]);
85 if (buckets_simple[idx] > 0) stats.simple++;
86 buckets_simple[idx]++;
87
88 // djb2
89 idx = hash_djb2(keys[i]);
90 if (buckets_djb2[idx] > 0) stats.djb2++;
91 buckets_djb2[idx]++;
92
93 // sdbm
94 idx = hash_sdbm(keys[i]);
95 if (buckets_sdbm[idx] > 0) stats.sdbm++;
96 buckets_sdbm[idx]++;
97
98 // fnv1a
99 idx = hash_fnv1a(keys[i]);
100 if (buckets_fnv1a[idx] > 0) stats.fnv1a++;
101 buckets_fnv1a[idx]++;
102 }
103
104 return stats;
105}
106
107// ๋ถํฌ ๊ท ์ผ์ฑ ๊ณ์ฐ (ํ์คํธ์ฐจ)
108double calculate_distribution(unsigned int (*hash_func)(const char*),
109 const char **keys, int n) {
110 int buckets[TABLE_SIZE] = {0};
111
112 // ๊ฐ ๋ฒํท์ ํ ๋น๋ ํค ๊ฐ์ ์นด์ดํธ
113 for (int i = 0; i < n; i++) {
114 unsigned int idx = hash_func(keys[i]);
115 buckets[idx]++;
116 }
117
118 // ํ๊ท ๊ณ์ฐ
119 double mean = (double)n / TABLE_SIZE;
120
121 // ๋ถ์ฐ ๊ณ์ฐ
122 double variance = 0.0;
123 for (int i = 0; i < TABLE_SIZE; i++) {
124 double diff = buckets[i] - mean;
125 variance += diff * diff;
126 }
127 variance /= TABLE_SIZE;
128
129 // ํ์คํธ์ฐจ ๋ฐํ (๋ฎ์์๋ก ๊ท ์ผํ ๋ถํฌ)
130 return variance;
131}
132
133// ํ
์คํธ์ฉ ๋จ์ด ๋ชฉ๋ก ์์ฑ
134const char** generate_test_words(int *count) {
135 static const char *words[] = {
136 // ๊ณผ์ผ
137 "apple", "banana", "cherry", "date", "elderberry",
138 "fig", "grape", "honeydew", "kiwi", "lemon",
139 // ์์
140 "red", "blue", "green", "yellow", "orange",
141 "purple", "pink", "brown", "black", "white",
142 // ๋๋ฌผ
143 "cat", "dog", "elephant", "fox", "giraffe",
144 "horse", "iguana", "jaguar", "kangaroo", "lion",
145 // ๊ตญ๊ฐ
146 "korea", "japan", "china", "america", "france",
147 "germany", "italy", "spain", "brazil", "india",
148 // ํ๋ก๊ทธ๋๋ฐ
149 "python", "java", "javascript", "ruby", "php",
150 "swift", "kotlin", "rust", "golang", "typescript"
151 };
152
153 *count = sizeof(words) / sizeof(words[0]);
154 return words;
155}
156
157void print_hash_table(const char *title, const char **keys, int n,
158 unsigned int (*hash_func)(const char*)) {
159 printf("\n=== %s ===\n", title);
160
161 // ํด์ ๊ฐ ์ถ๋ ฅ
162 printf("%-15s | Hash Value\n", "Key");
163 printf("----------------+-----------\n");
164 for (int i = 0; i < (n < 10 ? n : 10); i++) { // ์ฒ์ 10๊ฐ๋ง
165 printf("%-15s | %10u\n", keys[i], hash_func(keys[i]));
166 }
167 if (n > 10) printf("... (%d more)\n", n - 10);
168}
169
170int main(void) {
171 int n;
172 const char **test_words = generate_test_words(&n);
173
174 printf("โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\n");
175 printf("โ ํด์ ํจ์ ๋น๊ต ๋ฐ ์ถฉ๋ ๋ถ์ ๋๊ตฌ โ\n");
176 printf("โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\n");
177 printf("\nํ
์คํธ ๋จ์ด ๊ฐ์: %d\n", n);
178 printf("ํด์ ํ
์ด๋ธ ํฌ๊ธฐ: %d\n\n", TABLE_SIZE);
179
180 // 1. ์ํ ๋จ์ด๋ค์ ํด์ ๊ฐ ๋น๊ต
181 const char *sample_keys[] = {"apple", "banana", "cherry", "date", "elderberry"};
182 int sample_n = sizeof(sample_keys) / sizeof(sample_keys[0]);
183
184 printf("=== ์ํ ๋จ์ด ํด์ ๊ฐ ๋น๊ต ===\n\n");
185 printf("%-12s | Simple | djb2 | sdbm | fnv1a\n", "Key");
186 printf("-------------|--------|------|------|------\n");
187
188 for (int i = 0; i < sample_n; i++) {
189 printf("%-12s | %6u | %4u | %4u | %5u\n",
190 sample_keys[i],
191 hash_simple(sample_keys[i]),
192 hash_djb2(sample_keys[i]),
193 hash_sdbm(sample_keys[i]),
194 hash_fnv1a(sample_keys[i]));
195 }
196
197 // 2. ์ถฉ๋ ๋ถ์
198 printf("\n=== ์ถฉ๋ ๋ถ์ (์ด %d๊ฐ ๋จ์ด) ===\n\n", n);
199 CollisionStats stats = analyze_collisions(test_words, n);
200
201 printf("ํด์ ํจ์ | ์ถฉ๋ ํ์ | ์ถฉ๋๋ฅ \n");
202 printf("-------------|-----------|--------\n");
203 printf("Simple | %9d | %5.1f%%\n", stats.simple,
204 100.0 * stats.simple / n);
205 printf("djb2 | %9d | %5.1f%%\n", stats.djb2,
206 100.0 * stats.djb2 / n);
207 printf("sdbm | %9d | %5.1f%%\n", stats.sdbm,
208 100.0 * stats.sdbm / n);
209 printf("FNV-1a | %9d | %5.1f%%\n", stats.fnv1a,
210 100.0 * stats.fnv1a / n);
211
212 // 3. ๋ถํฌ ๊ท ์ผ์ฑ ๋ถ์
213 printf("\n=== ๋ถํฌ ๊ท ์ผ์ฑ ๋ถ์ (๋ถ์ฐ) ===\n");
214 printf("โป ๋ฎ์์๋ก ๊ท ์ผํ ๋ถํฌ\n\n");
215
216 double var_simple = calculate_distribution(hash_simple, test_words, n);
217 double var_djb2 = calculate_distribution(hash_djb2, test_words, n);
218 double var_sdbm = calculate_distribution(hash_sdbm, test_words, n);
219 double var_fnv1a = calculate_distribution(hash_fnv1a, test_words, n);
220
221 printf("ํด์ ํจ์ | ๋ถ์ฐ ๊ฐ\n");
222 printf("-------------|----------\n");
223 printf("Simple | %8.2f\n", var_simple);
224 printf("djb2 | %8.2f โ ์ถ์ฒ\n", var_djb2);
225 printf("sdbm | %8.2f\n", var_sdbm);
226 printf("FNV-1a | %8.2f\n", var_fnv1a);
227
228 // 4. ์ฑ๋ฅ ๊ถ์ฅ์ฌํญ
229 printf("\nโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\n");
230 printf("โ ๊ถ์ฅ ํด์ ํจ์ โ\n");
231 printf("โ โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโฃ\n");
232 printf("โ 1. djb2 - ์ผ๋ฐ์ ์ธ ์ฉ๋ (๊ท ํ์์) โ\n");
233 printf("โ 2. FNV-1a - ๋น ๋ฅธ ์๋ ํ์์ โ\n");
234 printf("โ 3. sdbm - ๋ฐ์ดํฐ๋ฒ ์ด์ค ์ฉ๋ โ\n");
235 printf("โ โ\n");
236 printf("โ โ ๏ธ Simple์ ์ฌ์ฉํ์ง ๋ง์ธ์! โ\n");
237 printf("โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ\n");
238
239 // 5. ์ค์ ๋ถํฌ ์๊ฐํ (djb2)
240 printf("\n=== djb2 ํด์ ๋ถํฌ ์๊ฐํ ===\n");
241 printf("(๊ฐ '*'๋ ํ๋์ ํค๋ฅผ ๋ํ๋)\n\n");
242
243 int buckets[TABLE_SIZE] = {0};
244 for (int i = 0; i < n; i++) {
245 unsigned int idx = hash_djb2(test_words[i]);
246 buckets[idx]++;
247 }
248
249 // ์ฒ์ 20๊ฐ ๋ฒํท๋ง ์๊ฐํ
250 for (int i = 0; i < 20; i++) {
251 printf("[%2d] ", i);
252 for (int j = 0; j < buckets[i]; j++) {
253 printf("*");
254 }
255 printf(" (%d)\n", buckets[i]);
256 }
257 printf("...\n");
258
259 return 0;
260}