hash_functions.c

Download
c 261 lines 8.7 KB
  1/*
  2 * hash_functions.c
  3 * ๋‹ค์–‘ํ•œ ํ•ด์‹œ ํ•จ์ˆ˜ ๋น„๊ต ๋ฐ ์ถฉ๋Œ ๋ถ„์„
  4 *
  5 * ๊ตฌํ˜„๋œ ํ•ด์‹œ ํ•จ์ˆ˜:
  6 * 1. hash_simple - ๋‹จ์ˆœ ํ•ฉ์‚ฐ (์ถฉ๋Œ ๋งŽ์Œ)
  7 * 2. hash_djb2 - Daniel J. Bernstein (์šฐ์ˆ˜ํ•œ ๋ถ„ํฌ)
  8 * 3. hash_sdbm - sdbm ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค ํ•ด์‹œ
  9 * 4. hash_fnv1a - Fowler-Noll-Vo 1a (๋น ๋ฅด๊ณ  ์ข‹์€ ๋ถ„ํฌ)
 10 */
 11
 12#include <stdio.h>
 13#include <string.h>
 14#include <stdlib.h>
 15
 16#define TABLE_SIZE 100
 17#define MAX_TEST_WORDS 50
 18
 19// 1. ๋‹จ์ˆœ ํ•ฉ์‚ฐ ํ•ด์‹œ (๋‚˜์œ ์˜ˆ)
 20// ๋ฌธ์ œ์ : ๊ฐ™์€ ๋ฌธ์ž์˜ ์ˆœ์—ด์ด ๊ฐ™์€ ๊ฐ’ ์ƒ์„ฑ (์˜ˆ: "abc"์™€ "bca")
 21unsigned int hash_simple(const char *key) {
 22    unsigned int hash = 0;
 23    while (*key) {
 24        hash += (unsigned char)*key++;
 25    }
 26    return hash % TABLE_SIZE;
 27}
 28
 29// 2. djb2 ํ•ด์‹œ (Daniel J. Bernstein) - ์ถ”์ฒœ
 30// ์žฅ์ : ๋‹จ์ˆœํ•˜์ง€๋งŒ ์šฐ์ˆ˜ํ•œ ๋ถ„ํฌ ํŠน์„ฑ
 31unsigned int hash_djb2(const char *key) {
 32    unsigned int hash = 5381;
 33    int c;
 34    while ((c = *key++)) {
 35        hash = ((hash << 5) + hash) + c;  // hash * 33 + c
 36    }
 37    return hash % TABLE_SIZE;
 38}
 39
 40// 3. sdbm ํ•ด์‹œ
 41// ์žฅ์ : ๋งŽ์€ ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค์—์„œ ๊ฒ€์ฆ๋œ ์„ฑ๋Šฅ
 42unsigned int hash_sdbm(const char *key) {
 43    unsigned int hash = 0;
 44    int c;
 45    while ((c = *key++)) {
 46        hash = c + (hash << 6) + (hash << 16) - hash;
 47    }
 48    return hash % TABLE_SIZE;
 49}
 50
 51// 4. FNV-1a ํ•ด์‹œ (Fowler-Noll-Vo)
 52// ์žฅ์ : ๋น ๋ฅธ ์†๋„์™€ ์ข‹์€ ๋ถ„ํฌ
 53unsigned int hash_fnv1a(const char *key) {
 54    unsigned int hash = 2166136261u;  // FNV offset basis
 55    while (*key) {
 56        hash ^= (unsigned char)*key++;
 57        hash *= 16777619;  // FNV prime
 58    }
 59    return hash % TABLE_SIZE;
 60}
 61
 62// ์ถฉ๋Œ ์นด์šดํ„ฐ ๊ตฌ์กฐ์ฒด
 63typedef struct {
 64    int simple;
 65    int djb2;
 66    int sdbm;
 67    int fnv1a;
 68} CollisionStats;
 69
 70// ์ถฉ๋Œ ๋ถ„์„ ํ•จ์ˆ˜
 71CollisionStats analyze_collisions(const char **keys, int n) {
 72    CollisionStats stats = {0, 0, 0, 0};
 73
 74    // ๊ฐ ํ•ด์‹œ ํ•จ์ˆ˜๋ณ„๋กœ ์‚ฌ์šฉ๋œ ๋ฒ„ํ‚ท ์ถ”์ 
 75    int buckets_simple[TABLE_SIZE] = {0};
 76    int buckets_djb2[TABLE_SIZE] = {0};
 77    int buckets_sdbm[TABLE_SIZE] = {0};
 78    int buckets_fnv1a[TABLE_SIZE] = {0};
 79
 80    for (int i = 0; i < n; i++) {
 81        unsigned int idx;
 82
 83        // simple
 84        idx = hash_simple(keys[i]);
 85        if (buckets_simple[idx] > 0) stats.simple++;
 86        buckets_simple[idx]++;
 87
 88        // djb2
 89        idx = hash_djb2(keys[i]);
 90        if (buckets_djb2[idx] > 0) stats.djb2++;
 91        buckets_djb2[idx]++;
 92
 93        // sdbm
 94        idx = hash_sdbm(keys[i]);
 95        if (buckets_sdbm[idx] > 0) stats.sdbm++;
 96        buckets_sdbm[idx]++;
 97
 98        // fnv1a
 99        idx = hash_fnv1a(keys[i]);
100        if (buckets_fnv1a[idx] > 0) stats.fnv1a++;
101        buckets_fnv1a[idx]++;
102    }
103
104    return stats;
105}
106
107// ๋ถ„ํฌ ๊ท ์ผ์„ฑ ๊ณ„์‚ฐ (ํ‘œ์ค€ํŽธ์ฐจ)
108double calculate_distribution(unsigned int (*hash_func)(const char*),
109                             const char **keys, int n) {
110    int buckets[TABLE_SIZE] = {0};
111
112    // ๊ฐ ๋ฒ„ํ‚ท์— ํ• ๋‹น๋œ ํ‚ค ๊ฐœ์ˆ˜ ์นด์šดํŠธ
113    for (int i = 0; i < n; i++) {
114        unsigned int idx = hash_func(keys[i]);
115        buckets[idx]++;
116    }
117
118    // ํ‰๊ท  ๊ณ„์‚ฐ
119    double mean = (double)n / TABLE_SIZE;
120
121    // ๋ถ„์‚ฐ ๊ณ„์‚ฐ
122    double variance = 0.0;
123    for (int i = 0; i < TABLE_SIZE; i++) {
124        double diff = buckets[i] - mean;
125        variance += diff * diff;
126    }
127    variance /= TABLE_SIZE;
128
129    // ํ‘œ์ค€ํŽธ์ฐจ ๋ฐ˜ํ™˜ (๋‚ฎ์„์ˆ˜๋ก ๊ท ์ผํ•œ ๋ถ„ํฌ)
130    return variance;
131}
132
133// ํ…Œ์ŠคํŠธ์šฉ ๋‹จ์–ด ๋ชฉ๋ก ์ƒ์„ฑ
134const char** generate_test_words(int *count) {
135    static const char *words[] = {
136        // ๊ณผ์ผ
137        "apple", "banana", "cherry", "date", "elderberry",
138        "fig", "grape", "honeydew", "kiwi", "lemon",
139        // ์ƒ‰์ƒ
140        "red", "blue", "green", "yellow", "orange",
141        "purple", "pink", "brown", "black", "white",
142        // ๋™๋ฌผ
143        "cat", "dog", "elephant", "fox", "giraffe",
144        "horse", "iguana", "jaguar", "kangaroo", "lion",
145        // ๊ตญ๊ฐ€
146        "korea", "japan", "china", "america", "france",
147        "germany", "italy", "spain", "brazil", "india",
148        // ํ”„๋กœ๊ทธ๋ž˜๋ฐ
149        "python", "java", "javascript", "ruby", "php",
150        "swift", "kotlin", "rust", "golang", "typescript"
151    };
152
153    *count = sizeof(words) / sizeof(words[0]);
154    return words;
155}
156
157void print_hash_table(const char *title, const char **keys, int n,
158                     unsigned int (*hash_func)(const char*)) {
159    printf("\n=== %s ===\n", title);
160
161    // ํ•ด์‹œ ๊ฐ’ ์ถœ๋ ฅ
162    printf("%-15s | Hash Value\n", "Key");
163    printf("----------------+-----------\n");
164    for (int i = 0; i < (n < 10 ? n : 10); i++) {  // ์ฒ˜์Œ 10๊ฐœ๋งŒ
165        printf("%-15s | %10u\n", keys[i], hash_func(keys[i]));
166    }
167    if (n > 10) printf("... (%d more)\n", n - 10);
168}
169
170int main(void) {
171    int n;
172    const char **test_words = generate_test_words(&n);
173
174    printf("โ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—\n");
175    printf("โ•‘     ํ•ด์‹œ ํ•จ์ˆ˜ ๋น„๊ต ๋ฐ ์ถฉ๋Œ ๋ถ„์„ ๋„๊ตฌ      โ•‘\n");
176    printf("โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•\n");
177    printf("\nํ…Œ์ŠคํŠธ ๋‹จ์–ด ๊ฐœ์ˆ˜: %d\n", n);
178    printf("ํ•ด์‹œ ํ…Œ์ด๋ธ” ํฌ๊ธฐ: %d\n\n", TABLE_SIZE);
179
180    // 1. ์ƒ˜ํ”Œ ๋‹จ์–ด๋“ค์˜ ํ•ด์‹œ ๊ฐ’ ๋น„๊ต
181    const char *sample_keys[] = {"apple", "banana", "cherry", "date", "elderberry"};
182    int sample_n = sizeof(sample_keys) / sizeof(sample_keys[0]);
183
184    printf("=== ์ƒ˜ํ”Œ ๋‹จ์–ด ํ•ด์‹œ ๊ฐ’ ๋น„๊ต ===\n\n");
185    printf("%-12s | Simple | djb2 | sdbm | fnv1a\n", "Key");
186    printf("-------------|--------|------|------|------\n");
187
188    for (int i = 0; i < sample_n; i++) {
189        printf("%-12s | %6u | %4u | %4u | %5u\n",
190               sample_keys[i],
191               hash_simple(sample_keys[i]),
192               hash_djb2(sample_keys[i]),
193               hash_sdbm(sample_keys[i]),
194               hash_fnv1a(sample_keys[i]));
195    }
196
197    // 2. ์ถฉ๋Œ ๋ถ„์„
198    printf("\n=== ์ถฉ๋Œ ๋ถ„์„ (์ด %d๊ฐœ ๋‹จ์–ด) ===\n\n", n);
199    CollisionStats stats = analyze_collisions(test_words, n);
200
201    printf("ํ•ด์‹œ ํ•จ์ˆ˜    | ์ถฉ๋Œ ํšŸ์ˆ˜ | ์ถฉ๋Œ๋ฅ \n");
202    printf("-------------|-----------|--------\n");
203    printf("Simple       | %9d | %5.1f%%\n", stats.simple,
204           100.0 * stats.simple / n);
205    printf("djb2         | %9d | %5.1f%%\n", stats.djb2,
206           100.0 * stats.djb2 / n);
207    printf("sdbm         | %9d | %5.1f%%\n", stats.sdbm,
208           100.0 * stats.sdbm / n);
209    printf("FNV-1a       | %9d | %5.1f%%\n", stats.fnv1a,
210           100.0 * stats.fnv1a / n);
211
212    // 3. ๋ถ„ํฌ ๊ท ์ผ์„ฑ ๋ถ„์„
213    printf("\n=== ๋ถ„ํฌ ๊ท ์ผ์„ฑ ๋ถ„์„ (๋ถ„์‚ฐ) ===\n");
214    printf("โ€ป ๋‚ฎ์„์ˆ˜๋ก ๊ท ์ผํ•œ ๋ถ„ํฌ\n\n");
215
216    double var_simple = calculate_distribution(hash_simple, test_words, n);
217    double var_djb2 = calculate_distribution(hash_djb2, test_words, n);
218    double var_sdbm = calculate_distribution(hash_sdbm, test_words, n);
219    double var_fnv1a = calculate_distribution(hash_fnv1a, test_words, n);
220
221    printf("ํ•ด์‹œ ํ•จ์ˆ˜    | ๋ถ„์‚ฐ ๊ฐ’\n");
222    printf("-------------|----------\n");
223    printf("Simple       | %8.2f\n", var_simple);
224    printf("djb2         | %8.2f โ† ์ถ”์ฒœ\n", var_djb2);
225    printf("sdbm         | %8.2f\n", var_sdbm);
226    printf("FNV-1a       | %8.2f\n", var_fnv1a);
227
228    // 4. ์„ฑ๋Šฅ ๊ถŒ์žฅ์‚ฌํ•ญ
229    printf("\nโ•”โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•—\n");
230    printf("โ•‘              ๊ถŒ์žฅ ํ•ด์‹œ ํ•จ์ˆ˜                โ•‘\n");
231    printf("โ• โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•ฃ\n");
232    printf("โ•‘  1. djb2   - ์ผ๋ฐ˜์ ์ธ ์šฉ๋„ (๊ท ํ˜•์žˆ์Œ)     โ•‘\n");
233    printf("โ•‘  2. FNV-1a - ๋น ๋ฅธ ์†๋„ ํ•„์š”์‹œ              โ•‘\n");
234    printf("โ•‘  3. sdbm   - ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค ์šฉ๋„             โ•‘\n");
235    printf("โ•‘                                            โ•‘\n");
236    printf("โ•‘  โš ๏ธ  Simple์€ ์‚ฌ์šฉํ•˜์ง€ ๋งˆ์„ธ์š”!             โ•‘\n");
237    printf("โ•šโ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•โ•\n");
238
239    // 5. ์‹ค์ œ ๋ถ„ํฌ ์‹œ๊ฐํ™” (djb2)
240    printf("\n=== djb2 ํ•ด์‹œ ๋ถ„ํฌ ์‹œ๊ฐํ™” ===\n");
241    printf("(๊ฐ '*'๋Š” ํ•˜๋‚˜์˜ ํ‚ค๋ฅผ ๋‚˜ํƒ€๋ƒ„)\n\n");
242
243    int buckets[TABLE_SIZE] = {0};
244    for (int i = 0; i < n; i++) {
245        unsigned int idx = hash_djb2(test_words[i]);
246        buckets[idx]++;
247    }
248
249    // ์ฒ˜์Œ 20๊ฐœ ๋ฒ„ํ‚ท๋งŒ ์‹œ๊ฐํ™”
250    for (int i = 0; i < 20; i++) {
251        printf("[%2d] ", i);
252        for (int j = 0; j < buckets[i]; j++) {
253            printf("*");
254        }
255        printf(" (%d)\n", buckets[i]);
256    }
257    printf("...\n");
258
259    return 0;
260}