hash_linear_probing.c

Download
c 411 lines 12.2 KB
  1/*
  2 * hash_linear_probing.c
  3 * μ„ ν˜• 탐사(Linear Probing)λ₯Ό μ΄μš©ν•œ μ˜€ν”ˆ μ–΄λ“œλ ˆμ‹± ν•΄μ‹œ ν…Œμ΄λΈ”
  4 *
  5 * μ„ ν˜• 탐사 방식:
  6 * - 좩돌 λ°œμƒ μ‹œ λ‹€μŒ 빈 μŠ¬λ‘―μ„ 순차적으둜 탐색
  7 * - μž₯점: μΊμ‹œ 효율 μ’‹μŒ, μΆ”κ°€ λ©”λͺ¨λ¦¬ λΆˆν•„μš”
  8 * - 단점: ν΄λŸ¬μŠ€ν„°λ§ ν˜„μƒ, μ‚­μ œ 처리 볡작
  9 */
 10
 11#include <stdio.h>
 12#include <stdlib.h>
 13#include <string.h>
 14#include <stdbool.h>
 15
 16#define TABLE_SIZE 20
 17#define KEY_SIZE 50
 18#define VALUE_SIZE 100
 19
 20// 슬둯 μƒνƒœ
 21typedef enum {
 22    EMPTY,      // λΉ„μ–΄μžˆμŒ (ν•œ λ²ˆλ„ μ‚¬μš© μ•ˆ 됨)
 23    OCCUPIED,   // μ‚¬μš© 쀑
 24    DELETED     // μ‚­μ œλ¨ (탐색 μ‹œ κ±΄λ„ˆλ›°μ§€ μ•ŠμŒ)
 25} SlotStatus;
 26
 27// 슬둯 ꡬ쑰체
 28typedef struct {
 29    char key[KEY_SIZE];
 30    char value[VALUE_SIZE];
 31    SlotStatus status;
 32} Slot;
 33
 34// ν•΄μ‹œ ν…Œμ΄λΈ” ꡬ쑰체
 35typedef struct {
 36    Slot slots[TABLE_SIZE];
 37    int count;          // ν˜„μž¬ μ €μž₯된 ν•­λͺ© 수 (OCCUPIED)
 38    int probes;         // 총 탐사 횟수
 39    int collisions;     // 좩돌 횟수
 40} HashTable;
 41
 42// djb2 ν•΄μ‹œ ν•¨μˆ˜
 43unsigned int hash(const char *key) {
 44    unsigned int hash = 5381;
 45    int c;
 46    while ((c = *key++)) {
 47        hash = ((hash << 5) + hash) + c;
 48    }
 49    return hash % TABLE_SIZE;
 50}
 51
 52// ν•΄μ‹œ ν…Œμ΄λΈ” 생성
 53HashTable* ht_create(void) {
 54    HashTable *ht = malloc(sizeof(HashTable));
 55    if (!ht) {
 56        fprintf(stderr, "λ©”λͺ¨λ¦¬ ν• λ‹Ή μ‹€νŒ¨\n");
 57        return NULL;
 58    }
 59
 60    // λͺ¨λ“  슬둯 μ΄ˆκΈ°ν™”
 61    for (int i = 0; i < TABLE_SIZE; i++) {
 62        ht->slots[i].status = EMPTY;
 63        ht->slots[i].key[0] = '\0';
 64        ht->slots[i].value[0] = '\0';
 65    }
 66
 67    ht->count = 0;
 68    ht->probes = 0;
 69    ht->collisions = 0;
 70
 71    return ht;
 72}
 73
 74// ν•΄μ‹œ ν…Œμ΄λΈ” ν•΄μ œ
 75void ht_destroy(HashTable *ht) {
 76    free(ht);
 77}
 78
 79// μ‚½μž… λ˜λŠ” μˆ˜μ •
 80bool ht_set(HashTable *ht, const char *key, const char *value) {
 81    if (!ht || !key || !value) return false;
 82
 83    // ν…Œμ΄λΈ”μ΄ 가득 μ°¬ 경우
 84    if (ht->count >= TABLE_SIZE) {
 85        fprintf(stderr, "ν•΄μ‹œ ν…Œμ΄λΈ”μ΄ 가득 μ°ΌμŠ΅λ‹ˆλ‹€!\n");
 86        return false;
 87    }
 88
 89    unsigned int index = hash(key);
 90    unsigned int original_index = index;
 91    int probe_count = 0;
 92
 93    // μ„ ν˜• 탐사
 94    do {
 95        probe_count++;
 96
 97        // 1. 빈 슬둯 λ˜λŠ” μ‚­μ œλœ 슬둯 β†’ μƒˆλ‘œ μ‚½μž…
 98        if (ht->slots[index].status != OCCUPIED) {
 99            if (probe_count > 1) {
100                ht->collisions++;  // 좩돌 λ°œμƒ
101            }
102
103            strncpy(ht->slots[index].key, key, KEY_SIZE - 1);
104            ht->slots[index].key[KEY_SIZE - 1] = '\0';
105            strncpy(ht->slots[index].value, value, VALUE_SIZE - 1);
106            ht->slots[index].value[VALUE_SIZE - 1] = '\0';
107            ht->slots[index].status = OCCUPIED;
108
109            ht->count++;
110            ht->probes += probe_count;
111            return true;
112        }
113
114        // 2. 같은 ν‚€ 발견 β†’ κ°’ μ—…λ°μ΄νŠΈ
115        if (strcmp(ht->slots[index].key, key) == 0) {
116            strncpy(ht->slots[index].value, value, VALUE_SIZE - 1);
117            ht->slots[index].value[VALUE_SIZE - 1] = '\0';
118            ht->probes += probe_count;
119            return true;
120        }
121
122        // 3. λ‹€μŒ 슬둯으둜
123        index = (index + 1) % TABLE_SIZE;
124
125    } while (index != original_index);
126
127    // λͺ¨λ“  슬둯 νƒμƒ‰ν–ˆμ§€λ§Œ μ‹€νŒ¨ (이둠상 도달 λΆˆκ°€)
128    return false;
129}
130
131// 검색
132char* ht_get(HashTable *ht, const char *key) {
133    if (!ht || !key) return NULL;
134
135    unsigned int index = hash(key);
136    unsigned int original_index = index;
137
138    // μ„ ν˜• 탐사
139    do {
140        // EMPTY 발견 β†’ ν‚€κ°€ μ—†μŒ (탐색 μ’…λ£Œ)
141        if (ht->slots[index].status == EMPTY) {
142            return NULL;
143        }
144
145        // OCCUPIED이고 ν‚€κ°€ 일치 β†’ 찾음!
146        if (ht->slots[index].status == OCCUPIED &&
147            strcmp(ht->slots[index].key, key) == 0) {
148            return ht->slots[index].value;
149        }
150
151        // DELETED λ˜λŠ” λ‹€λ₯Έ ν‚€ β†’ 계속 탐색
152        index = (index + 1) % TABLE_SIZE;
153
154    } while (index != original_index);
155
156    return NULL;  // μ°Ύμ§€ λͺ»ν•¨
157}
158
159// μ‚­μ œ
160bool ht_delete(HashTable *ht, const char *key) {
161    if (!ht || !key) return false;
162
163    unsigned int index = hash(key);
164    unsigned int original_index = index;
165
166    do {
167        // EMPTY 발견 β†’ ν‚€κ°€ μ—†μŒ
168        if (ht->slots[index].status == EMPTY) {
169            return false;
170        }
171
172        // OCCUPIED이고 ν‚€κ°€ 일치 β†’ μ‚­μ œ
173        if (ht->slots[index].status == OCCUPIED &&
174            strcmp(ht->slots[index].key, key) == 0) {
175
176            // EMPTYκ°€ μ•„λ‹Œ DELETED둜 ν‘œμ‹œ (μ€‘μš”!)
177            // 이유: 탐색 체인을 λŠμ§€ μ•ŠκΈ° μœ„ν•΄
178            ht->slots[index].status = DELETED;
179            ht->count--;
180            return true;
181        }
182
183        index = (index + 1) % TABLE_SIZE;
184
185    } while (index != original_index);
186
187    return false;  // μ°Ύμ§€ λͺ»ν•¨
188}
189
190// ν•΄μ‹œ ν…Œμ΄λΈ” 좜λ ₯
191void ht_print(HashTable *ht) {
192    if (!ht) return;
193
194    printf("\n╔════════════════════════════════════════════╗\n");
195    printf("β•‘      ν•΄μ‹œ ν…Œμ΄λΈ” μƒνƒœ (μ„ ν˜• 탐사)         β•‘\n");
196    printf("╠════════════════════════════════════════════╣\n");
197    printf("β•‘  ν•­λͺ© 개수: %-5d / %-5d                   β•‘\n",
198           ht->count, TABLE_SIZE);
199    printf("β•‘  λ‘œλ“œ νŒ©ν„°: %.2f                           β•‘\n",
200           (double)ht->count / TABLE_SIZE);
201    printf("β•‘  좩돌 횟수: %-5d                          β•‘\n", ht->collisions);
202    printf("β•‘  평균 탐사: %.2f                           β•‘\n",
203           ht->count > 0 ? (double)ht->probes / ht->count : 0.0);
204    printf("β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•\n\n");
205
206    for (int i = 0; i < TABLE_SIZE; i++) {
207        printf("[%2d] ", i);
208
209        switch (ht->slots[i].status) {
210            case EMPTY:
211                printf("(λΉ„μ–΄μžˆμŒ)\n");
212                break;
213
214            case DELETED:
215                printf("(μ‚­μ œλ¨) [이전 ν‚€: %s]\n",
216                       ht->slots[i].key[0] ? ht->slots[i].key : "?");
217                break;
218
219            case OCCUPIED: {
220                unsigned int original_hash = hash(ht->slots[i].key);
221                if (original_hash == (unsigned int)i) {
222                    printf("\"%s\" : \"%s\" βœ“\n",
223                           ht->slots[i].key, ht->slots[i].value);
224                } else {
225                    printf("\"%s\" : \"%s\" (μ›λž˜: [%u], 좩돌)\n",
226                           ht->slots[i].key, ht->slots[i].value, original_hash);
227                }
228                break;
229            }
230        }
231    }
232}
233
234// ν΄λŸ¬μŠ€ν„°λ§ 뢄석
235void analyze_clustering(HashTable *ht) {
236    printf("\n=== ν΄λŸ¬μŠ€ν„°λ§ 뢄석 ===\n\n");
237
238    int max_cluster = 0;
239    int current_cluster = 0;
240    int num_clusters = 0;
241
242    for (int i = 0; i < TABLE_SIZE; i++) {
243        if (ht->slots[i].status == OCCUPIED) {
244            current_cluster++;
245        } else {
246            if (current_cluster > 0) {
247                num_clusters++;
248                if (current_cluster > max_cluster) {
249                    max_cluster = current_cluster;
250                }
251            }
252            current_cluster = 0;
253        }
254    }
255
256    // λ§ˆμ§€λ§‰ ν΄λŸ¬μŠ€ν„° 처리
257    if (current_cluster > 0) {
258        num_clusters++;
259        if (current_cluster > max_cluster) {
260            max_cluster = current_cluster;
261        }
262    }
263
264    printf("ν΄λŸ¬μŠ€ν„° 개수:   %d\n", num_clusters);
265    printf("μ΅œλŒ€ ν΄λŸ¬μŠ€ν„°:   %d개 연속\n", max_cluster);
266
267    // μ‹œκ°ν™”
268    printf("\nν΄λŸ¬μŠ€ν„° μ‹œκ°ν™”:\n");
269    printf("[");
270    for (int i = 0; i < TABLE_SIZE; i++) {
271        if (ht->slots[i].status == OCCUPIED) {
272            printf("β–ˆ");
273        } else if (ht->slots[i].status == DELETED) {
274            printf("β–‘");
275        } else {
276            printf(" ");
277        }
278    }
279    printf("]\n");
280    printf("β–ˆ: μ‚¬μš©μ€‘  β–‘: μ‚­μ œλ¨  (곡백): λΉ„μ–΄μžˆμŒ\n");
281}
282
283// μ„±λŠ₯ ν…ŒμŠ€νŠΈ
284void performance_test(void) {
285    printf("\n╔════════════════════════════════════════════╗\n");
286    printf("β•‘           λ‘œλ“œ νŒ©ν„°λ³„ μ„±λŠ₯ 비ꡐ            β•‘\n");
287    printf("β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•\n\n");
288
289    int test_sizes[] = {5, 10, 15, 18};  // 25%, 50%, 75%, 90%
290
291    printf("λ‘œλ“œ νŒ©ν„°  | 평균 탐사 | 좩돌 횟수\n");
292    printf("-----------|-----------|----------\n");
293
294    for (int t = 0; t < 4; t++) {
295        HashTable *ht = ht_create();
296        int n = test_sizes[t];
297
298        // ν…ŒμŠ€νŠΈ 데이터 μ‚½μž…
299        char key[20], value[20];
300        for (int i = 0; i < n; i++) {
301            sprintf(key, "key%d", i);
302            sprintf(value, "value%d", i);
303            ht_set(ht, key, value);
304        }
305
306        double load_factor = (double)ht->count / TABLE_SIZE;
307        double avg_probes = ht->count > 0 ? (double)ht->probes / ht->count : 0.0;
308
309        printf("%6.0f%%    | %9.2f | %9d\n",
310               load_factor * 100, avg_probes, ht->collisions);
311
312        ht_destroy(ht);
313    }
314
315    printf("\nβ€» λ‘œλ“œ νŒ©ν„°κ°€ λ†’μ„μˆ˜λ‘ μ„±λŠ₯ μ €ν•˜\n");
316    printf("β€» 0.7 μ΄ν•˜ μœ μ§€ ꢌμž₯\n");
317}
318
319// 메인 ν…ŒμŠ€νŠΈ
320int main(void) {
321    printf("╔════════════════════════════════════════════╗\n");
322    printf("β•‘   μ„ ν˜• 탐사 ν•΄μ‹œ ν…Œμ΄λΈ” κ΅¬ν˜„ 및 ν…ŒμŠ€νŠΈ    β•‘\n");
323    printf("β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•\n");
324
325    HashTable *ht = ht_create();
326    if (!ht) return 1;
327
328    // 1. μ‚½μž… ν…ŒμŠ€νŠΈ
329    printf("\n[ 1단계: μ‚½μž… ν…ŒμŠ€νŠΈ ]\n");
330    printf("μ—¬λŸ¬ 데이터λ₯Ό μ‚½μž…ν•©λ‹ˆλ‹€...\n");
331
332    ht_set(ht, "apple", "사과");
333    ht_set(ht, "banana", "λ°”λ‚˜λ‚˜");
334    ht_set(ht, "cherry", "체리");
335    ht_set(ht, "date", "λŒ€μΆ”μ•Όμž");
336    ht_set(ht, "elderberry", "μ—˜λ”λ² λ¦¬");
337    ht_set(ht, "fig", "무화과");
338    ht_set(ht, "grape", "포도");
339
340    ht_print(ht);
341
342    // 2. 검색 ν…ŒμŠ€νŠΈ
343    printf("\n[ 2단계: 검색 ν…ŒμŠ€νŠΈ ]\n");
344    const char *keys[] = {"apple", "grape", "kiwi", "banana"};
345    for (int i = 0; i < 4; i++) {
346        char *value = ht_get(ht, keys[i]);
347        if (value) {
348            printf("βœ“ '%s' β†’ '%s'\n", keys[i], value);
349        } else {
350            printf("βœ— '%s' β†’ (찾을 수 μ—†μŒ)\n", keys[i]);
351        }
352    }
353
354    // 3. μˆ˜μ • ν…ŒμŠ€νŠΈ
355    printf("\n[ 3단계: μˆ˜μ • ν…ŒμŠ€νŠΈ ]\n");
356    printf("'apple'의 값을 μˆ˜μ •ν•©λ‹ˆλ‹€...\n");
357    ht_set(ht, "apple", "λ§›μžˆλŠ” 사과 🍎");
358    printf("μˆ˜μ • ν›„: %s\n", ht_get(ht, "apple"));
359
360    // 4. μ‚­μ œ ν…ŒμŠ€νŠΈ
361    printf("\n[ 4단계: μ‚­μ œ ν…ŒμŠ€νŠΈ ]\n");
362    printf("'banana'λ₯Ό μ‚­μ œν•©λ‹ˆλ‹€...\n");
363
364    if (ht_delete(ht, "banana")) {
365        printf("βœ“ μ‚­μ œ 성곡\n");
366    }
367
368    printf("μ‚­μ œ 확인: %s\n", ht_get(ht, "banana") ?: "(찾을 수 μ—†μŒ)");
369
370    ht_print(ht);
371
372    // 5. 좩돌 유발 ν…ŒμŠ€νŠΈ
373    printf("\n[ 5단계: 좩돌 및 ν΄λŸ¬μŠ€ν„°λ§ ν…ŒμŠ€νŠΈ ]\n");
374    printf("μΆ”κ°€ 데이터λ₯Ό μ‚½μž…ν•©λ‹ˆλ‹€...\n");
375
376    ht_set(ht, "honeydew", "ν—ˆλ‹ˆλ“€");
377    ht_set(ht, "kiwi", "ν‚€μœ„");
378    ht_set(ht, "lemon", "레λͺ¬");
379    ht_set(ht, "mango", "망고");
380
381    ht_print(ht);
382
383    // 6. ν΄λŸ¬μŠ€ν„°λ§ 뢄석
384    analyze_clustering(ht);
385
386    // 7. μ‚­μ œ ν›„ μž¬μ‚½μž… ν…ŒμŠ€νŠΈ
387    printf("\n[ 6단계: μ‚­μ œ ν›„ μž¬μ‚½μž… ν…ŒμŠ€νŠΈ ]\n");
388    printf("μ—¬λŸ¬ ν•­λͺ©μ„ μ‚­μ œν•œ ν›„ μƒˆλ‘œμš΄ ν•­λͺ©μ„ μ‚½μž…ν•©λ‹ˆλ‹€...\n\n");
389
390    ht_delete(ht, "cherry");
391    ht_delete(ht, "fig");
392
393    printf("μ‚­μ œ ν›„:\n");
394    ht_print(ht);
395
396    printf("\nμƒˆ ν•­λͺ© μ‚½μž…:\n");
397    ht_set(ht, "orange", "μ˜€λ Œμ§€");
398    ht_set(ht, "peach", "λ³΅μˆ­μ•„");
399
400    ht_print(ht);
401
402    // 정리
403    ht_destroy(ht);
404
405    // 8. μ„±λŠ₯ ν…ŒμŠ€νŠΈ
406    performance_test();
407
408    printf("\nν”„λ‘œκ·Έλž¨μ„ μ’…λ£Œν•©λ‹ˆλ‹€.\n");
409    return 0;
410}