1/*
2 * 문자열 알고리즘 (String Algorithms)
3 * KMP, Rabin-Karp, Z-알고리즘, 매나커
4 *
5 * 패턴 매칭과 문자열 처리 알고리즘입니다.
6 */
7
8#include <stdio.h>
9#include <stdlib.h>
10#include <string.h>
11
12#define MAX_LEN 100001
13#define MOD 1000000007
14#define BASE 31
15
16/* =============================================================================
17 * 1. KMP (Knuth-Morris-Pratt)
18 * ============================================================================= */
19
20/* 실패 함수 (부분 일치 테이블) 계산 */
21int* compute_failure(const char* pattern) {
22 int m = strlen(pattern);
23 int* fail = calloc(m, sizeof(int));
24
25 int j = 0;
26 for (int i = 1; i < m; i++) {
27 while (j > 0 && pattern[i] != pattern[j]) {
28 j = fail[j - 1];
29 }
30 if (pattern[i] == pattern[j]) {
31 fail[i] = ++j;
32 }
33 }
34
35 return fail;
36}
37
38/* KMP 검색 - 모든 매칭 위치 반환 */
39int* kmp_search(const char* text, const char* pattern, int* match_count) {
40 int n = strlen(text);
41 int m = strlen(pattern);
42 int* fail = compute_failure(pattern);
43 int* matches = malloc(n * sizeof(int));
44 *match_count = 0;
45
46 int j = 0;
47 for (int i = 0; i < n; i++) {
48 while (j > 0 && text[i] != pattern[j]) {
49 j = fail[j - 1];
50 }
51 if (text[i] == pattern[j]) {
52 j++;
53 if (j == m) {
54 matches[(*match_count)++] = i - m + 1;
55 j = fail[j - 1];
56 }
57 }
58 }
59
60 free(fail);
61 return matches;
62}
63
64/* =============================================================================
65 * 2. Rabin-Karp
66 * ============================================================================= */
67
68long long compute_hash(const char* s, int len) {
69 long long hash = 0;
70 long long power = 1;
71 for (int i = 0; i < len; i++) {
72 hash = (hash + (s[i] - 'a' + 1) * power) % MOD;
73 power = (power * BASE) % MOD;
74 }
75 return hash;
76}
77
78int* rabin_karp(const char* text, const char* pattern, int* match_count) {
79 int n = strlen(text);
80 int m = strlen(pattern);
81 int* matches = malloc(n * sizeof(int));
82 *match_count = 0;
83
84 if (m > n) return matches;
85
86 long long pattern_hash = compute_hash(pattern, m);
87 long long text_hash = compute_hash(text, m);
88 long long power = 1;
89
90 for (int i = 0; i < m - 1; i++) {
91 power = (power * BASE) % MOD;
92 }
93
94 for (int i = 0; i <= n - m; i++) {
95 if (text_hash == pattern_hash) {
96 /* 해시 충돌 확인 */
97 int match = 1;
98 for (int j = 0; j < m; j++) {
99 if (text[i + j] != pattern[j]) {
100 match = 0;
101 break;
102 }
103 }
104 if (match) matches[(*match_count)++] = i;
105 }
106
107 if (i < n - m) {
108 /* 롤링 해시 */
109 text_hash = (text_hash - (text[i] - 'a' + 1) + MOD) % MOD;
110 text_hash = (text_hash * mod_inv(BASE, MOD)) % MOD;
111 text_hash = (text_hash + (text[i + m] - 'a' + 1) * power) % MOD;
112 }
113 }
114
115 return matches;
116}
117
118/* 모듈러 역원 (간단 버전) */
119long long mod_inv(long long a, long long mod) {
120 long long result = 1;
121 long long exp = mod - 2;
122 while (exp > 0) {
123 if (exp & 1) result = (result * a) % mod;
124 a = (a * a) % mod;
125 exp >>= 1;
126 }
127 return result;
128}
129
130/* 간단한 Rabin-Karp (롤링 해시 개선) */
131int* rabin_karp_simple(const char* text, const char* pattern, int* match_count) {
132 int n = strlen(text);
133 int m = strlen(pattern);
134 int* matches = malloc(n * sizeof(int));
135 *match_count = 0;
136
137 if (m > n) return matches;
138
139 long long pattern_hash = 0;
140 long long text_hash = 0;
141 long long power = 1;
142
143 /* 초기 해시 계산 */
144 for (int i = 0; i < m; i++) {
145 pattern_hash = (pattern_hash * BASE + pattern[i]) % MOD;
146 text_hash = (text_hash * BASE + text[i]) % MOD;
147 if (i < m - 1) power = (power * BASE) % MOD;
148 }
149
150 for (int i = 0; i <= n - m; i++) {
151 if (text_hash == pattern_hash) {
152 if (strncmp(text + i, pattern, m) == 0) {
153 matches[(*match_count)++] = i;
154 }
155 }
156 if (i < n - m) {
157 text_hash = ((text_hash - text[i] * power % MOD + MOD) * BASE + text[i + m]) % MOD;
158 }
159 }
160
161 return matches;
162}
163
164/* =============================================================================
165 * 3. Z-알고리즘
166 * ============================================================================= */
167
168int* z_function(const char* s) {
169 int n = strlen(s);
170 int* z = calloc(n, sizeof(int));
171 int l = 0, r = 0;
172
173 for (int i = 1; i < n; i++) {
174 if (i < r) {
175 z[i] = (z[i - l] < r - i) ? z[i - l] : r - i;
176 }
177 while (i + z[i] < n && s[z[i]] == s[i + z[i]]) {
178 z[i]++;
179 }
180 if (i + z[i] > r) {
181 l = i;
182 r = i + z[i];
183 }
184 }
185
186 return z;
187}
188
189/* Z-알고리즘으로 패턴 매칭 */
190int* z_search(const char* text, const char* pattern, int* match_count) {
191 int n = strlen(text);
192 int m = strlen(pattern);
193
194 /* pattern + "$" + text 연결 */
195 char* combined = malloc(n + m + 2);
196 sprintf(combined, "%s$%s", pattern, text);
197
198 int* z = z_function(combined);
199 int* matches = malloc(n * sizeof(int));
200 *match_count = 0;
201
202 for (int i = m + 1; i < n + m + 1; i++) {
203 if (z[i] == m) {
204 matches[(*match_count)++] = i - m - 1;
205 }
206 }
207
208 free(combined);
209 free(z);
210 return matches;
211}
212
213/* =============================================================================
214 * 4. 매나커 알고리즘 (Manacher)
215 * ============================================================================= */
216
217/* 가장 긴 팰린드롬 부분문자열 */
218char* manacher(const char* s) {
219 int n = strlen(s);
220 if (n == 0) return strdup("");
221
222 /* 문자 사이에 # 삽입 */
223 int len = 2 * n + 1;
224 char* t = malloc(len + 1);
225 for (int i = 0; i < n; i++) {
226 t[2 * i] = '#';
227 t[2 * i + 1] = s[i];
228 }
229 t[len - 1] = '#';
230 t[len] = '\0';
231
232 int* p = calloc(len, sizeof(int));
233 int center = 0, right = 0;
234 int max_len = 0, max_center = 0;
235
236 for (int i = 0; i < len; i++) {
237 if (i < right) {
238 int mirror = 2 * center - i;
239 p[i] = (p[mirror] < right - i) ? p[mirror] : right - i;
240 }
241
242 while (i - p[i] - 1 >= 0 && i + p[i] + 1 < len &&
243 t[i - p[i] - 1] == t[i + p[i] + 1]) {
244 p[i]++;
245 }
246
247 if (i + p[i] > right) {
248 center = i;
249 right = i + p[i];
250 }
251
252 if (p[i] > max_len) {
253 max_len = p[i];
254 max_center = i;
255 }
256 }
257
258 /* 원본 문자열에서 추출 */
259 int start = (max_center - max_len) / 2;
260 char* result = malloc(max_len + 1);
261 strncpy(result, s + start, max_len);
262 result[max_len] = '\0';
263
264 free(t);
265 free(p);
266 return result;
267}
268
269/* =============================================================================
270 * 5. 문자열 해싱
271 * ============================================================================= */
272
273typedef struct {
274 long long* prefix_hash;
275 long long* power;
276 int len;
277} StringHash;
278
279StringHash* create_string_hash(const char* s) {
280 int n = strlen(s);
281 StringHash* sh = malloc(sizeof(StringHash));
282 sh->prefix_hash = malloc((n + 1) * sizeof(long long));
283 sh->power = malloc((n + 1) * sizeof(long long));
284 sh->len = n;
285
286 sh->prefix_hash[0] = 0;
287 sh->power[0] = 1;
288
289 for (int i = 0; i < n; i++) {
290 sh->prefix_hash[i + 1] = (sh->prefix_hash[i] * BASE + s[i]) % MOD;
291 sh->power[i + 1] = (sh->power[i] * BASE) % MOD;
292 }
293
294 return sh;
295}
296
297long long get_hash(StringHash* sh, int l, int r) {
298 long long hash = (sh->prefix_hash[r + 1] -
299 sh->prefix_hash[l] * sh->power[r - l + 1] % MOD + MOD) % MOD;
300 return hash;
301}
302
303void free_string_hash(StringHash* sh) {
304 free(sh->prefix_hash);
305 free(sh->power);
306 free(sh);
307}
308
309/* =============================================================================
310 * 6. 유용한 문자열 함수들
311 * ============================================================================= */
312
313/* 접미사 배열 (간단 버전, O(n² log n)) */
314int* suffix_array_simple(const char* s) {
315 int n = strlen(s);
316 int* sa = malloc(n * sizeof(int));
317 for (int i = 0; i < n; i++) sa[i] = i;
318
319 /* 정렬 */
320 for (int i = 0; i < n - 1; i++) {
321 for (int j = i + 1; j < n; j++) {
322 if (strcmp(s + sa[i], s + sa[j]) > 0) {
323 int temp = sa[i];
324 sa[i] = sa[j];
325 sa[j] = temp;
326 }
327 }
328 }
329
330 return sa;
331}
332
333/* LCP 배열 (Kasai 알고리즘) */
334int* lcp_array(const char* s, int* sa) {
335 int n = strlen(s);
336 int* rank = malloc(n * sizeof(int));
337 int* lcp = malloc(n * sizeof(int));
338
339 for (int i = 0; i < n; i++) rank[sa[i]] = i;
340
341 int k = 0;
342 for (int i = 0; i < n; i++) {
343 if (rank[i] == 0) {
344 lcp[0] = 0;
345 continue;
346 }
347 int j = sa[rank[i] - 1];
348 while (i + k < n && j + k < n && s[i + k] == s[j + k]) k++;
349 lcp[rank[i]] = k;
350 if (k > 0) k--;
351 }
352
353 free(rank);
354 return lcp;
355}
356
357/* =============================================================================
358 * 테스트
359 * ============================================================================= */
360
361int main(void) {
362 printf("============================================================\n");
363 printf("문자열 알고리즘 예제\n");
364 printf("============================================================\n");
365
366 /* 1. KMP */
367 printf("\n[1] KMP 알고리즘\n");
368 const char* text1 = "ABABDABACDABABCABAB";
369 const char* pattern1 = "ABABCABAB";
370 int match_count;
371 int* matches = kmp_search(text1, pattern1, &match_count);
372 printf(" 텍스트: %s\n", text1);
373 printf(" 패턴: %s\n", pattern1);
374 printf(" 매칭 위치 (%d개): ", match_count);
375 for (int i = 0; i < match_count; i++) printf("%d ", matches[i]);
376 printf("\n");
377 free(matches);
378
379 /* 실패 함수 시각화 */
380 const char* pattern2 = "ABAABAB";
381 int* fail = compute_failure(pattern2);
382 printf(" 패턴 \"%s\"의 실패 함수: ", pattern2);
383 for (int i = 0; i < (int)strlen(pattern2); i++) printf("%d ", fail[i]);
384 printf("\n");
385 free(fail);
386
387 /* 2. Z-알고리즘 */
388 printf("\n[2] Z-알고리즘\n");
389 const char* str = "aabxaabxcaabxaabxay";
390 int* z = z_function(str);
391 printf(" 문자열: %s\n", str);
392 printf(" Z-배열: ");
393 for (int i = 0; i < (int)strlen(str); i++) printf("%d ", z[i]);
394 printf("\n");
395 free(z);
396
397 matches = z_search(text1, pattern1, &match_count);
398 printf(" Z-알고리즘 매칭 위치: ");
399 for (int i = 0; i < match_count; i++) printf("%d ", matches[i]);
400 printf("\n");
401 free(matches);
402
403 /* 3. Rabin-Karp */
404 printf("\n[3] Rabin-Karp\n");
405 const char* text2 = "abracadabra";
406 const char* pattern3 = "abra";
407 matches = rabin_karp_simple(text2, pattern3, &match_count);
408 printf(" 텍스트: %s\n", text2);
409 printf(" 패턴: %s\n", pattern3);
410 printf(" 매칭 위치: ");
411 for (int i = 0; i < match_count; i++) printf("%d ", matches[i]);
412 printf("\n");
413 free(matches);
414
415 /* 4. 매나커 */
416 printf("\n[4] 매나커 알고리즘\n");
417 const char* str2 = "babad";
418 char* palindrome = manacher(str2);
419 printf(" 문자열: %s\n", str2);
420 printf(" 가장 긴 팰린드롬: %s\n", palindrome);
421 free(palindrome);
422
423 const char* str3 = "forgeeksskeegfor";
424 palindrome = manacher(str3);
425 printf(" 문자열: %s\n", str3);
426 printf(" 가장 긴 팰린드롬: %s\n", palindrome);
427 free(palindrome);
428
429 /* 5. 문자열 해싱 */
430 printf("\n[5] 문자열 해싱\n");
431 const char* str4 = "abcabc";
432 StringHash* sh = create_string_hash(str4);
433 printf(" 문자열: %s\n", str4);
434 printf(" hash[0:2] = %lld\n", get_hash(sh, 0, 2));
435 printf(" hash[3:5] = %lld\n", get_hash(sh, 3, 5));
436 printf(" 동일 여부: %s\n",
437 get_hash(sh, 0, 2) == get_hash(sh, 3, 5) ? "예" : "아니오");
438 free_string_hash(sh);
439
440 /* 6. 접미사 배열 */
441 printf("\n[6] 접미사 배열\n");
442 const char* str5 = "banana";
443 int* sa = suffix_array_simple(str5);
444 printf(" 문자열: %s\n", str5);
445 printf(" 접미사 배열: ");
446 for (int i = 0; i < (int)strlen(str5); i++) printf("%d ", sa[i]);
447 printf("\n");
448 printf(" 정렬된 접미사:\n");
449 for (int i = 0; i < (int)strlen(str5); i++) {
450 printf(" %d: %s\n", sa[i], str5 + sa[i]);
451 }
452 free(sa);
453
454 /* 7. 복잡도 */
455 printf("\n[7] 복잡도\n");
456 printf(" | 알고리즘 | 전처리 | 검색 |\n");
457 printf(" |-----------------|-----------|----------|\n");
458 printf(" | KMP | O(m) | O(n) |\n");
459 printf(" | Rabin-Karp | O(m) | O(n+m)* |\n");
460 printf(" | Z-알고리즘 | O(n+m) | - |\n");
461 printf(" | 매나커 | - | O(n) |\n");
462 printf(" * 평균, 최악 O(nm)\n");
463
464 printf("\n============================================================\n");
465
466 return 0;
467}