22_string_algorithm.cpp

Download
cpp 536 lines 14.6 KB
  1/*
  2 * ๋ฌธ์ž์—ด ์•Œ๊ณ ๋ฆฌ์ฆ˜ (String Algorithms)
  3 * KMP, Rabin-Karp, Z-Algorithm, Suffix Array
  4 *
  5 * ๋ฌธ์ž์—ด ๊ฒ€์ƒ‰๊ณผ ์ฒ˜๋ฆฌ๋ฅผ ์œ„ํ•œ ์•Œ๊ณ ๋ฆฌ์ฆ˜์ž…๋‹ˆ๋‹ค.
  6 */
  7
  8#include <iostream>
  9#include <vector>
 10#include <string>
 11#include <algorithm>
 12#include <unordered_map>
 13
 14using namespace std;
 15
 16// =============================================================================
 17// 1. KMP ์•Œ๊ณ ๋ฆฌ์ฆ˜
 18// =============================================================================
 19
 20// ์‹คํŒจ ํ•จ์ˆ˜ (๋ถ€๋ถ„ ์ผ์น˜ ํ…Œ์ด๋ธ”)
 21vector<int> computeFailure(const string& pattern) {
 22    int m = pattern.length();
 23    vector<int> failure(m, 0);
 24
 25    int k = 0;
 26    for (int i = 1; i < m; i++) {
 27        while (k > 0 && pattern[k] != pattern[i]) {
 28            k = failure[k - 1];
 29        }
 30        if (pattern[k] == pattern[i]) {
 31            k++;
 32        }
 33        failure[i] = k;
 34    }
 35
 36    return failure;
 37}
 38
 39// KMP ๊ฒ€์ƒ‰
 40vector<int> kmpSearch(const string& text, const string& pattern) {
 41    vector<int> result;
 42    if (pattern.empty()) return result;
 43
 44    vector<int> failure = computeFailure(pattern);
 45    int n = text.length(), m = pattern.length();
 46    int j = 0;
 47
 48    for (int i = 0; i < n; i++) {
 49        while (j > 0 && text[i] != pattern[j]) {
 50            j = failure[j - 1];
 51        }
 52        if (text[i] == pattern[j]) {
 53            j++;
 54        }
 55        if (j == m) {
 56            result.push_back(i - m + 1);
 57            j = failure[j - 1];
 58        }
 59    }
 60
 61    return result;
 62}
 63
 64// =============================================================================
 65// 2. Rabin-Karp ์•Œ๊ณ ๋ฆฌ์ฆ˜
 66// =============================================================================
 67
 68class RabinKarp {
 69private:
 70    const long long BASE = 31;
 71    const long long MOD = 1e9 + 9;
 72
 73public:
 74    vector<int> search(const string& text, const string& pattern) {
 75        vector<int> result;
 76        int n = text.length(), m = pattern.length();
 77        if (m > n) return result;
 78
 79        // ํŒจํ„ด ํ•ด์‹œ
 80        long long patternHash = 0;
 81        long long textHash = 0;
 82        long long power = 1;
 83
 84        for (int i = 0; i < m; i++) {
 85            patternHash = (patternHash * BASE + pattern[i]) % MOD;
 86            textHash = (textHash * BASE + text[i]) % MOD;
 87            if (i < m - 1) {
 88                power = (power * BASE) % MOD;
 89            }
 90        }
 91
 92        for (int i = 0; i <= n - m; i++) {
 93            if (patternHash == textHash) {
 94                // ํ•ด์‹œ ์ถฉ๋Œ ํ™•์ธ
 95                if (text.substr(i, m) == pattern) {
 96                    result.push_back(i);
 97                }
 98            }
 99
100            if (i < n - m) {
101                textHash = (textHash - text[i] * power % MOD + MOD) % MOD;
102                textHash = (textHash * BASE + text[i + m]) % MOD;
103            }
104        }
105
106        return result;
107    }
108
109    // ๋กค๋ง ํ•ด์‹œ๋กœ ๋ถ€๋ถ„ ๋ฌธ์ž์—ด ๋น„๊ต
110    long long hash(const string& s) {
111        long long h = 0;
112        for (char c : s) {
113            h = (h * BASE + c) % MOD;
114        }
115        return h;
116    }
117};
118
119// =============================================================================
120// 3. Z-์•Œ๊ณ ๋ฆฌ์ฆ˜
121// =============================================================================
122
123vector<int> zFunction(const string& s) {
124    int n = s.length();
125    vector<int> z(n, 0);
126    int l = 0, r = 0;
127
128    for (int i = 1; i < n; i++) {
129        if (i < r) {
130            z[i] = min(r - i, z[i - l]);
131        }
132        while (i + z[i] < n && s[z[i]] == s[i + z[i]]) {
133            z[i]++;
134        }
135        if (i + z[i] > r) {
136            l = i;
137            r = i + z[i];
138        }
139    }
140
141    return z;
142}
143
144vector<int> zSearch(const string& text, const string& pattern) {
145    string combined = pattern + "$" + text;
146    vector<int> z = zFunction(combined);
147    vector<int> result;
148
149    int m = pattern.length();
150    for (int i = m + 1; i < (int)combined.length(); i++) {
151        if (z[i] == m) {
152            result.push_back(i - m - 1);
153        }
154    }
155
156    return result;
157}
158
159// =============================================================================
160// 4. ์ ‘๋ฏธ์‚ฌ ๋ฐฐ์—ด (Suffix Array)
161// =============================================================================
162
163vector<int> buildSuffixArray(const string& s) {
164    int n = s.length();
165    vector<int> sa(n), rank_(n), tmp(n);
166
167    for (int i = 0; i < n; i++) {
168        sa[i] = i;
169        rank_[i] = s[i];
170    }
171
172    for (int k = 1; k < n; k *= 2) {
173        auto cmp = [&](int a, int b) {
174            if (rank_[a] != rank_[b]) return rank_[a] < rank_[b];
175            int ra = (a + k < n) ? rank_[a + k] : -1;
176            int rb = (b + k < n) ? rank_[b + k] : -1;
177            return ra < rb;
178        };
179
180        sort(sa.begin(), sa.end(), cmp);
181
182        tmp[sa[0]] = 0;
183        for (int i = 1; i < n; i++) {
184            tmp[sa[i]] = tmp[sa[i-1]] + (cmp(sa[i-1], sa[i]) ? 1 : 0);
185        }
186        rank_ = tmp;
187    }
188
189    return sa;
190}
191
192// LCP ๋ฐฐ์—ด (Kasai's Algorithm)
193vector<int> buildLCPArray(const string& s, const vector<int>& sa) {
194    int n = s.length();
195    vector<int> rank_(n), lcp(n);
196
197    for (int i = 0; i < n; i++) {
198        rank_[sa[i]] = i;
199    }
200
201    int k = 0;
202    for (int i = 0; i < n; i++) {
203        if (rank_[i] == 0) {
204            k = 0;
205            continue;
206        }
207
208        int j = sa[rank_[i] - 1];
209        while (i + k < n && j + k < n && s[i + k] == s[j + k]) {
210            k++;
211        }
212        lcp[rank_[i]] = k;
213        if (k > 0) k--;
214    }
215
216    return lcp;
217}
218
219// =============================================================================
220// 5. Manacher ์•Œ๊ณ ๋ฆฌ์ฆ˜ (๊ฐ€์žฅ ๊ธด ํŒฐ๋ฆฐ๋“œ๋กฌ)
221// =============================================================================
222
223string manacher(const string& s) {
224    // ๋ฌธ์ž ์‚ฌ์ด์— # ์‚ฝ์ž…
225    string t = "#";
226    for (char c : s) {
227        t += c;
228        t += '#';
229    }
230
231    int n = t.length();
232    vector<int> p(n, 0);
233    int center = 0, right = 0;
234
235    for (int i = 0; i < n; i++) {
236        if (i < right) {
237            p[i] = min(right - i, p[2 * center - i]);
238        }
239        while (i - p[i] - 1 >= 0 && i + p[i] + 1 < n &&
240               t[i - p[i] - 1] == t[i + p[i] + 1]) {
241            p[i]++;
242        }
243        if (i + p[i] > right) {
244            center = i;
245            right = i + p[i];
246        }
247    }
248
249    // ๊ฐ€์žฅ ๊ธด ํŒฐ๋ฆฐ๋“œ๋กฌ ์ฐพ๊ธฐ
250    int maxLen = 0, maxCenter = 0;
251    for (int i = 0; i < n; i++) {
252        if (p[i] > maxLen) {
253            maxLen = p[i];
254            maxCenter = i;
255        }
256    }
257
258    int start = (maxCenter - maxLen) / 2;
259    return s.substr(start, maxLen);
260}
261
262// =============================================================================
263// 6. Trie (๊ฐ„๋‹จ ๋ฒ„์ „)
264// =============================================================================
265
266class SimpleTrie {
267private:
268    struct Node {
269        unordered_map<char, Node*> children;
270        bool isEnd = false;
271    };
272    Node* root;
273
274public:
275    SimpleTrie() : root(new Node()) {}
276
277    void insert(const string& word) {
278        Node* curr = root;
279        for (char c : word) {
280            if (!curr->children.count(c)) {
281                curr->children[c] = new Node();
282            }
283            curr = curr->children[c];
284        }
285        curr->isEnd = true;
286    }
287
288    bool search(const string& word) {
289        Node* curr = root;
290        for (char c : word) {
291            if (!curr->children.count(c)) return false;
292            curr = curr->children[c];
293        }
294        return curr->isEnd;
295    }
296
297    bool startsWith(const string& prefix) {
298        Node* curr = root;
299        for (char c : prefix) {
300            if (!curr->children.count(c)) return false;
301            curr = curr->children[c];
302        }
303        return true;
304    }
305};
306
307// =============================================================================
308// 7. ๋ฌธ์ž์—ด ํ•ด์‹ฑ
309// =============================================================================
310
311class StringHash {
312private:
313    const long long BASE = 31;
314    const long long MOD = 1e9 + 9;
315    vector<long long> hash_, power_;
316
317public:
318    StringHash(const string& s) {
319        int n = s.length();
320        hash_.resize(n + 1, 0);
321        power_.resize(n + 1, 1);
322
323        for (int i = 0; i < n; i++) {
324            hash_[i + 1] = (hash_[i] * BASE + s[i]) % MOD;
325            power_[i + 1] = (power_[i] * BASE) % MOD;
326        }
327    }
328
329    // [l, r) ๊ตฌ๊ฐ„ ํ•ด์‹œ
330    long long getHash(int l, int r) {
331        return (hash_[r] - hash_[l] * power_[r - l] % MOD + MOD) % MOD;
332    }
333};
334
335// =============================================================================
336// 8. ์•„ํ˜ธ-์ฝ”๋ผ์‹ (Aho-Corasick)
337// =============================================================================
338
339class AhoCorasick {
340private:
341    static const int ALPHABET = 26;
342    struct Node {
343        int children[ALPHABET];
344        int fail;
345        vector<int> output;  // ๋งค์นญ๋˜๋Š” ํŒจํ„ด ์ธ๋ฑ์Šค
346
347        Node() : fail(0) {
348            fill(children, children + ALPHABET, -1);
349        }
350    };
351
352    vector<Node> trie;
353
354    int charToIdx(char c) { return c - 'a'; }
355
356public:
357    AhoCorasick() {
358        trie.emplace_back();  // ๋ฃจํŠธ
359    }
360
361    void addPattern(const string& pattern, int idx) {
362        int curr = 0;
363        for (char c : pattern) {
364            int ci = charToIdx(c);
365            if (trie[curr].children[ci] == -1) {
366                trie[curr].children[ci] = trie.size();
367                trie.emplace_back();
368            }
369            curr = trie[curr].children[ci];
370        }
371        trie[curr].output.push_back(idx);
372    }
373
374    void build() {
375        queue<int> q;
376
377        for (int i = 0; i < ALPHABET; i++) {
378            if (trie[0].children[i] != -1) {
379                trie[trie[0].children[i]].fail = 0;
380                q.push(trie[0].children[i]);
381            }
382        }
383
384        while (!q.empty()) {
385            int curr = q.front();
386            q.pop();
387
388            for (int i = 0; i < ALPHABET; i++) {
389                int next = trie[curr].children[i];
390                if (next != -1) {
391                    int fail = trie[curr].fail;
392                    while (fail && trie[fail].children[i] == -1) {
393                        fail = trie[fail].fail;
394                    }
395                    trie[next].fail = (trie[fail].children[i] != -1 && trie[fail].children[i] != next)
396                                      ? trie[fail].children[i] : 0;
397
398                    // output ๋ณ‘ํ•ฉ
399                    for (int idx : trie[trie[next].fail].output) {
400                        trie[next].output.push_back(idx);
401                    }
402                    q.push(next);
403                }
404            }
405        }
406    }
407
408    vector<pair<int, int>> search(const string& text) {
409        vector<pair<int, int>> result;  // {์œ„์น˜, ํŒจํ„ด ์ธ๋ฑ์Šค}
410        int curr = 0;
411
412        for (int i = 0; i < (int)text.length(); i++) {
413            int ci = charToIdx(text[i]);
414
415            while (curr && trie[curr].children[ci] == -1) {
416                curr = trie[curr].fail;
417            }
418            curr = (trie[curr].children[ci] != -1) ? trie[curr].children[ci] : 0;
419
420            for (int idx : trie[curr].output) {
421                result.push_back({i, idx});
422            }
423        }
424
425        return result;
426    }
427};
428
429// =============================================================================
430// ํ…Œ์ŠคํŠธ
431// =============================================================================
432
433#include <queue>
434
435void printVector(const vector<int>& v) {
436    cout << "[";
437    for (size_t i = 0; i < v.size(); i++) {
438        cout << v[i];
439        if (i < v.size() - 1) cout << ", ";
440    }
441    cout << "]";
442}
443
444int main() {
445    cout << "============================================================" << endl;
446    cout << "๋ฌธ์ž์—ด ์•Œ๊ณ ๋ฆฌ์ฆ˜ ์˜ˆ์ œ" << endl;
447    cout << "============================================================" << endl;
448
449    string text = "ABABDABACDABABCABAB";
450    string pattern = "ABAB";
451
452    // 1. KMP
453    cout << "\n[1] KMP ์•Œ๊ณ ๋ฆฌ์ฆ˜" << endl;
454    cout << "    ํ…์ŠคํŠธ: \"" << text << "\"" << endl;
455    cout << "    ํŒจํ„ด: \"" << pattern << "\"" << endl;
456    auto kmpResult = kmpSearch(text, pattern);
457    cout << "    ๋ฐœ๊ฒฌ ์œ„์น˜: ";
458    printVector(kmpResult);
459    cout << endl;
460
461    auto failure = computeFailure(pattern);
462    cout << "    ์‹คํŒจ ํ•จ์ˆ˜: ";
463    printVector(failure);
464    cout << endl;
465
466    // 2. Rabin-Karp
467    cout << "\n[2] Rabin-Karp ์•Œ๊ณ ๋ฆฌ์ฆ˜" << endl;
468    RabinKarp rk;
469    auto rkResult = rk.search(text, pattern);
470    cout << "    ๋ฐœ๊ฒฌ ์œ„์น˜: ";
471    printVector(rkResult);
472    cout << endl;
473
474    // 3. Z-Algorithm
475    cout << "\n[3] Z-์•Œ๊ณ ๋ฆฌ์ฆ˜" << endl;
476    auto zResult = zSearch(text, pattern);
477    cout << "    ๋ฐœ๊ฒฌ ์œ„์น˜: ";
478    printVector(zResult);
479    cout << endl;
480
481    string zStr = "aabxaab";
482    auto z = zFunction(zStr);
483    cout << "    Z(\"aabxaab\"): ";
484    printVector(z);
485    cout << endl;
486
487    // 4. ์ ‘๋ฏธ์‚ฌ ๋ฐฐ์—ด
488    cout << "\n[4] ์ ‘๋ฏธ์‚ฌ ๋ฐฐ์—ด" << endl;
489    string s = "banana";
490    auto sa = buildSuffixArray(s);
491    cout << "    \"banana\" SA: ";
492    printVector(sa);
493    cout << endl;
494    auto lcp = buildLCPArray(s, sa);
495    cout << "    LCP: ";
496    printVector(lcp);
497    cout << endl;
498
499    // 5. Manacher
500    cout << "\n[5] Manacher ์•Œ๊ณ ๋ฆฌ์ฆ˜" << endl;
501    cout << "    \"babad\" ๊ฐ€์žฅ ๊ธด ํŒฐ๋ฆฐ๋“œ๋กฌ: \"" << manacher("babad") << "\"" << endl;
502    cout << "    \"cbbd\" ๊ฐ€์žฅ ๊ธด ํŒฐ๋ฆฐ๋“œ๋กฌ: \"" << manacher("cbbd") << "\"" << endl;
503
504    // 6. Trie
505    cout << "\n[6] Trie" << endl;
506    SimpleTrie trie;
507    trie.insert("apple");
508    trie.insert("app");
509    cout << "    insert: apple, app" << endl;
510    cout << "    search(apple): " << (trie.search("apple") ? "true" : "false") << endl;
511    cout << "    search(app): " << (trie.search("app") ? "true" : "false") << endl;
512    cout << "    startsWith(ap): " << (trie.startsWith("ap") ? "true" : "false") << endl;
513
514    // 7. ๋ฌธ์ž์—ด ํ•ด์‹ฑ
515    cout << "\n[7] ๋ฌธ์ž์—ด ํ•ด์‹ฑ" << endl;
516    StringHash sh("abcabc");
517    cout << "    \"abcabc\" ํ•ด์‹œ" << endl;
518    cout << "    hash[0,3) = hash[3,6): " <<
519         (sh.getHash(0, 3) == sh.getHash(3, 6) ? "true" : "false") << endl;
520
521    // 8. ๋ณต์žก๋„ ์š”์•ฝ
522    cout << "\n[8] ๋ณต์žก๋„ ์š”์•ฝ" << endl;
523    cout << "    | ์•Œ๊ณ ๋ฆฌ์ฆ˜       | ์ „์ฒ˜๋ฆฌ     | ๊ฒ€์ƒ‰        |" << endl;
524    cout << "    |----------------|------------|-------------|" << endl;
525    cout << "    | KMP            | O(m)       | O(n)        |" << endl;
526    cout << "    | Rabin-Karp     | O(m)       | O(n) ํ‰๊ท    |" << endl;
527    cout << "    | Z-Algorithm    | O(n)       | O(n)        |" << endl;
528    cout << "    | Suffix Array   | O(n log n) | O(m log n)  |" << endl;
529    cout << "    | Manacher       | -          | O(n)        |" << endl;
530    cout << "    | Aho-Corasick   | O(ฮฃm)      | O(n + ๊ฒฐ๊ณผ) |" << endl;
531
532    cout << "\n============================================================" << endl;
533
534    return 0;
535}