11_trie.py

Download
python 479 lines 14.2 KB
  1"""
  2트라이 (Trie / Prefix Tree)
  3Trie Data Structure
  4
  5λ¬Έμžμ—΄ 검색과 접두사 기반 연산을 μœ„ν•œ 트라이λ₯Ό κ΅¬ν˜„ν•©λ‹ˆλ‹€.
  6"""
  7
  8from typing import List, Optional, Dict
  9from collections import defaultdict
 10
 11
 12# =============================================================================
 13# 1. κΈ°λ³Έ 트라이 (λ”•μ…”λ„ˆλ¦¬ 기반)
 14# =============================================================================
 15
 16class TrieNode:
 17    """트라이 λ…Έλ“œ"""
 18
 19    def __init__(self):
 20        self.children: Dict[str, 'TrieNode'] = {}
 21        self.is_end: bool = False
 22        self.count: int = 0  # 이 μ ‘λ‘μ‚¬λ‘œ μ‹œμž‘ν•˜λŠ” 단어 수
 23
 24
 25class Trie:
 26    """
 27    트라이 (접두사 트리)
 28    - μ‚½μž…: O(m), m = 단어 길이
 29    - 검색: O(m)
 30    - 접두사 검색: O(m)
 31    """
 32
 33    def __init__(self):
 34        self.root = TrieNode()
 35
 36    def insert(self, word: str) -> None:
 37        """단어 μ‚½μž… - O(m)"""
 38        node = self.root
 39
 40        for char in word:
 41            if char not in node.children:
 42                node.children[char] = TrieNode()
 43            node = node.children[char]
 44            node.count += 1
 45
 46        node.is_end = True
 47
 48    def search(self, word: str) -> bool:
 49        """단어 쑴재 μ—¬λΆ€ 검색 - O(m)"""
 50        node = self._find_node(word)
 51        return node is not None and node.is_end
 52
 53    def starts_with(self, prefix: str) -> bool:
 54        """μ ‘λ‘μ‚¬λ‘œ μ‹œμž‘ν•˜λŠ” 단어 쑴재 μ—¬λΆ€ - O(m)"""
 55        return self._find_node(prefix) is not None
 56
 57    def count_prefix(self, prefix: str) -> int:
 58        """μ ‘λ‘μ‚¬λ‘œ μ‹œμž‘ν•˜λŠ” 단어 개수 - O(m)"""
 59        node = self._find_node(prefix)
 60        return node.count if node else 0
 61
 62    def _find_node(self, prefix: str) -> Optional[TrieNode]:
 63        """접두사에 ν•΄λ‹Ήν•˜λŠ” λ…Έλ“œ μ°ΎκΈ°"""
 64        node = self.root
 65
 66        for char in prefix:
 67            if char not in node.children:
 68                return None
 69            node = node.children[char]
 70
 71        return node
 72
 73    def delete(self, word: str) -> bool:
 74        """단어 μ‚­μ œ - O(m)"""
 75
 76        def _delete(node: TrieNode, word: str, depth: int) -> bool:
 77            if depth == len(word):
 78                if not node.is_end:
 79                    return False
 80                node.is_end = False
 81                return len(node.children) == 0
 82
 83            char = word[depth]
 84            if char not in node.children:
 85                return False
 86
 87            should_delete = _delete(node.children[char], word, depth + 1)
 88
 89            if should_delete:
 90                del node.children[char]
 91                return len(node.children) == 0 and not node.is_end
 92
 93            node.children[char].count -= 1
 94            return False
 95
 96        return _delete(self.root, word, 0)
 97
 98
 99# =============================================================================
100# 2. μžλ™μ™„μ„± (Autocomplete)
101# =============================================================================
102
103class AutocompleteSystem:
104    """μžλ™μ™„μ„± μ‹œμŠ€ν…œ"""
105
106    def __init__(self):
107        self.root = TrieNode()
108
109    def add_word(self, word: str, weight: int = 1) -> None:
110        """단어 μΆ”κ°€ (κ°€μ€‘μΉ˜ 포함)"""
111        node = self.root
112
113        for char in word:
114            if char not in node.children:
115                node.children[char] = TrieNode()
116            node = node.children[char]
117
118        node.is_end = True
119        node.count = weight  # μ—¬κΈ°μ„œ countλŠ” λΉˆλ„/κ°€μ€‘μΉ˜
120
121    def autocomplete(self, prefix: str, limit: int = 5) -> List[str]:
122        """μ ‘λ‘μ‚¬λ‘œ μ‹œμž‘ν•˜λŠ” 단어 λͺ©λ‘ - O(m + k), k = κ²°κ³Ό 수"""
123        node = self.root
124
125        # 접두사 λ…Έλ“œ μ°ΎκΈ°
126        for char in prefix:
127            if char not in node.children:
128                return []
129            node = node.children[char]
130
131        # DFS둜 λͺ¨λ“  단어 μˆ˜μ§‘
132        results = []
133        self._collect_words(node, prefix, results)
134
135        # λΉˆλ„μˆœ μ •λ ¬ ν›„ μƒμœ„ limit개
136        results.sort(key=lambda x: x[1], reverse=True)
137        return [word for word, _ in results[:limit]]
138
139    def _collect_words(self, node: TrieNode, prefix: str, results: List) -> None:
140        """λ…Έλ“œμ—μ„œ λͺ¨λ“  μ™„μ„± 단어 μˆ˜μ§‘"""
141        if node.is_end:
142            results.append((prefix, node.count))
143
144        for char, child in node.children.items():
145            self._collect_words(child, prefix + char, results)
146
147
148# =============================================================================
149# 3. μ™€μΌλ“œμΉ΄λ“œ 검색
150# =============================================================================
151
152class WildcardTrie:
153    """μ™€μΌλ“œμΉ΄λ“œ '.'λ₯Ό μ§€μ›ν•˜λŠ” 트라이"""
154
155    def __init__(self):
156        self.root = TrieNode()
157
158    def add_word(self, word: str) -> None:
159        node = self.root
160        for char in word:
161            if char not in node.children:
162                node.children[char] = TrieNode()
163            node = node.children[char]
164        node.is_end = True
165
166    def search(self, word: str) -> bool:
167        """'.'λŠ” μž„μ˜μ˜ ν•œ λ¬Έμžμ™€ λ§€μΉ­"""
168        return self._search(self.root, word, 0)
169
170    def _search(self, node: TrieNode, word: str, index: int) -> bool:
171        if index == len(word):
172            return node.is_end
173
174        char = word[index]
175
176        if char == '.':
177            # λͺ¨λ“  μžμ‹ λ…Έλ“œ μ‹œλ„
178            for child in node.children.values():
179                if self._search(child, word, index + 1):
180                    return True
181            return False
182        else:
183            if char not in node.children:
184                return False
185            return self._search(node.children[char], word, index + 1)
186
187
188# =============================================================================
189# 4. 졜μž₯ 곡톡 접두사 (LCP)
190# =============================================================================
191
192def longest_common_prefix(words: List[str]) -> str:
193    """
194    단어 λ°°μ—΄μ˜ 졜μž₯ 곡톡 접두사
195    트라이 ν™œμš©
196    """
197    if not words:
198        return ""
199
200    # 트라이 ꡬ성
201    trie = Trie()
202    for word in words:
203        trie.insert(word)
204
205    # λ£¨νŠΈμ—μ„œ λΆ„κΈ°μ κΉŒμ§€ 이동
206    prefix = []
207    node = trie.root
208
209    while node:
210        # μžμ‹μ΄ ν•˜λ‚˜μ΄κ³  μ’…λ£Œ λ…Έλ“œκ°€ 아닐 λ•Œλ§Œ 계속
211        if len(node.children) == 1 and not node.is_end:
212            char = list(node.children.keys())[0]
213            prefix.append(char)
214            node = node.children[char]
215        else:
216            break
217
218    return ''.join(prefix)
219
220
221# =============================================================================
222# 5. 단어 사전 (Word Dictionary)
223# =============================================================================
224
225class WordDictionary:
226    """
227    단어 μΆ”κ°€/검색 사전
228    - μ •ν™•ν•œ 검색
229    - 접두사 검색
230    - μ™€μΌλ“œμΉ΄λ“œ 검색
231    """
232
233    def __init__(self):
234        self.trie = Trie()
235        self.wildcard_trie = WildcardTrie()
236
237    def add_word(self, word: str) -> None:
238        self.trie.insert(word)
239        self.wildcard_trie.add_word(word)
240
241    def search_exact(self, word: str) -> bool:
242        """μ •ν™•ν•œ 단어 검색"""
243        return self.trie.search(word)
244
245    def search_prefix(self, prefix: str) -> bool:
246        """접두사 검색"""
247        return self.trie.starts_with(prefix)
248
249    def search_pattern(self, pattern: str) -> bool:
250        """νŒ¨ν„΄ 검색 (. = μž„μ˜ 문자)"""
251        return self.wildcard_trie.search(pattern)
252
253
254# =============================================================================
255# 6. XOR 트라이 (μ΅œλŒ€ XOR 쌍 μ°ΎκΈ°)
256# =============================================================================
257
258class XORTrie:
259    """
260    λΉ„νŠΈ 트라이둜 μ΅œλŒ€ XOR 쌍 μ°ΎκΈ°
261    각 숫자λ₯Ό μ΄μ§„μˆ˜λ‘œ μ €μž₯
262    """
263
264    def __init__(self, max_bits: int = 31):
265        self.root = {}
266        self.max_bits = max_bits
267
268    def insert(self, num: int) -> None:
269        """숫자 μ‚½μž… - O(max_bits)"""
270        node = self.root
271
272        for i in range(self.max_bits, -1, -1):
273            bit = (num >> i) & 1
274            if bit not in node:
275                node[bit] = {}
276            node = node[bit]
277
278    def find_max_xor(self, num: int) -> int:
279        """numκ³Ό XORν–ˆμ„ λ•Œ μ΅œλŒ€κ°€ λ˜λŠ” κ°’ λ°˜ν™˜ - O(max_bits)"""
280        node = self.root
281        result = 0
282
283        for i in range(self.max_bits, -1, -1):
284            bit = (num >> i) & 1
285            # XOR μ΅œλŒ€ν™”λ₯Ό μœ„ν•΄ λ°˜λŒ€ λΉ„νŠΈ 선택
286            toggle_bit = 1 - bit
287
288            if toggle_bit in node:
289                result |= (1 << i)
290                node = node[toggle_bit]
291            elif bit in node:
292                node = node[bit]
293            else:
294                break
295
296        return result
297
298
299def find_maximum_xor(nums: List[int]) -> int:
300    """
301    λ°°μ—΄μ—μ„œ 두 수의 XOR μ΅œλŒ“κ°’
302    μ‹œκ°„λ³΅μž‘λ„: O(n * max_bits)
303    """
304    if len(nums) < 2:
305        return 0
306
307    xor_trie = XORTrie()
308    max_xor = 0
309
310    for num in nums:
311        xor_trie.insert(num)
312        max_xor = max(max_xor, xor_trie.find_max_xor(num))
313
314    return max_xor
315
316
317# =============================================================================
318# 7. μ‹€μ „ 문제: 단어 검색 II (Word Search II)
319# =============================================================================
320
321def find_words(board: List[List[str]], words: List[str]) -> List[str]:
322    """
323    2D λ³΄λ“œμ—μ„œ 단어 λͺ©λ‘ μ°ΎκΈ°
324    트라이 + DFS
325    """
326    # 트라이 ꡬ성
327    trie = Trie()
328    for word in words:
329        trie.insert(word)
330
331    rows, cols = len(board), len(board[0])
332    result = set()
333
334    def dfs(r: int, c: int, node: TrieNode, path: str) -> None:
335        if node.is_end:
336            result.add(path)
337
338        if r < 0 or r >= rows or c < 0 or c >= cols:
339            return
340
341        char = board[r][c]
342        if char == '#' or char not in node.children:
343            return
344
345        board[r][c] = '#'  # λ°©λ¬Έ ν‘œμ‹œ
346        next_node = node.children[char]
347
348        for dr, dc in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
349            dfs(r + dr, c + dc, next_node, path + char)
350
351        board[r][c] = char  # 볡원
352
353    for r in range(rows):
354        for c in range(cols):
355            dfs(r, c, trie.root, "")
356
357    return list(result)
358
359
360# =============================================================================
361# 8. 접두사/접미사 λ™μ‹œ 검색
362# =============================================================================
363
364class PrefixSuffixTrie:
365    """접두사와 접미사λ₯Ό λ™μ‹œμ— κ²€μƒ‰ν•˜λŠ” 트라이"""
366
367    def __init__(self, words: List[str]):
368        self.trie = {}
369
370        for idx, word in enumerate(words):
371            # λͺ¨λ“  접미사#단어 ν˜•νƒœλ‘œ μ €μž₯
372            key = word + '#' + word
373            for i in range(len(word) + 1):
374                node = self.trie
375                for char in key[i:]:
376                    if char not in node:
377                        node[char] = {'idx': -1}
378                    node = node[char]
379                    node['idx'] = idx  # κ°€μž₯ 큰 인덱슀 μ €μž₯
380
381    def search(self, prefix: str, suffix: str) -> int:
382        """접두사와 접미사 λͺ¨λ‘ λ§Œμ‘±ν•˜λŠ” λ‹¨μ–΄μ˜ 인덱슀"""
383        key = suffix + '#' + prefix
384        node = self.trie
385
386        for char in key:
387            if char not in node:
388                return -1
389            node = node[char]
390
391        return node.get('idx', -1)
392
393
394# =============================================================================
395# ν…ŒμŠ€νŠΈ
396# =============================================================================
397
398def main():
399    print("=" * 60)
400    print("트라이 (Trie / Prefix Tree) 예제")
401    print("=" * 60)
402
403    # 1. 기본 트라이
404    print("\n[1] 기본 트라이")
405    trie = Trie()
406    words = ["apple", "app", "application", "banana", "band"]
407    for word in words:
408        trie.insert(word)
409    print(f"    μ‚½μž…: {words}")
410    print(f"    search('app'): {trie.search('app')}")
411    print(f"    search('application'): {trie.search('application')}")
412    print(f"    search('apply'): {trie.search('apply')}")
413    print(f"    starts_with('app'): {trie.starts_with('app')}")
414    print(f"    count_prefix('app'): {trie.count_prefix('app')}")
415
416    # 2. μžλ™μ™„μ„±
417    print("\n[2] μžλ™μ™„μ„±")
418    auto = AutocompleteSystem()
419    search_words = [("hello", 5), ("help", 3), ("helicopter", 2), ("hero", 4), ("world", 1)]
420    for word, weight in search_words:
421        auto.add_word(word, weight)
422    print(f"    단어/λΉˆλ„: {search_words}")
423    print(f"    'hel' μžλ™μ™„μ„±: {auto.autocomplete('hel')}")
424    print(f"    'he' μžλ™μ™„μ„±: {auto.autocomplete('he')}")
425
426    # 3. μ™€μΌλ“œμΉ΄λ“œ 검색
427    print("\n[3] μ™€μΌλ“œμΉ΄λ“œ 검색")
428    wild = WildcardTrie()
429    for word in ["bad", "dad", "mad", "pad"]:
430        wild.add_word(word)
431    print(f"    단어: ['bad', 'dad', 'mad', 'pad']")
432    print(f"    search('.ad'): {wild.search('.ad')}")
433    print(f"    search('b..'): {wild.search('b..')}")
434    print(f"    search('..d'): {wild.search('..d')}")
435    print(f"    search('b.d'): {wild.search('b.d')}")
436
437    # 4. 졜μž₯ 곡톡 접두사
438    print("\n[4] 졜μž₯ 곡톡 접두사")
439    words_lcp = ["flower", "flow", "flight"]
440    lcp = longest_common_prefix(words_lcp)
441    print(f"    단어: {words_lcp}")
442    print(f"    LCP: '{lcp}'")
443
444    # 5. XOR 트라이
445    print("\n[5] μ΅œλŒ€ XOR (XOR 트라이)")
446    nums = [3, 10, 5, 25, 2, 8]
447    max_xor = find_maximum_xor(nums)
448    print(f"    λ°°μ—΄: {nums}")
449    print(f"    μ΅œλŒ€ XOR: {max_xor}")
450    print(f"    (5 XOR 25 = {5 ^ 25})")
451
452    # 6. 단어 사전
453    print("\n[6] 단어 사전")
454    dictionary = WordDictionary()
455    for word in ["hello", "help", "world"]:
456        dictionary.add_word(word)
457    print(f"    단어: ['hello', 'help', 'world']")
458    print(f"    exact 'help': {dictionary.search_exact('help')}")
459    print(f"    prefix 'hel': {dictionary.search_prefix('hel')}")
460    print(f"    pattern 'h.l.o': {dictionary.search_pattern('h.l.o')}")
461
462    # 7. 단어 μ‚­μ œ
463    print("\n[7] 단어 μ‚­μ œ")
464    trie2 = Trie()
465    for word in ["apple", "app"]:
466        trie2.insert(word)
467    print(f"    μ‚½μž…: ['apple', 'app']")
468    print(f"    search('app'): {trie2.search('app')}")
469    trie2.delete("app")
470    print(f"    delete('app') ν›„")
471    print(f"    search('app'): {trie2.search('app')}")
472    print(f"    search('apple'): {trie2.search('apple')}")
473
474    print("\n" + "=" * 60)
475
476
477if __name__ == "__main__":
478    main()