ํธ๋ผ์ด (Trie)
ํธ๋ผ์ด (Trie)¶
๊ฐ์¶
ํธ๋ผ์ด(Trie)๋ ๋ฌธ์์ด์ ํจ์จ์ ์ผ๋ก ์ ์ฅํ๊ณ ๊ฒ์ํ๋ ํธ๋ฆฌ ์๋ฃ๊ตฌ์กฐ์ ๋๋ค. ์ ๋์ฌ ํธ๋ฆฌ(Prefix Tree)๋ผ๊ณ ๋ ๋ถ๋ฆฌ๋ฉฐ, ์๋์์ฑ, ์ฌ์ ๊ฒ์ ๋ฑ์ ํ์ฉ๋ฉ๋๋ค.
๋ชฉ์ฐจ¶
1. ํธ๋ผ์ด ๊ฐ๋ ¶
1.1 ๊ตฌ์กฐ¶
๋จ์ด: "apple", "app", "application", "bat", "ball"
(root)
/ \
a b
| |
p a
| / \
p t l
/ \ | |
l l $ l
| | |
e i $
| |
$ c
|
a
|
t
|
i
|
o
|
n
|
$
$ = ๋จ์ด ๋ ํ์ (isEnd)
ํน์ง:
- ๋ฃจํธ๋ ๋น ๋
ธ๋
- ๊ฐ ๊ฐ์ ์ ๋ฌธ์ ํ๋๋ฅผ ๋ํ๋
- ๊ณตํต ์ ๋์ฌ๋ฅผ ๊ณต์
1.2 ์๊ฐ ๋ณต์ก๋¶
m = ๋ฌธ์์ด ๊ธธ์ด
โโโโโโโโโโโโโโโฌโโโโโโโโโโโโโโฌโโโโโโโโโโโโโโโโโ
โ ์ฐ์ฐ โ ์๊ฐ โ ์ค๋ช
โ
โโโโโโโโโโโโโโโผโโโโโโโโโโโโโโผโโโโโโโโโโโโโโโโโค
โ ์ฝ์
โ O(m) โ ๋ฌธ์์ด ๊ธธ์ด๋งํผ โ
โ ๊ฒ์ โ O(m) โ ๋ฌธ์์ด ๊ธธ์ด๋งํผ โ
โ ์ ๋์ฌ ๊ฒ์ โ O(m) โ ์ ๋์ฌ ๊ธธ์ด๋งํผ โ
โ ์ญ์ โ O(m) โ ๋ฌธ์์ด ๊ธธ์ด๋งํผ โ
โโโโโโโโโโโโโโโดโโโโโโโโโโโโโโดโโโโโโโโโโโโโโโโโ
๊ณต๊ฐ: O(์ด ๋ฌธ์ ์) ๋๋ O(n ร m ร ์ํ๋ฒณ ํฌ๊ธฐ)
1.3 ํธ๋ผ์ด vs ํด์์ ¶
โโโโโโโโโโโโโโโโโโฌโโโโโโโโโโโโโโฌโโโโโโโโโโโโโโ
โ ๊ธฐ์ค โ ํธ๋ผ์ด โ ํด์์
โ
โโโโโโโโโโโโโโโโโโผโโโโโโโโโโโโโโผโโโโโโโโโโโโโโค
โ ๊ฒ์ โ O(m) โ O(m) ํ๊ท โ
โ ์ ๋์ฌ ๊ฒ์ โ O(p) โ โ O(n ร m) โ โ
โ ์ ๋ ฌ๋ ์ํ โ ๊ฐ๋ฅ โ โ ๋ถ๊ฐ๋ฅ โ โ
โ ๊ณต๊ฐ ํจ์จ โ ๋ฎ์ โ ๋์ โ
โ ์๋์์ฑ โ ์ต์ โ โ ๋นํจ์จ โ โ
โโโโโโโโโโโโโโโโโโดโโโโโโโโโโโโโโดโโโโโโโโโโโโโโ
p = ์ ๋์ฌ ๊ธธ์ด, n = ๋จ์ด ์
2. ๊ธฐ๋ณธ ๊ตฌํ¶
2.1 ๋ฐฐ์ด ๊ธฐ๋ฐ (๊ณ ์ ์ํ๋ฒณ)¶
class TrieNode:
def __init__(self):
self.children = [None] * 26 # a-z
self.is_end = False
class Trie:
def __init__(self):
self.root = TrieNode()
def _char_to_index(self, c):
return ord(c) - ord('a')
def insert(self, word):
"""๋จ์ด ์ฝ์
- O(m)"""
node = self.root
for c in word:
idx = self._char_to_index(c)
if node.children[idx] is None:
node.children[idx] = TrieNode()
node = node.children[idx]
node.is_end = True
def search(self, word):
"""๋จ์ด ๊ฒ์ - O(m)"""
node = self.root
for c in word:
idx = self._char_to_index(c)
if node.children[idx] is None:
return False
node = node.children[idx]
return node.is_end
def starts_with(self, prefix):
"""์ ๋์ฌ ์กด์ฌ ์ฌ๋ถ - O(p)"""
node = self.root
for c in prefix:
idx = self._char_to_index(c)
if node.children[idx] is None:
return False
node = node.children[idx]
return True
# ์ฌ์ฉ ์์
trie = Trie()
trie.insert("apple")
trie.insert("app")
trie.insert("application")
print(trie.search("app")) # True
print(trie.search("appl")) # False
print(trie.starts_with("appl")) # True
2.2 ๋์ ๋๋ฆฌ ๊ธฐ๋ฐ (์ ์ฐํ ์ํ๋ฒณ)¶
class TrieNodeDict:
def __init__(self):
self.children = {} # char โ TrieNode
self.is_end = False
class TrieDict:
def __init__(self):
self.root = TrieNodeDict()
def insert(self, word):
node = self.root
for c in word:
if c not in node.children:
node.children[c] = TrieNodeDict()
node = node.children[c]
node.is_end = True
def search(self, word):
node = self.root
for c in word:
if c not in node.children:
return False
node = node.children[c]
return node.is_end
def starts_with(self, prefix):
node = self.root
for c in prefix:
if c not in node.children:
return False
node = node.children[c]
return True
def delete(self, word):
"""๋จ์ด ์ญ์ """
def _delete(node, word, depth):
if depth == len(word):
if not node.is_end:
return False # ๋จ์ด ์์
node.is_end = False
return len(node.children) == 0
c = word[depth]
if c not in node.children:
return False
should_delete = _delete(node.children[c], word, depth + 1)
if should_delete:
del node.children[c]
return len(node.children) == 0 and not node.is_end
return False
_delete(self.root, word, 0)
2.3 C++ ๊ตฌํ¶
#include <string>
#include <unordered_map>
using namespace std;
class TrieNode {
public:
unordered_map<char, TrieNode*> children;
bool isEnd = false;
};
class Trie {
private:
TrieNode* root;
public:
Trie() {
root = new TrieNode();
}
void insert(const string& word) {
TrieNode* node = root;
for (char c : word) {
if (node->children.find(c) == node->children.end()) {
node->children[c] = new TrieNode();
}
node = node->children[c];
}
node->isEnd = true;
}
bool search(const string& word) {
TrieNode* node = root;
for (char c : word) {
if (node->children.find(c) == node->children.end()) {
return false;
}
node = node->children[c];
}
return node->isEnd;
}
bool startsWith(const string& prefix) {
TrieNode* node = root;
for (char c : prefix) {
if (node->children.find(c) == node->children.end()) {
return false;
}
node = node->children[c];
}
return true;
}
};
3. ํธ๋ผ์ด ์ฐ์ฐ¶
3.1 ์๋์์ฑ (๋ชจ๋ ๋จ์ด ์ฐพ๊ธฐ)¶
class AutocompleteTrie(TrieDict):
def autocomplete(self, prefix):
"""์ ๋์ฌ๋ก ์์ํ๋ ๋ชจ๋ ๋จ์ด ๋ฐํ"""
node = self.root
for c in prefix:
if c not in node.children:
return []
node = node.children[c]
result = []
self._collect_words(node, prefix, result)
return result
def _collect_words(self, node, current, result):
if node.is_end:
result.append(current)
for c, child in node.children.items():
self._collect_words(child, current + c, result)
# ์ฌ์ฉ ์์
trie = AutocompleteTrie()
for word in ["apple", "app", "application", "apply", "banana"]:
trie.insert(word)
print(trie.autocomplete("app"))
# ['app', 'apple', 'application', 'apply']
3.2 ๋จ์ด ๊ฐ์ ์ธ๊ธฐ¶
class CountingTrie:
def __init__(self):
self.root = {}
def insert(self, word):
node = self.root
for c in word:
if c not in node:
node[c] = {'count': 0}
node = node[c]
node['count'] += 1 # ์ด ์ ๋์ฌ๋ฅผ ๊ฐ์ง ๋จ์ด ์
node['$'] = True # ๋จ์ด ๋
def count_prefix(self, prefix):
"""์ ๋์ฌ๋ก ์์ํ๋ ๋จ์ด ์"""
node = self.root
for c in prefix:
if c not in node:
return 0
node = node[c]
return node.get('count', 0)
def count_words(self):
"""์ ์ฒด ๋จ์ด ์"""
def dfs(node):
count = 1 if '$' in node else 0
for c, child in node.items():
if c not in ['count', '$']:
count += dfs(child)
return count
return dfs(self.root)
# ์ฌ์ฉ ์์
trie = CountingTrie()
for word in ["apple", "app", "application"]:
trie.insert(word)
print(trie.count_prefix("app")) # 3
print(trie.count_prefix("appl")) # 2
print(trie.count_words()) # 3
3.3 ๊ฐ์ฅ ๊ธด ๊ณตํต ์ ๋์ฌ¶
def longest_common_prefix(words):
"""๋ชจ๋ ๋จ์ด์ ๊ฐ์ฅ ๊ธด ๊ณตํต ์ ๋์ฌ"""
if not words:
return ""
trie = TrieDict()
for word in words:
trie.insert(word)
prefix = []
node = trie.root
while True:
# ์์์ด ํ๋์ด๊ณ , ๋จ์ด ๋์ด ์๋๋ฉด ๊ณ์
if len(node.children) != 1 or node.is_end:
break
c, child = next(iter(node.children.items()))
prefix.append(c)
node = child
return ''.join(prefix)
# ์์
words = ["flower", "flow", "flight"]
print(longest_common_prefix(words)) # "fl"
3.4 ์์ผ๋์นด๋ ๊ฒ์¶
class WildcardTrie(TrieDict):
def search_with_wildcard(self, word):
"""'.'์ ๋ชจ๋ ๋ฌธ์์ ๋งค์นญ"""
return self._search(self.root, word, 0)
def _search(self, node, word, idx):
if idx == len(word):
return node.is_end
c = word[idx]
if c == '.':
# ๋ชจ๋ ์์ ํ์
for child in node.children.values():
if self._search(child, word, idx + 1):
return True
return False
else:
if c not in node.children:
return False
return self._search(node.children[c], word, idx + 1)
# ์์
trie = WildcardTrie()
trie.insert("bad")
trie.insert("dad")
trie.insert("mad")
print(trie.search_with_wildcard("pad")) # False
print(trie.search_with_wildcard("bad")) # True
print(trie.search_with_wildcard(".ad")) # True
print(trie.search_with_wildcard("b..")) # True
4. XOR ํธ๋ผ์ด¶
4.1 ๊ฐ๋ ¶
XOR ํธ๋ผ์ด: ์ ์๋ฅผ ์ด์ง์๋ก ์ ์ฅํ๋ ํธ๋ผ์ด
- ์ต๋ XOR ์ ์ฐพ๊ธฐ์ ํ์ฉ
- ๊ฐ ๋นํธ๋ฅผ ๋์ ์๋ฆฌ๋ถํฐ ์ ์ฅ
์์: 3, 10, 5๋ฅผ ์ ์ฅ (4๋นํธ)
3 = 0011
10 = 1010
5 = 0101
root
/ \
0 1
/ \ \
0 1 0
| | |
1 0 1
| | |
1 1 0
โ โ โ
3 5 10
4.2 ์ต๋ XOR ์¶
class XORTrie:
def __init__(self, max_bits=30):
self.root = {}
self.max_bits = max_bits
def insert(self, num):
"""์ซ์ ์ฝ์
"""
node = self.root
for i in range(self.max_bits - 1, -1, -1):
bit = (num >> i) & 1
if bit not in node:
node[bit] = {}
node = node[bit]
def find_max_xor(self, num):
"""num๊ณผ XORํ์ ๋ ์ต๋๊ฐ์ ๋ง๋๋ ์์ XOR ๊ฒฐ๊ณผ"""
node = self.root
result = 0
for i in range(self.max_bits - 1, -1, -1):
bit = (num >> i) & 1
# ๋ฐ๋ ๋นํธ๋ฅผ ์ ํํ๋ฉด XOR์ด ์ต๋
opposite = 1 - bit
if opposite in node:
result |= (1 << i)
node = node[opposite]
elif bit in node:
node = node[bit]
else:
break
return result
def find_maximum_xor(nums):
"""๋ฐฐ์ด์์ ์ต๋ XOR ์ ์ฐพ๊ธฐ"""
if len(nums) < 2:
return 0
trie = XORTrie()
max_xor = 0
for num in nums:
trie.insert(num)
max_xor = max(max_xor, trie.find_max_xor(num))
return max_xor
# ์์
nums = [3, 10, 5, 25, 2, 8]
print(find_maximum_xor(nums)) # 28 (5 XOR 25 = 28)
4.3 ๊ตฌ๊ฐ XOR ์ต๋¶
class PersistentXORTrie:
"""
๊ตฌ๊ฐ [l, r]์์ k์ XOR ์ต๋๊ฐ
์คํ๋ผ์ธ ์ฟผ๋ฆฌ + Persistent Trie
"""
def __init__(self, max_bits=30):
self.max_bits = max_bits
self.nodes = [[0, 0]] # [left_child, right_child]
self.count = [0] # ๊ฐ ๋
ธ๋๋ฅผ ์ง๋๋ ์ซ์ ๊ฐ์
self.roots = [0] # ๋ฒ์ ๋ณ ๋ฃจํธ
def insert(self, prev_root, num):
"""์ด์ ๋ฒ์ ์์ num ์ถ๊ฐ"""
new_root = len(self.nodes)
self.nodes.append([0, 0])
self.count.append(0)
curr = new_root
prev = prev_root
for i in range(self.max_bits - 1, -1, -1):
bit = (num >> i) & 1
# ์ ๋
ธ๋ ์์ฑ
child = len(self.nodes)
self.nodes.append([0, 0])
self.count.append(0)
# ๋ฐ๋์ชฝ ์์์ ์ด์ ๋ฒ์ ์์ ๋ณต์ฌ
self.nodes[curr][1 - bit] = self.nodes[prev][1 - bit] if prev else 0
self.nodes[curr][bit] = child
# ์นด์ดํธ ์
๋ฐ์ดํธ
self.count[child] = (self.count[self.nodes[prev][bit]] if prev else 0) + 1
curr = child
prev = self.nodes[prev][bit] if prev else 0
self.roots.append(new_root)
return new_root
def query(self, l_root, r_root, num):
"""๋ฒ์ (l, r] ์ฌ์ด์์ num๊ณผ ์ต๋ XOR"""
result = 0
l_node = l_root
r_node = r_root
for i in range(self.max_bits - 1, -1, -1):
bit = (num >> i) & 1
opposite = 1 - bit
l_opp_count = self.count[self.nodes[l_node][opposite]] if l_node else 0
r_opp_count = self.count[self.nodes[r_node][opposite]] if r_node else 0
if r_opp_count - l_opp_count > 0:
result |= (1 << i)
l_node = self.nodes[l_node][opposite] if l_node else 0
r_node = self.nodes[r_node][opposite]
else:
l_node = self.nodes[l_node][bit] if l_node else 0
r_node = self.nodes[r_node][bit]
return result
5. ํ์ฉ ๋ฌธ์ ¶
5.1 ๋จ์ด ์ถ๊ฐ ๋ฐ ๊ฒ์¶
# LeetCode 211. Design Add and Search Words Data Structure
class WordDictionary:
def __init__(self):
self.trie = WildcardTrie()
def addWord(self, word):
self.trie.insert(word)
def search(self, word):
return self.trie.search_with_wildcard(word)
5.2 ๋จ์ด ๋์ฒดํ๊ธฐ¶
def replace_words(dictionary, sentence):
"""
๋ฌธ์ฅ์ ๊ฐ ๋จ์ด๋ฅผ ์ฌ์ ์ ์ ๋์ฌ๋ก ๋์ฒด
dictionary = ["cat", "bat", "rat"]
sentence = "the cattle was rattled by the battery"
โ "the cat was rat by the bat"
"""
trie = TrieDict()
for word in dictionary:
trie.insert(word)
def find_root(word):
node = trie.root
for i, c in enumerate(word):
if c not in node.children:
return word
node = node.children[c]
if node.is_end:
return word[:i + 1]
return word
words = sentence.split()
return ' '.join(find_root(word) for word in words)
# ์์
dictionary = ["cat", "bat", "rat"]
sentence = "the cattle was rattled by the battery"
print(replace_words(dictionary, sentence))
# "the cat was rat by the bat"
5.3 ์ฐ๊ฒฐ ๋จ์ด¶
def find_all_concatenated_words(words):
"""
๋ค๋ฅธ ๋จ์ด๋ค์ ์ด์ด๋ถ์ฌ ๋ง๋ค ์ ์๋ ๋จ์ด ์ฐพ๊ธฐ
"""
trie = TrieDict()
for word in words:
if word:
trie.insert(word)
def can_form(word, start, count):
if start == len(word):
return count >= 2
node = trie.root
for i in range(start, len(word)):
c = word[i]
if c not in node.children:
return False
node = node.children[c]
if node.is_end:
if can_form(word, i + 1, count + 1):
return True
return False
result = []
for word in words:
if word and can_form(word, 0, 0):
result.append(word)
return result
# ์์
words = ["cat", "cats", "catsdogcats", "dog", "dogcatsdog",
"hippopotamuses", "rat", "ratcatdogcat"]
print(find_all_concatenated_words(words))
# ["catsdogcats", "dogcatsdog", "ratcatdogcat"]
5.4 Suffix Trie (์ ๋ฏธ์ฌ ํธ๋ผ์ด)¶
class SuffixTrie:
"""๋ฌธ์์ด์ ๋ชจ๋ ์ ๋ฏธ์ฌ๋ฅผ ์ ์ฅํ๋ ํธ๋ผ์ด"""
def __init__(self, text):
self.root = {}
self._build(text)
def _build(self, text):
for i in range(len(text)):
node = self.root
for c in text[i:]:
if c not in node:
node[c] = {}
node = node[c]
node['$'] = i # ์์ ์ธ๋ฑ์ค ์ ์ฅ
def search(self, pattern):
"""ํจํด์ด ์กด์ฌํ๋ ๋ชจ๋ ์์ ์์น"""
node = self.root
for c in pattern:
if c not in node:
return []
node = node[c]
# ์ด ๋
ธ๋ ์๋์ ๋ชจ๋ $ ์์ง
result = []
self._collect(node, result)
return result
def _collect(self, node, result):
if '$' in node:
result.append(node['$'])
for c, child in node.items():
if c != '$':
self._collect(child, result)
# ์์
st = SuffixTrie("banana")
print(st.search("ana")) # [1, 3]
print(st.search("nan")) # [2]
6. ์ฐ์ต ๋ฌธ์ ¶
์ถ์ฒ ๋ฌธ์ ¶
| ๋์ด๋ | ๋ฌธ์ | ํ๋ซํผ | ์ ํ |
|---|---|---|---|
| โญโญ | Implement Trie | LeetCode | ๊ธฐ๋ณธ ๊ตฌํ |
| โญโญโญ | ์ ํ๋ฒํธ ๋ชฉ๋ก | ๋ฐฑ์ค | ์ ๋์ฌ |
| โญโญโญ | Design Search Autocomplete | LeetCode | ์๋์์ฑ |
| โญโญโญ | Add and Search Word | LeetCode | ์์ผ๋์นด๋ |
| โญโญโญโญ | Maximum XOR of Two Numbers | LeetCode | XOR ํธ๋ผ์ด |
| โญโญโญโญ | ๋ฌธ์์ด ์งํฉ | ๋ฐฑ์ค | ๊ฒ์ |
์๊ฐ/๊ณต๊ฐ ๋ณต์ก๋¶
โโโโโโโโโโโโโโโโโโโฌโโโโโโโโโโโโโโฌโโโโโโโโโโโโโโโโโโโโโโ
โ ์ฐ์ฐ โ ์๊ฐ โ ๊ณต๊ฐ โ
โโโโโโโโโโโโโโโโโโโผโโโโโโโโโโโโโโผโโโโโโโโโโโโโโโโโโโโโโค
โ ์ฝ์
โ O(m) โ O(m ร ์ํ๋ฒณ) โ
โ ๊ฒ์ โ O(m) โ - โ
โ ์ ๋์ฌ ๊ฒ์ โ O(p) โ - โ
โ ์๋์์ฑ โ O(p + k) โ O(๊ฒฐ๊ณผ ๊ธธ์ด) โ
โ XOR ์ต๋ โ O(log MAX) โ O(n ร log MAX) โ
โโโโโโโโโโโโโโโโโโโดโโโโโโโโโโโโโโดโโโโโโโโโโโโโโโโโโโโโโ
m = ๋จ์ด ๊ธธ์ด, p = ์ ๋์ฌ ๊ธธ์ด, k = ๊ฒฐ๊ณผ ๊ฐ์
๋ค์ ๋จ๊ณ¶
- 12_Graph_Basics.md - ๊ทธ๋ํ ๊ธฐ์ด