Trie
Trie¶
Overview¶
A Trie is a tree data structure for efficiently storing and searching strings. Also called a Prefix Tree, it is used for autocomplete, dictionary search, and more.
Table of Contents¶
1. Trie Concepts¶
1.1 Structure¶
Words: "apple", "app", "application", "bat", "ball"
(root)
/ \
a b
| |
p a
| / \
p t l
/ \ | |
l l $ l
| | |
e i $
| |
$ c
|
a
|
t
|
i
|
o
|
n
|
$
$ = End of word marker (isEnd)
Features:
- Root is an empty node
- Each edge represents one character
- Common prefixes are shared
1.2 Time Complexity¶
m = string length
βββββββββββββββ¬ββββββββββββββ¬βββββββββββββββββ
β Operation β Time β Description β
βββββββββββββββΌββββββββββββββΌβββββββββββββββββ€
β Insert β O(m) β String length β
β Search β O(m) β String length β
β Prefix β O(m) β Prefix length β
β Delete β O(m) β String length β
βββββββββββββββ΄ββββββββββββββ΄βββββββββββββββββ
Space: O(total characters) or O(n Γ m Γ alphabet size)
1.3 Trie vs HashSet¶
ββββββββββββββββββ¬ββββββββββββββ¬ββββββββββββββ
β Criteria β Trie β HashSet β
ββββββββββββββββββΌββββββββββββββΌββββββββββββββ€
β Search β O(m) β O(m) avg β
β Prefix Search β O(p) β β O(n Γ m) β β
β Sorted Iter β Possible β β Not possibleββ
β Space β Lower β Higher β
β Autocomplete β Optimal β β Inefficient ββ
ββββββββββββββββββ΄ββββββββββββββ΄ββββββββββββββ
p = prefix length, n = number of words
2. Basic Implementation¶
2.1 Array-based (Fixed Alphabet)¶
class TrieNode:
def __init__(self):
self.children = [None] * 26 # a-z
self.is_end = False
class Trie:
def __init__(self):
self.root = TrieNode()
def _char_to_index(self, c):
return ord(c) - ord('a')
def insert(self, word):
"""Insert word - O(m)"""
node = self.root
for c in word:
idx = self._char_to_index(c)
if node.children[idx] is None:
node.children[idx] = TrieNode()
node = node.children[idx]
node.is_end = True
def search(self, word):
"""Search word - O(m)"""
node = self.root
for c in word:
idx = self._char_to_index(c)
if node.children[idx] is None:
return False
node = node.children[idx]
return node.is_end
def starts_with(self, prefix):
"""Check if prefix exists - O(p)"""
node = self.root
for c in prefix:
idx = self._char_to_index(c)
if node.children[idx] is None:
return False
node = node.children[idx]
return True
# Usage example
trie = Trie()
trie.insert("apple")
trie.insert("app")
trie.insert("application")
print(trie.search("app")) # True
print(trie.search("appl")) # False
print(trie.starts_with("appl")) # True
2.2 Dictionary-based (Flexible Alphabet)¶
class TrieNodeDict:
def __init__(self):
self.children = {} # char β TrieNode
self.is_end = False
class TrieDict:
def __init__(self):
self.root = TrieNodeDict()
def insert(self, word):
node = self.root
for c in word:
if c not in node.children:
node.children[c] = TrieNodeDict()
node = node.children[c]
node.is_end = True
def search(self, word):
node = self.root
for c in word:
if c not in node.children:
return False
node = node.children[c]
return node.is_end
def starts_with(self, prefix):
node = self.root
for c in prefix:
if c not in node.children:
return False
node = node.children[c]
return True
def delete(self, word):
"""Delete word"""
def _delete(node, word, depth):
if depth == len(word):
if not node.is_end:
return False # Word not found
node.is_end = False
return len(node.children) == 0
c = word[depth]
if c not in node.children:
return False
should_delete = _delete(node.children[c], word, depth + 1)
if should_delete:
del node.children[c]
return len(node.children) == 0 and not node.is_end
return False
_delete(self.root, word, 0)
2.3 C++ Implementation¶
#include <string>
#include <unordered_map>
using namespace std;
class TrieNode {
public:
unordered_map<char, TrieNode*> children;
bool isEnd = false;
};
class Trie {
private:
TrieNode* root;
public:
Trie() {
root = new TrieNode();
}
void insert(const string& word) {
TrieNode* node = root;
for (char c : word) {
if (node->children.find(c) == node->children.end()) {
node->children[c] = new TrieNode();
}
node = node->children[c];
}
node->isEnd = true;
}
bool search(const string& word) {
TrieNode* node = root;
for (char c : word) {
if (node->children.find(c) == node->children.end()) {
return false;
}
node = node->children[c];
}
return node->isEnd;
}
bool startsWith(const string& prefix) {
TrieNode* node = root;
for (char c : prefix) {
if (node->children.find(c) == node->children.end()) {
return false;
}
node = node->children[c];
}
return true;
}
};
3. Trie Operations¶
3.1 Autocomplete (Find All Words)¶
class AutocompleteTrie(TrieDict):
def autocomplete(self, prefix):
"""Return all words starting with prefix"""
node = self.root
for c in prefix:
if c not in node.children:
return []
node = node.children[c]
result = []
self._collect_words(node, prefix, result)
return result
def _collect_words(self, node, current, result):
if node.is_end:
result.append(current)
for c, child in node.children.items():
self._collect_words(child, current + c, result)
# Usage example
trie = AutocompleteTrie()
for word in ["apple", "app", "application", "apply", "banana"]:
trie.insert(word)
print(trie.autocomplete("app"))
# ['app', 'apple', 'application', 'apply']
3.2 Counting Words¶
class CountingTrie:
def __init__(self):
self.root = {}
def insert(self, word):
node = self.root
for c in word:
if c not in node:
node[c] = {'count': 0}
node = node[c]
node['count'] += 1 # Number of words with this prefix
node['$'] = True # End of word
def count_prefix(self, prefix):
"""Count words starting with prefix"""
node = self.root
for c in prefix:
if c not in node:
return 0
node = node[c]
return node.get('count', 0)
def count_words(self):
"""Count total words"""
def dfs(node):
count = 1 if '$' in node else 0
for c, child in node.items():
if c not in ['count', '$']:
count += dfs(child)
return count
return dfs(self.root)
# Usage example
trie = CountingTrie()
for word in ["apple", "app", "application"]:
trie.insert(word)
print(trie.count_prefix("app")) # 3
print(trie.count_prefix("appl")) # 2
print(trie.count_words()) # 3
3.3 Longest Common Prefix¶
def longest_common_prefix(words):
"""Longest common prefix of all words"""
if not words:
return ""
trie = TrieDict()
for word in words:
trie.insert(word)
prefix = []
node = trie.root
while True:
# Continue if only one child and not end of word
if len(node.children) != 1 or node.is_end:
break
c, child = next(iter(node.children.items()))
prefix.append(c)
node = child
return ''.join(prefix)
# Example
words = ["flower", "flow", "flight"]
print(longest_common_prefix(words)) # "fl"
3.4 Wildcard Search¶
class WildcardTrie(TrieDict):
def search_with_wildcard(self, word):
"""'.' matches any character"""
return self._search(self.root, word, 0)
def _search(self, node, word, idx):
if idx == len(word):
return node.is_end
c = word[idx]
if c == '.':
# Search all children
for child in node.children.values():
if self._search(child, word, idx + 1):
return True
return False
else:
if c not in node.children:
return False
return self._search(node.children[c], word, idx + 1)
# Example
trie = WildcardTrie()
trie.insert("bad")
trie.insert("dad")
trie.insert("mad")
print(trie.search_with_wildcard("pad")) # False
print(trie.search_with_wildcard("bad")) # True
print(trie.search_with_wildcard(".ad")) # True
print(trie.search_with_wildcard("b..")) # True
4. XOR Trie¶
4.1 Concept¶
XOR Trie: Trie storing integers as binary numbers
- Used for finding maximum XOR pair
- Store each bit from most significant
Example: Store 3, 10, 5 (4 bits)
3 = 0011
10 = 1010
5 = 0101
root
/ \
0 1
/ \ \
0 1 0
| | |
1 0 1
| | |
1 1 0
β β β
3 5 10
4.2 Maximum XOR Pair¶
class XORTrie:
def __init__(self, max_bits=30):
self.root = {}
self.max_bits = max_bits
def insert(self, num):
"""Insert number"""
node = self.root
for i in range(self.max_bits - 1, -1, -1):
bit = (num >> i) & 1
if bit not in node:
node[bit] = {}
node = node[bit]
def find_max_xor(self, num):
"""Find XOR result with number that maximizes XOR with num"""
node = self.root
result = 0
for i in range(self.max_bits - 1, -1, -1):
bit = (num >> i) & 1
# Choosing opposite bit maximizes XOR
opposite = 1 - bit
if opposite in node:
result |= (1 << i)
node = node[opposite]
elif bit in node:
node = node[bit]
else:
break
return result
def find_maximum_xor(nums):
"""Find maximum XOR pair in array"""
if len(nums) < 2:
return 0
trie = XORTrie()
max_xor = 0
for num in nums:
trie.insert(num)
max_xor = max(max_xor, trie.find_max_xor(num))
return max_xor
# Example
nums = [3, 10, 5, 25, 2, 8]
print(find_maximum_xor(nums)) # 28 (5 XOR 25 = 28)
4.3 Range XOR Maximum¶
class PersistentXORTrie:
"""
Maximum XOR with k in range [l, r]
Offline queries + Persistent Trie
"""
def __init__(self, max_bits=30):
self.max_bits = max_bits
self.nodes = [[0, 0]] # [left_child, right_child]
self.count = [0] # Number of numbers passing each node
self.roots = [0] # Root per version
def insert(self, prev_root, num):
"""Add num from previous version"""
new_root = len(self.nodes)
self.nodes.append([0, 0])
self.count.append(0)
curr = new_root
prev = prev_root
for i in range(self.max_bits - 1, -1, -1):
bit = (num >> i) & 1
# Create new node
child = len(self.nodes)
self.nodes.append([0, 0])
self.count.append(0)
# Copy opposite child from previous version
self.nodes[curr][1 - bit] = self.nodes[prev][1 - bit] if prev else 0
self.nodes[curr][bit] = child
# Update count
self.count[child] = (self.count[self.nodes[prev][bit]] if prev else 0) + 1
curr = child
prev = self.nodes[prev][bit] if prev else 0
self.roots.append(new_root)
return new_root
def query(self, l_root, r_root, num):
"""Maximum XOR with num between versions (l, r]"""
result = 0
l_node = l_root
r_node = r_root
for i in range(self.max_bits - 1, -1, -1):
bit = (num >> i) & 1
opposite = 1 - bit
l_opp_count = self.count[self.nodes[l_node][opposite]] if l_node else 0
r_opp_count = self.count[self.nodes[r_node][opposite]] if r_node else 0
if r_opp_count - l_opp_count > 0:
result |= (1 << i)
l_node = self.nodes[l_node][opposite] if l_node else 0
r_node = self.nodes[r_node][opposite]
else:
l_node = self.nodes[l_node][bit] if l_node else 0
r_node = self.nodes[r_node][bit]
return result
5. Application Problems¶
5.1 Add and Search Word¶
# LeetCode 211. Design Add and Search Words Data Structure
class WordDictionary:
def __init__(self):
self.trie = WildcardTrie()
def addWord(self, word):
self.trie.insert(word)
def search(self, word):
return self.trie.search_with_wildcard(word)
5.2 Replace Words¶
def replace_words(dictionary, sentence):
"""
Replace each word in sentence with dictionary prefix
dictionary = ["cat", "bat", "rat"]
sentence = "the cattle was rattled by the battery"
β "the cat was rat by the bat"
"""
trie = TrieDict()
for word in dictionary:
trie.insert(word)
def find_root(word):
node = trie.root
for i, c in enumerate(word):
if c not in node.children:
return word
node = node.children[c]
if node.is_end:
return word[:i + 1]
return word
words = sentence.split()
return ' '.join(find_root(word) for word in words)
# Example
dictionary = ["cat", "bat", "rat"]
sentence = "the cattle was rattled by the battery"
print(replace_words(dictionary, sentence))
# "the cat was rat by the bat"
5.3 Concatenated Words¶
def find_all_concatenated_words(words):
"""
Find words that can be formed by concatenating other words
"""
trie = TrieDict()
for word in words:
if word:
trie.insert(word)
def can_form(word, start, count):
if start == len(word):
return count >= 2
node = trie.root
for i in range(start, len(word)):
c = word[i]
if c not in node.children:
return False
node = node.children[c]
if node.is_end:
if can_form(word, i + 1, count + 1):
return True
return False
result = []
for word in words:
if word and can_form(word, 0, 0):
result.append(word)
return result
# Example
words = ["cat", "cats", "catsdogcats", "dog", "dogcatsdog",
"hippopotamuses", "rat", "ratcatdogcat"]
print(find_all_concatenated_words(words))
# ["catsdogcats", "dogcatsdog", "ratcatdogcat"]
5.4 Suffix Trie¶
class SuffixTrie:
"""Trie storing all suffixes of a string"""
def __init__(self, text):
self.root = {}
self._build(text)
def _build(self, text):
for i in range(len(text)):
node = self.root
for c in text[i:]:
if c not in node:
node[c] = {}
node = node[c]
node['$'] = i # Store starting index
def search(self, pattern):
"""All starting positions where pattern exists"""
node = self.root
for c in pattern:
if c not in node:
return []
node = node[c]
# Collect all $ under this node
result = []
self._collect(node, result)
return result
def _collect(self, node, result):
if '$' in node:
result.append(node['$'])
for c, child in node.items():
if c != '$':
self._collect(child, result)
# Example
st = SuffixTrie("banana")
print(st.search("ana")) # [1, 3]
print(st.search("nan")) # [2]
6. Practice Problems¶
Recommended Problems¶
| Difficulty | Problem | Platform | Type |
|---|---|---|---|
| ββ | Implement Trie | LeetCode | Basic implementation |
| βββ | Phone List | BOJ | Prefix |
| βββ | Design Search Autocomplete | LeetCode | Autocomplete |
| βββ | Add and Search Word | LeetCode | Wildcard |
| ββββ | Maximum XOR of Two Numbers | LeetCode | XOR Trie |
| ββββ | String Set | BOJ | Search |
Time/Space Complexity¶
βββββββββββββββββββ¬ββββββββββββββ¬ββββββββββββββββββββββ
β Operation β Time β Space β
βββββββββββββββββββΌββββββββββββββΌββββββββββββββββββββββ€
β Insert β O(m) β O(m Γ alphabet) β
β Search β O(m) β - β
β Prefix Search β O(p) β - β
β Autocomplete β O(p + k) β O(result length) β
β XOR Maximum β O(log MAX) β O(n Γ log MAX) β
βββββββββββββββββββ΄ββββββββββββββ΄ββββββββββββββββββββββ
m = word length, p = prefix length, k = result count
Next Steps¶
- 12_Graph_Basics.md - Graph Basics