04_hash_table.py

Download
python 473 lines 14.2 KB
  1"""
  2ํ•ด์‹œ ํ…Œ์ด๋ธ” (Hash Table)
  3Hash Table Implementation
  4
  5ํ•ด์‹œ ํ•จ์ˆ˜์™€ ์ถฉ๋Œ ํ•ด๊ฒฐ ๋ฐฉ๋ฒ•์„ ๊ตฌํ˜„ํ•ฉ๋‹ˆ๋‹ค.
  6"""
  7
  8from typing import Any, List, Optional, Tuple
  9from collections import OrderedDict
 10
 11
 12# =============================================================================
 13# 1. ํ•ด์‹œ ํ•จ์ˆ˜ (Hash Functions)
 14# =============================================================================
 15
 16def hash_division(key: int, table_size: int) -> int:
 17    """๋‚˜๋ˆ—์…ˆ ํ•ด์‹œ ํ•จ์ˆ˜ - O(1)"""
 18    return key % table_size
 19
 20
 21def hash_multiplication(key: int, table_size: int, A: float = 0.6180339887) -> int:
 22    """๊ณฑ์…ˆ ํ•ด์‹œ ํ•จ์ˆ˜ (Knuth ์ œ์•ˆ A๊ฐ’) - O(1)"""
 23    return int(table_size * ((key * A) % 1))
 24
 25
 26def hash_string(s: str, table_size: int) -> int:
 27    """๋ฌธ์ž์—ด ํ•ด์‹œ ํ•จ์ˆ˜ (๋‹คํ•ญ์‹ ๋กค๋ง ํ•ด์‹œ) - O(n)"""
 28    hash_val = 0
 29    base = 31
 30    for char in s:
 31        hash_val = (hash_val * base + ord(char)) % table_size
 32    return hash_val
 33
 34
 35# =============================================================================
 36# 2. ์ฒด์ด๋‹ (Chaining)
 37# =============================================================================
 38
 39class HashTableChaining:
 40    """์ฒด์ด๋‹ ๋ฐฉ์‹ ํ•ด์‹œ ํ…Œ์ด๋ธ”"""
 41
 42    def __init__(self, size: int = 10):
 43        self.size = size
 44        self.table: List[List[Tuple[Any, Any]]] = [[] for _ in range(size)]
 45        self.count = 0
 46
 47    def _hash(self, key: Any) -> int:
 48        """ํ•ด์‹œ ํ•จ์ˆ˜"""
 49        if isinstance(key, int):
 50            return key % self.size
 51        return hash(key) % self.size
 52
 53    def put(self, key: Any, value: Any) -> None:
 54        """ํ‚ค-๊ฐ’ ์‚ฝ์ž… - ํ‰๊ท  O(1), ์ตœ์•… O(n)"""
 55        index = self._hash(key)
 56
 57        # ๊ธฐ์กด ํ‚ค ์—…๋ฐ์ดํŠธ
 58        for i, (k, v) in enumerate(self.table[index]):
 59            if k == key:
 60                self.table[index][i] = (key, value)
 61                return
 62
 63        # ์ƒˆ ํ‚ค ์ถ”๊ฐ€
 64        self.table[index].append((key, value))
 65        self.count += 1
 66
 67    def get(self, key: Any) -> Optional[Any]:
 68        """ํ‚ค๋กœ ๊ฐ’ ์กฐํšŒ - ํ‰๊ท  O(1), ์ตœ์•… O(n)"""
 69        index = self._hash(key)
 70
 71        for k, v in self.table[index]:
 72            if k == key:
 73                return v
 74        return None
 75
 76    def remove(self, key: Any) -> bool:
 77        """ํ‚ค-๊ฐ’ ์‚ญ์ œ - ํ‰๊ท  O(1), ์ตœ์•… O(n)"""
 78        index = self._hash(key)
 79
 80        for i, (k, v) in enumerate(self.table[index]):
 81            if k == key:
 82                self.table[index].pop(i)
 83                self.count -= 1
 84                return True
 85        return False
 86
 87    def __contains__(self, key: Any) -> bool:
 88        return self.get(key) is not None
 89
 90    def load_factor(self) -> float:
 91        return self.count / self.size
 92
 93
 94# =============================================================================
 95# 3. ์˜คํ”ˆ ์–ด๋“œ๋ ˆ์‹ฑ - ์„ ํ˜• ํƒ์‚ฌ (Linear Probing)
 96# =============================================================================
 97
 98class HashTableLinearProbing:
 99    """์„ ํ˜• ํƒ์‚ฌ ๋ฐฉ์‹ ํ•ด์‹œ ํ…Œ์ด๋ธ”"""
100
101    DELETED = object()  # ์‚ญ์ œ ๋งˆ์ปค
102
103    def __init__(self, size: int = 10):
104        self.size = size
105        self.keys: List[Any] = [None] * size
106        self.values: List[Any] = [None] * size
107        self.count = 0
108
109    def _hash(self, key: Any) -> int:
110        if isinstance(key, int):
111            return key % self.size
112        return hash(key) % self.size
113
114    def _probe(self, key: Any, for_insert: bool = False) -> int:
115        """์„ ํ˜• ํƒ์‚ฌ๋กœ ์Šฌ๋กฏ ์ฐพ๊ธฐ"""
116        index = self._hash(key)
117        first_deleted = -1
118
119        for i in range(self.size):
120            probe_index = (index + i) % self.size
121
122            if self.keys[probe_index] is None:
123                if for_insert and first_deleted != -1:
124                    return first_deleted
125                return probe_index
126
127            if self.keys[probe_index] is self.DELETED:
128                if first_deleted == -1:
129                    first_deleted = probe_index
130                continue
131
132            if self.keys[probe_index] == key:
133                return probe_index
134
135        return first_deleted if first_deleted != -1 else -1
136
137    def put(self, key: Any, value: Any) -> bool:
138        """ํ‚ค-๊ฐ’ ์‚ฝ์ž… - ํ‰๊ท  O(1), ์ตœ์•… O(n)"""
139        if self.count >= self.size * 0.7:  # ๋กœ๋“œ ํŒฉํ„ฐ 70%
140            self._resize()
141
142        index = self._probe(key, for_insert=True)
143        if index == -1:
144            return False
145
146        if self.keys[index] is None or self.keys[index] is self.DELETED:
147            self.count += 1
148
149        self.keys[index] = key
150        self.values[index] = value
151        return True
152
153    def get(self, key: Any) -> Optional[Any]:
154        """ํ‚ค๋กœ ๊ฐ’ ์กฐํšŒ - ํ‰๊ท  O(1), ์ตœ์•… O(n)"""
155        index = self._probe(key)
156
157        if index != -1 and self.keys[index] not in (None, self.DELETED):
158            return self.values[index]
159        return None
160
161    def remove(self, key: Any) -> bool:
162        """ํ‚ค-๊ฐ’ ์‚ญ์ œ (์‚ญ์ œ ๋งˆ์ปค ์‚ฌ์šฉ) - ํ‰๊ท  O(1), ์ตœ์•… O(n)"""
163        index = self._probe(key)
164
165        if index != -1 and self.keys[index] not in (None, self.DELETED):
166            self.keys[index] = self.DELETED
167            self.values[index] = None
168            self.count -= 1
169            return True
170        return False
171
172    def _resize(self) -> None:
173        """ํ…Œ์ด๋ธ” ํฌ๊ธฐ 2๋ฐฐ ํ™•์žฅ"""
174        old_keys = self.keys
175        old_values = self.values
176
177        self.size *= 2
178        self.keys = [None] * self.size
179        self.values = [None] * self.size
180        self.count = 0
181
182        for k, v in zip(old_keys, old_values):
183            if k is not None and k is not self.DELETED:
184                self.put(k, v)
185
186
187# =============================================================================
188# 4. ์˜คํ”ˆ ์–ด๋“œ๋ ˆ์‹ฑ - ์ด์ค‘ ํ•ด์‹ฑ (Double Hashing)
189# =============================================================================
190
191class HashTableDoubleHashing:
192    """์ด์ค‘ ํ•ด์‹ฑ ๋ฐฉ์‹ ํ•ด์‹œ ํ…Œ์ด๋ธ”"""
193
194    DELETED = object()
195
196    def __init__(self, size: int = 11):  # ์†Œ์ˆ˜ ๊ถŒ์žฅ
197        self.size = size
198        self.keys: List[Any] = [None] * size
199        self.values: List[Any] = [None] * size
200        self.count = 0
201
202    def _hash1(self, key: Any) -> int:
203        if isinstance(key, int):
204            return key % self.size
205        return hash(key) % self.size
206
207    def _hash2(self, key: Any) -> int:
208        """๋‘ ๋ฒˆ์งธ ํ•ด์‹œ ํ•จ์ˆ˜ (0์ด ์•„๋‹Œ ๊ฐ’ ๋ฐ˜ํ™˜)"""
209        if isinstance(key, int):
210            return 7 - (key % 7)  # 7์€ size๋ณด๋‹ค ์ž‘์€ ์†Œ์ˆ˜
211        return 7 - (hash(key) % 7)
212
213    def _probe(self, key: Any, for_insert: bool = False) -> int:
214        """์ด์ค‘ ํ•ด์‹ฑ์œผ๋กœ ์Šฌ๋กฏ ์ฐพ๊ธฐ"""
215        h1 = self._hash1(key)
216        h2 = self._hash2(key)
217        first_deleted = -1
218
219        for i in range(self.size):
220            index = (h1 + i * h2) % self.size
221
222            if self.keys[index] is None:
223                if for_insert and first_deleted != -1:
224                    return first_deleted
225                return index
226
227            if self.keys[index] is self.DELETED:
228                if first_deleted == -1:
229                    first_deleted = index
230                continue
231
232            if self.keys[index] == key:
233                return index
234
235        return first_deleted if first_deleted != -1 else -1
236
237    def put(self, key: Any, value: Any) -> bool:
238        index = self._probe(key, for_insert=True)
239        if index == -1:
240            return False
241
242        if self.keys[index] is None or self.keys[index] is self.DELETED:
243            self.count += 1
244
245        self.keys[index] = key
246        self.values[index] = value
247        return True
248
249    def get(self, key: Any) -> Optional[Any]:
250        index = self._probe(key)
251        if index != -1 and self.keys[index] not in (None, self.DELETED):
252            return self.values[index]
253        return None
254
255    def remove(self, key: Any) -> bool:
256        index = self._probe(key)
257        if index != -1 and self.keys[index] not in (None, self.DELETED):
258            self.keys[index] = self.DELETED
259            self.values[index] = None
260            self.count -= 1
261            return True
262        return False
263
264
265# =============================================================================
266# 5. LRU ์บ์‹œ (Least Recently Used Cache)
267# =============================================================================
268
269class LRUCache:
270    """
271    LRU ์บ์‹œ ๊ตฌํ˜„
272    OrderedDict๋ฅผ ์‚ฌ์šฉํ•œ O(1) get/put
273    """
274
275    def __init__(self, capacity: int):
276        self.capacity = capacity
277        self.cache = OrderedDict()
278
279    def get(self, key: Any) -> Optional[Any]:
280        """ํ‚ค ์กฐํšŒ ๋ฐ ์ตœ๊ทผ ์‚ฌ์šฉ์œผ๋กœ ์ด๋™ - O(1)"""
281        if key not in self.cache:
282            return None
283
284        # ์ตœ๊ทผ ์‚ฌ์šฉ์œผ๋กœ ์ด๋™
285        self.cache.move_to_end(key)
286        return self.cache[key]
287
288    def put(self, key: Any, value: Any) -> None:
289        """ํ‚ค-๊ฐ’ ์‚ฝ์ž… - O(1)"""
290        if key in self.cache:
291            self.cache.move_to_end(key)
292        else:
293            if len(self.cache) >= self.capacity:
294                # ๊ฐ€์žฅ ์˜ค๋ž˜๋œ ํ•ญ๋ชฉ ์ œ๊ฑฐ
295                self.cache.popitem(last=False)
296
297        self.cache[key] = value
298
299    def __str__(self) -> str:
300        return str(list(self.cache.items()))
301
302
303# =============================================================================
304# 6. ์‹ค์ „ ๋ฌธ์ œ: Two Sum
305# =============================================================================
306
307def two_sum(nums: List[int], target: int) -> Optional[Tuple[int, int]]:
308    """
309    ํ•ฉ์ด target์ธ ๋‘ ์ˆ˜์˜ ์ธ๋ฑ์Šค ์ฐพ๊ธฐ
310    ์‹œ๊ฐ„๋ณต์žก๋„: O(n), ๊ณต๊ฐ„๋ณต์žก๋„: O(n)
311    """
312    seen = {}  # ๊ฐ’ -> ์ธ๋ฑ์Šค
313
314    for i, num in enumerate(nums):
315        complement = target - num
316        if complement in seen:
317            return (seen[complement], i)
318        seen[num] = i
319
320    return None
321
322
323# =============================================================================
324# 7. ์‹ค์ „ ๋ฌธ์ œ: ๋นˆ๋„ ์„ธ๊ธฐ
325# =============================================================================
326
327def frequency_count(arr: List[Any]) -> dict:
328    """
329    ๋ฐฐ์—ด ์š”์†Œ์˜ ๋นˆ๋„ ๊ณ„์‚ฐ
330    ์‹œ๊ฐ„๋ณต์žก๋„: O(n), ๊ณต๊ฐ„๋ณต์žก๋„: O(k) (k = ๊ณ ์œ  ์š”์†Œ ์ˆ˜)
331    """
332    freq = {}
333    for item in arr:
334        freq[item] = freq.get(item, 0) + 1
335    return freq
336
337
338def most_frequent(arr: List[Any]) -> Optional[Any]:
339    """๊ฐ€์žฅ ๋นˆ๋ฒˆํ•œ ์š”์†Œ ์ฐพ๊ธฐ"""
340    freq = frequency_count(arr)
341    if not freq:
342        return None
343    return max(freq, key=freq.get)
344
345
346# =============================================================================
347# 8. ์‹ค์ „ ๋ฌธ์ œ: ๋ถ€๋ถ„ ๋ฐฐ์—ด ํ•ฉ์ด K์ธ ๊ฐœ์ˆ˜
348# =============================================================================
349
350def subarray_sum_count(nums: List[int], k: int) -> int:
351    """
352    ํ•ฉ์ด k์ธ ์—ฐ์† ๋ถ€๋ถ„ ๋ฐฐ์—ด ๊ฐœ์ˆ˜
353    ํ”„๋ฆฌํ”ฝ์Šค ํ•ฉ + ํ•ด์‹œ๋งต ํ™œ์šฉ
354    ์‹œ๊ฐ„๋ณต์žก๋„: O(n), ๊ณต๊ฐ„๋ณต์žก๋„: O(n)
355    """
356    count = 0
357    prefix_sum = 0
358    prefix_count = {0: 1}  # ํ”„๋ฆฌํ”ฝ์Šค ํ•ฉ -> ๋“ฑ์žฅ ํšŸ์ˆ˜
359
360    for num in nums:
361        prefix_sum += num
362
363        # prefix_sum - k๊ฐ€ ์ด์ „์— ์žˆ์—ˆ๋‹ค๋ฉด, ๊ทธ ์‚ฌ์ด๊ฐ€ ํ•ฉ k
364        if prefix_sum - k in prefix_count:
365            count += prefix_count[prefix_sum - k]
366
367        prefix_count[prefix_sum] = prefix_count.get(prefix_sum, 0) + 1
368
369    return count
370
371
372# =============================================================================
373# 9. ์‹ค์ „ ๋ฌธ์ œ: ์•„๋‚˜๊ทธ๋žจ ๊ทธ๋ฃน
374# =============================================================================
375
376def group_anagrams(strs: List[str]) -> List[List[str]]:
377    """
378    ์•„๋‚˜๊ทธ๋žจ๋ผ๋ฆฌ ๊ทธ๋ฃนํ™”
379    ์‹œ๊ฐ„๋ณต์žก๋„: O(n * k log k), n=๋ฌธ์ž์—ด ์ˆ˜, k=์ตœ๋Œ€ ๋ฌธ์ž์—ด ๊ธธ์ด
380    """
381    groups = {}
382
383    for s in strs:
384        # ์ •๋ ฌ๋œ ๋ฌธ์ž์—ด์„ ํ‚ค๋กœ ์‚ฌ์šฉ
385        key = ''.join(sorted(s))
386        if key not in groups:
387            groups[key] = []
388        groups[key].append(s)
389
390    return list(groups.values())
391
392
393# =============================================================================
394# ํ…Œ์ŠคํŠธ
395# =============================================================================
396
397def main():
398    print("=" * 60)
399    print("ํ•ด์‹œ ํ…Œ์ด๋ธ” (Hash Table) ์˜ˆ์ œ")
400    print("=" * 60)
401
402    # 1. ํ•ด์‹œ ํ•จ์ˆ˜ ํ…Œ์ŠคํŠธ
403    print("\n[1] ํ•ด์‹œ ํ•จ์ˆ˜")
404    print(f"    hash_division(42, 10) = {hash_division(42, 10)}")
405    print(f"    hash_multiplication(42, 10) = {hash_multiplication(42, 10)}")
406    print(f"    hash_string('hello', 10) = {hash_string('hello', 10)}")
407
408    # 2. ์ฒด์ด๋‹ ํ…Œ์ŠคํŠธ
409    print("\n[2] ์ฒด์ด๋‹ ๋ฐฉ์‹")
410    ht_chain = HashTableChaining(5)
411    for i, name in enumerate(['Alice', 'Bob', 'Charlie', 'David', 'Eve']):
412        ht_chain.put(name, i * 10)
413    print(f"    get('Charlie') = {ht_chain.get('Charlie')}")
414    print(f"    'Bob' in table = {'Bob' in ht_chain}")
415    print(f"    load_factor = {ht_chain.load_factor():.2f}")
416
417    # 3. ์„ ํ˜• ํƒ์‚ฌ ํ…Œ์ŠคํŠธ
418    print("\n[3] ์„ ํ˜• ํƒ์‚ฌ ๋ฐฉ์‹")
419    ht_linear = HashTableLinearProbing(10)
420    for i in range(7):
421        ht_linear.put(i * 5, f"value_{i}")
422    print(f"    get(10) = {ht_linear.get(10)}")
423    ht_linear.remove(10)
424    print(f"    get(10) after remove = {ht_linear.get(10)}")
425
426    # 4. LRU ์บ์‹œ ํ…Œ์ŠคํŠธ
427    print("\n[4] LRU ์บ์‹œ")
428    cache = LRUCache(3)
429    cache.put('a', 1)
430    cache.put('b', 2)
431    cache.put('c', 3)
432    print(f"    ์บ์‹œ: {cache}")
433    cache.get('a')  # 'a'๋ฅผ ์ตœ๊ทผ์œผ๋กœ ์ด๋™
434    cache.put('d', 4)  # 'b' ์ œ๊ฑฐ๋จ
435    print(f"    get('a'), put('d') ํ›„: {cache}")
436
437    # 5. Two Sum
438    print("\n[5] Two Sum")
439    nums = [2, 7, 11, 15]
440    target = 9
441    result = two_sum(nums, target)
442    print(f"    nums = {nums}, target = {target}")
443    print(f"    ๊ฒฐ๊ณผ: ์ธ๋ฑ์Šค {result}")
444
445    # 6. ๋นˆ๋„ ์„ธ๊ธฐ
446    print("\n[6] ๋นˆ๋„ ์„ธ๊ธฐ")
447    arr = ['a', 'b', 'a', 'c', 'a', 'b']
448    freq = frequency_count(arr)
449    print(f"    ๋ฐฐ์—ด: {arr}")
450    print(f"    ๋นˆ๋„: {freq}")
451    print(f"    ์ตœ๋นˆ๊ฐ’: {most_frequent(arr)}")
452
453    # 7. ๋ถ€๋ถ„ ๋ฐฐ์—ด ํ•ฉ
454    print("\n[7] ๋ถ€๋ถ„ ๋ฐฐ์—ด ํ•ฉ์ด K์ธ ๊ฐœ์ˆ˜")
455    nums = [1, 1, 1]
456    k = 2
457    count = subarray_sum_count(nums, k)
458    print(f"    nums = {nums}, k = {k}")
459    print(f"    ๊ฒฐ๊ณผ: {count}๊ฐœ")
460
461    # 8. ์•„๋‚˜๊ทธ๋žจ ๊ทธ๋ฃน
462    print("\n[8] ์•„๋‚˜๊ทธ๋žจ ๊ทธ๋ฃน")
463    strs = ["eat", "tea", "tan", "ate", "nat", "bat"]
464    groups = group_anagrams(strs)
465    print(f"    ์ž…๋ ฅ: {strs}")
466    print(f"    ๊ทธ๋ฃน: {groups}")
467
468    print("\n" + "=" * 60)
469
470
471if __name__ == "__main__":
472    main()