28. κ²μ μ΄λ‘ (Game Theory)
28. κ²μ μ΄λ‘ (Game Theory)¶
νμ΅ λͺ©ν¶
- μ‘°ν© κ²μ μ΄λ‘ μ κΈ°λ³Έ κ°λ μ΄ν΄
- μΉλ¦¬ μνμ ν¨λ°° μν λΆμ
- λ κ²μ(Nim Game) ν΄κ²°
- μ€νλΌκ·Έ-κ·Έλ°λ μ 리 νμ©
- λ―Έλλ§₯μ€ μκ³ λ¦¬μ¦ κ΅¬ν
1. μ‘°ν© κ²μ μ΄λ‘ κΈ°μ΄¶
μ‘°ν© κ²μμ νΉμ±¶
βββββββββββββββββββββββββββββββββββββββββββββββββββ
β μ‘°ν© κ²μ (Combinatorial Game) β
βββββββββββββββββββββββββββββββββββββββββββββββββββ€
β 1. λ λͺ
μ νλ μ΄μ΄κ° λ²κ°μ κ°λ©° νλ μ΄ β
β 2. μμ μ 보 κ²μ (λͺ¨λ μ λ³΄κ° κ³΅κ°λ¨) β
β 3. μ΄μ μμκ° μμ (κ²°μ λ‘ μ ) β
β 4. μ νν μμ μν β
β 5. μ΄λν μ μλ νλ μ΄μ΄κ° ν¨λ°° β
βββββββββββββββββββββββββββββββββββββββββββββββββββ
μΉλ¦¬ μν (W) vs ν¨λ°° μν (L)¶
ν¨λ°° μν (L): μμ§μΌ μ μλ λͺ¨λ μνκ° W
μΉλ¦¬ μν (W): μμ§μΌ μ μλ μν μ€ Lμ΄ νλλΌλ μμ
Start
|
W βββ μλλ₯Ό Lλ‘ λ³΄λΌ μ μμ
/ \
L W
| |
W L βββ λͺ¨λ μ΄λμ΄ Wλ‘λ§ κ°
κ·μΉ: Lμμ μμνλ©΄ ν¨λ°°, Wμμ μμνλ©΄ μΉλ¦¬
κΈ°λ³Έ μμ : λ κ°μ Έκ°κΈ° κ²μ¶
def stone_game(n, moves):
"""
nκ°μ λμμ moves 리μ€νΈμ κ°μλ§νΌ κ°μ Έκ° μ μμ
λ§μ§λ§ λμ κ°μ Έκ°λ μ¬λμ΄ μΉλ¦¬
"""
# dp[i] = Trueλ©΄ μΉλ¦¬ μν
dp = [False] * (n + 1)
for i in range(1, n + 1):
for move in moves:
if i >= move and not dp[i - move]:
dp[i] = True
break
return dp[n]
# μ: λ 10κ°, ν λ²μ 1, 2, 3κ° κ°μ Έκ° μ μμ
print(stone_game(10, [1, 2, 3])) # True
2. λ κ²μ (Nim Game)¶
κ·μΉ¶
- μ¬λ¬ λλ―Έμ λμ΄ μμ
- κ° ν΄μ ν λλ―Έμμ 1κ° μ΄μμ λμ κ°μ Έκ°
- λ§μ§λ§ λμ κ°μ Έκ°λ μ¬λμ΄ μΉλ¦¬
λλ―Έ: [3, 4, 5]
Player 1: λλ―Έ 2μμ 2κ° κ°μ Έκ° β [3, 2, 5]
Player 2: λλ―Έ 3μμ 3κ° κ°μ Έκ° β [3, 2, 2]
...
λ μ 리 (Nim Theorem)¶
XOR Sum = 0μ΄λ©΄ ν¨λ°°, β 0μ΄λ©΄ μΉλ¦¬
μ: [3, 4, 5]
3 = 011
4 = 100
5 = 101
---------
XOR = 010 = 2 β 0 β 첫 λ²μ§Έ νλ μ΄μ΄ μΉλ¦¬
μ: [1, 2, 3]
1 = 01
2 = 10
3 = 11
--------
XOR = 00 = 0 β 첫 λ²μ§Έ νλ μ΄μ΄ ν¨λ°°
ꡬν¶
def nim_game(piles):
"""
λ κ²μμ μΉμ νλ³
Returns: Trueλ©΄ 첫 λ²μ§Έ νλ μ΄μ΄ μΉλ¦¬
"""
xor_sum = 0
for pile in piles:
xor_sum ^= pile
return xor_sum != 0
def nim_winning_move(piles):
"""
μΉλ¦¬λ₯Ό μν μ΅μ μ μ°ΎκΈ°
Returns: (λλ―Έ μΈλ±μ€, κ°μ Έκ° λ μ) λλ None
"""
xor_sum = 0
for pile in piles:
xor_sum ^= pile
if xor_sum == 0:
return None # μ΄λ―Έ ν¨λ°° μν
# XOR sumμ 0μΌλ‘ λ§λλ μ μ°ΎκΈ°
for i, pile in enumerate(piles):
target = pile ^ xor_sum
if target < pile:
return (i, pile - target)
return None
# μ¬μ© μμ
piles = [3, 4, 5]
if nim_game(piles):
print("첫 λ²μ§Έ νλ μ΄μ΄ μΉλ¦¬!")
move = nim_winning_move(piles)
print(f"μ΅μ μ: λλ―Έ {move[0]}μμ {move[1]}κ° κ°μ Έκ°κΈ°")
else:
print("λ λ²μ§Έ νλ μ΄μ΄ μΉλ¦¬!")
λ³ν: MisΓ¨re Nim¶
λ§μ§λ§ λμ κ°μ Έκ°λ μ¬λμ΄ ν¨λ°°νλ λ²μ
def misere_nim(piles):
"""
MisΓ¨re Nim: λ§μ§λ§ λμ κ°μ Έκ°λ©΄ ν¨λ°°
"""
xor_sum = 0
all_ones = True
for pile in piles:
xor_sum ^= pile
if pile > 1:
all_ones = False
if all_ones:
# λͺ¨λ λλ―Έκ° 1κ° μ΄νλ©΄, λλ―Έ κ°μκ° νμλ©΄ ν¨λ°°
return len([p for p in piles if p > 0]) % 2 == 0
else:
# μΌλ° λκ³Ό λμΌ
return xor_sum != 0
3. μ€νλΌκ·Έ-κ·Έλ°λ μ 리¶
Grundy Number (Nimber)¶
λͺ¨λ μ‘°ν© κ²μ μνμλ Grundy numberκ° μκ³ , μ΄λ ν΄λΉ μνμ λλ±ν λ λλ―Έμ ν¬κΈ°μ λλ€.
Grundy(state) = mex({Grundy(next_state) for all next_state})
mex = minimum excludant (μ§ν©μ μλ μ΅μ λΉμμ μ μ)
mex({0, 1, 2}) = 3
mex({0, 2, 3}) = 1
mex({1, 2}) = 0
κ·μΉ¶
Grundy = 0: ν¨λ°° μν (L)
Grundy > 0: μΉλ¦¬ μν (W)
μ¬λ¬ λ
립 κ²μμ κ²°ν©:
Grundy(G1 + G2) = Grundy(G1) XOR Grundy(G2)
ꡬν¶
def calculate_grundy(state, get_next_states, memo=None):
"""
μνμ Grundy number κ³μ°
state: νμ¬ κ²μ μν
get_next_states: κ°λ₯ν λ€μ μνλ€μ λ°ννλ ν¨μ
"""
if memo is None:
memo = {}
if state in memo:
return memo[state]
next_states = get_next_states(state)
if not next_states:
# μ΄λ λΆκ° = Grundy 0
memo[state] = 0
return 0
# λ€μ μνλ€μ Grundy number μ§ν©
grundy_set = set()
for next_state in next_states:
grundy_set.add(calculate_grundy(next_state, get_next_states, memo))
# mex κ³μ°
mex = 0
while mex in grundy_set:
mex += 1
memo[state] = mex
return mex
# μ: 1~3κ° κ°μ Έκ°κΈ° κ²μ
def get_next_states(n):
"""nκ° λμμ 1, 2, 3κ°λ₯Ό κ°μ Έκ° ν μν"""
return [n - k for k in [1, 2, 3] if n >= k]
for n in range(10):
g = calculate_grundy(n, get_next_states)
print(f"Grundy({n}) = {g}")
# μΆλ ₯: 0, 1, 2, 3, 0, 1, 2, 3, 0, 1
# ν¨ν΄: n % 4
λ³΅ν© κ²μ¶
def combined_game_winner(games):
"""
μ¬λ¬ λ
립 κ²μμ κ²°ν©
games: κ° κ²μμ Grundy number 리μ€νΈ
"""
xor_sum = 0
for g in games:
xor_sum ^= g
return xor_sum != 0
# μ: μΈ κ°μ λ κ²μ (5κ°, 7κ°, 3κ°)
grundy_5 = calculate_grundy(5, get_next_states) # 1
grundy_7 = calculate_grundy(7, get_next_states) # 3
grundy_3 = calculate_grundy(3, get_next_states) # 3
# κ²°ν©: 1 XOR 3 XOR 3 = 1 β 0 β 첫 λ²μ§Έ νλ μ΄μ΄ μΉλ¦¬
print(combined_game_winner([grundy_5, grundy_7, grundy_3]))
4. λ―Έλλ§₯μ€ μκ³ λ¦¬μ¦¶
κ°λ ¶
λ νλ μ΄μ΄κ° μ΅μ μΌλ‘ νλ μ΄ν λμ κ²°κ³Όλ₯Ό κ³μ°
Max νλ μ΄μ΄: μ μλ₯Ό μ΅λννλ € ν¨
Min νλ μ΄μ΄: μ μλ₯Ό μ΅μννλ € ν¨
Max
/ | \
3 5 2
/| | |\
Min 3 1 5 2 4
Maxλ 5λ₯Ό μ ν (μ΅λ)
κΈ°λ³Έ ꡬν¶
def minimax(state, depth, is_maximizing, get_moves, evaluate, is_terminal):
"""
λ―Έλλ§₯μ€ μκ³ λ¦¬μ¦
state: νμ¬ κ²μ μν
depth: νμ κΉμ΄
is_maximizing: Trueλ©΄ Max νλ μ΄μ΄ μ°¨λ‘
get_moves: κ°λ₯ν μ λ°ν
evaluate: μνμ μ μ λ°ν
is_terminal: κ²μ μ’
λ£ μ¬λΆ
"""
if depth == 0 or is_terminal(state):
return evaluate(state)
moves = get_moves(state)
if is_maximizing:
max_eval = float('-inf')
for move in moves:
next_state = apply_move(state, move)
eval_score = minimax(next_state, depth - 1, False,
get_moves, evaluate, is_terminal)
max_eval = max(max_eval, eval_score)
return max_eval
else:
min_eval = float('inf')
for move in moves:
next_state = apply_move(state, move)
eval_score = minimax(next_state, depth - 1, True,
get_moves, evaluate, is_terminal)
min_eval = min(min_eval, eval_score)
return min_eval
μν-λ² ν κ°μ§μΉκΈ°¶
def alphabeta(state, depth, alpha, beta, is_maximizing,
get_moves, evaluate, is_terminal):
"""
μν-λ² ν κ°μ§μΉκΈ°λ‘ μ΅μ νλ λ―Έλλ§₯μ€
alpha: Max νλ μ΄μ΄μ μ΅μ (νν)
beta: Min νλ μ΄μ΄μ μ΅μ (μν)
"""
if depth == 0 or is_terminal(state):
return evaluate(state)
moves = get_moves(state)
if is_maximizing:
max_eval = float('-inf')
for move in moves:
next_state = apply_move(state, move)
eval_score = alphabeta(next_state, depth - 1, alpha, beta, False,
get_moves, evaluate, is_terminal)
max_eval = max(max_eval, eval_score)
alpha = max(alpha, eval_score)
if beta <= alpha:
break # κ°μ§μΉκΈ°
return max_eval
else:
min_eval = float('inf')
for move in moves:
next_state = apply_move(state, move)
eval_score = alphabeta(next_state, depth - 1, alpha, beta, True,
get_moves, evaluate, is_terminal)
min_eval = min(min_eval, eval_score)
beta = min(beta, eval_score)
if beta <= alpha:
break # κ°μ§μΉκΈ°
return min_eval
# νΈμΆ
score = alphabeta(initial_state, max_depth, float('-inf'), float('inf'), True,
get_moves, evaluate, is_terminal)
5. ν±νν μμ ¶
class TicTacToe:
def __init__(self):
self.board = [[' '] * 3 for _ in range(3)]
self.current_player = 'X'
def get_moves(self):
moves = []
for i in range(3):
for j in range(3):
if self.board[i][j] == ' ':
moves.append((i, j))
return moves
def make_move(self, row, col):
self.board[row][col] = self.current_player
self.current_player = 'O' if self.current_player == 'X' else 'X'
def undo_move(self, row, col):
self.board[row][col] = ' '
self.current_player = 'O' if self.current_player == 'X' else 'X'
def check_winner(self):
# κ°λ‘, μΈλ‘, λκ°μ 체ν¬
lines = []
for i in range(3):
lines.append([self.board[i][j] for j in range(3)]) # κ°λ‘
lines.append([self.board[j][i] for j in range(3)]) # μΈλ‘
lines.append([self.board[i][i] for i in range(3)]) # λκ°μ
lines.append([self.board[i][2-i] for i in range(3)]) # λ°λ λκ°μ
for line in lines:
if line[0] == line[1] == line[2] != ' ':
return line[0]
return None
def is_terminal(self):
if self.check_winner():
return True
return len(self.get_moves()) == 0
def evaluate(self):
winner = self.check_winner()
if winner == 'X':
return 1
elif winner == 'O':
return -1
return 0
def minimax(self, is_maximizing):
if self.is_terminal():
return self.evaluate()
if is_maximizing:
best = float('-inf')
for move in self.get_moves():
self.make_move(*move)
best = max(best, self.minimax(False))
self.undo_move(*move)
return best
else:
best = float('inf')
for move in self.get_moves():
self.make_move(*move)
best = min(best, self.minimax(True))
self.undo_move(*move)
return best
def best_move(self):
"""νμ¬ νλ μ΄μ΄μ μ΅μ μ μ°ΎκΈ°"""
best_score = float('-inf') if self.current_player == 'X' else float('inf')
best_move = None
is_max = self.current_player == 'X'
for move in self.get_moves():
self.make_move(*move)
score = self.minimax(not is_max)
self.undo_move(*move)
if is_max and score > best_score:
best_score = score
best_move = move
elif not is_max and score < best_score:
best_score = score
best_move = move
return best_move
# μ¬μ© μμ
game = TicTacToe()
while not game.is_terminal():
move = game.best_move()
print(f"{game.current_player}κ° {move}μ λ ")
game.make_move(*move)
winner = game.check_winner()
print(f"μΉμ: {winner if winner else '무μΉλΆ'}")
6. μ€μ λ¬Έμ ν¨ν΄¶
ν¨ν΄ 1: λ¨μ μΉν¨ κ²μ¶
def simple_game_winner(n):
"""
nκ° λ, 1~3κ° κ°μ Έκ°κΈ°
λ§μ§λ§ κ°μ Έκ°λ©΄ μΉλ¦¬
"""
return n % 4 != 0
# Grundy ν¨ν΄μΌλ‘ λΉ λ₯΄κ² ν΄κ²°
ν¨ν΄ 2: μ¬λ¬ λλ―Έ κ²μ¶
def multi_pile_game(piles, max_take):
"""
κ° λλ―Έμμ 1~max_takeκ° κ°μ Έκ°κΈ°
"""
# κ° λλ―Έμ Grundy = pile % (max_take + 1)
xor_sum = 0
for pile in piles:
xor_sum ^= (pile % (max_take + 1))
return xor_sum != 0
ν¨ν΄ 3: κ·Έλν κ²μ¶
def graph_game(adj, start):
"""
κ·Έλνμμ ν ν° μ΄λ κ²μ
μ΄λν μ μμΌλ©΄ ν¨λ°°
"""
n = len(adj)
grundy = [-1] * n
def calc_grundy(node):
if grundy[node] != -1:
return grundy[node]
if not adj[node]: # μ΄λ λΆκ°
grundy[node] = 0
return 0
reachable = set()
for next_node in adj[node]:
reachable.add(calc_grundy(next_node))
mex = 0
while mex in reachable:
mex += 1
grundy[node] = mex
return mex
return calc_grundy(start) != 0
ν¨ν΄ 4: κ²μ λΆν ¶
def split_game(n):
"""
nκ° λμ λ κ°μ λΉμ΄μμ§ μμ λλ―Έλ‘ λΆν
λΆν λΆκ°λ₯νλ©΄ ν¨λ°°
"""
grundy = [0] * (n + 1)
for i in range(2, n + 1):
reachable = set()
for j in range(1, i // 2 + 1):
# iλ₯Ό jμ i-jλ‘ λΆν
reachable.add(grundy[j] ^ grundy[i - j])
mex = 0
while mex in reachable:
mex += 1
grundy[i] = mex
return grundy[n] != 0
7. κ³ κΈ κΈ°λ²¶
Nimber κ³±μ ¶
Nimberλ λ§μ (XOR) μΈμ κ³±μ λ μ μλ©λλ€.
def nim_multiply(a, b):
"""
Nimber κ³±μ
(μμ μμ λν΄)
"""
if a < 2 or b < 2:
return a * b
# 2μ κ±°λμ κ³± λΆν΄
highest_a = a.bit_length() - 1
highest_b = b.bit_length() - 1
if highest_a > highest_b:
a, b = b, a
highest_a, highest_b = highest_b, highest_a
if highest_a == 0:
return a * b
# μ¬κ·μ κ³μ° (볡μ‘νλ―λ‘ μΌλ°μ μΌλ‘ ν
μ΄λΈ μ¬μ©)
# μ¬κΈ°μ κ°λ¨ν κ²½μ°λ§ μ²λ¦¬
return a * b # μ€μ λ‘λ λ 볡μ‘
κ²μ νΈλ¦¬ μ μ₯¶
class GameTree:
def __init__(self):
self.cache = {}
def solve(self, state, get_next, hash_state):
"""
λ©λͺ¨μ΄μ μ΄μ
μΌλ‘ κ²μ νΈλ¦¬ νμ
"""
h = hash_state(state)
if h in self.cache:
return self.cache[h]
next_states = get_next(state)
if not next_states:
self.cache[h] = False # μ΄λ λΆκ° = ν¨λ°°
return False
# λ€μ μν μ€ νλλΌλ ν¨λ°° μνλ©΄ νμ¬λ μΉλ¦¬
for ns in next_states:
if not self.solve(ns, get_next, hash_state):
self.cache[h] = True
return True
self.cache[h] = False
return False
8. μκ° λ³΅μ‘λ μ 리¶
| μκ³ λ¦¬μ¦ | μκ° λ³΅μ‘λ | λΉκ³ |
|---|---|---|
| λ κ²μ | O(n) | n = λλ―Έ μ |
| Grundy κ³μ° | O(μν μ Γ λΆκΈ° μ) | λ©λͺ¨μ΄μ μ΄μ νμ |
| λ―Έλλ§₯μ€ | O(b^d) | b = λΆκΈ°, d = κΉμ΄ |
| μν-λ² ν | O(b^(d/2)) ~ O(b^d) | μ΅μ μμ μ |
9. μμ£Ό νλ μ€μ¶
μ€μ 1: mex κ³μ° μ€λ₯¶
# μλͺ»λ¨: μ΅μκ° μ°ΎκΈ°
mex = min(grundy_set)
# μ¬λ°λ¦: μλ μ΅μ λΉμμ
mex = 0
while mex in grundy_set:
mex += 1
μ€μ 2: XOR μ°μ° μ°μ μμ¶
# μλͺ»λ¨
result = a ^ b == 0 # (a ^ b) == 0μ΄ μλ!
# μ¬λ°λ¦
result = (a ^ b) == 0
μ€μ 3: μ’ λ£ μ‘°κ±΄¶
# μ΄λ κ°λ₯ν μνκ° μμΌλ©΄ Grundy = 0
if not next_states:
return 0 # ν¨λ°° μν
10. μ°μ΅ λ¬Έμ ¶
| λμ΄λ | λ¬Έμ μ ν | ν΅μ¬ κ°λ |
|---|---|---|
| β β β | λ κ°μ Έκ°κΈ° | κΈ°λ³Έ W/L λΆμ |
| β β β | λ κ²μ | XOR νμ© |
| β β β | Grundy κ³μ° | mex ν¨μ |
| β β β | λ³΅ν© κ²μ | XOR κ²°ν© |
| β β β β | λ―Έλλ§₯μ€ | μν-λ² ν |
λ€μ λ¨κ³¶
- 28_Advanced_DP_Optimization.md - κ³ κΈ DP μ΅μ ν
νμ΅ μ κ²¶
- λ κ²μμμ XOR sumμ΄ 0μ΄λ©΄ μ ν¨λ°°μΈκ°?
- Grundy numberμ mex ν¨μλ?
- μ¬λ¬ λ 립 κ²μμ Grundyλ μ΄λ»κ² κ³μ°νλκ°?
- μν-λ² ν κ°μ§μΉκΈ°κ° μλνλ μ리λ?