2. NumPy 고급
2. NumPy 고급¶
이전: NumPy 기초 | 다음: Pandas 기초
๊ฐ์¶
NumPy์ ๊ณ ๊ธ ๊ธฐ๋ฅ์ธ ์ ํ๋์, ํต๊ณ ํจ์, ๋์ ์์ฑ, ๊ตฌ์กฐํ๋ ๋ฐฐ์ด, ๊ทธ๋ฆฌ๊ณ ์ฑ๋ฅ ์ต์ ํ ๊ธฐ๋ฒ์ ๋ค๋ฃน๋๋ค.
1. 선형대수 (Linear Algebra)¶
1.1 행렬 곱셈¶
import numpy as np
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])
# 행렬 곱셈 (dot product)
C = np.dot(A, B)
print(C)
# [[19 22]
# [43 50]]
# @ ์ฐ์ฐ์ (Python 3.5+)
C = A @ B
# matmul ํจ์
C = np.matmul(A, B)
# ๋ฒกํฐ ๋ด์
v1 = np.array([1, 2, 3])
v2 = np.array([4, 5, 6])
dot_product = np.dot(v1, v2) # 32
1.2 행렬 분해¶
A = np.array([[1, 2], [3, 4]])
# ํ๋ ฌ์ (Determinant)
det = np.linalg.det(A)
print(det) # -2.0000000000000004 (부동소수점 오차, 수학적 값은 -2)
# ์ญํ๋ ฌ (Inverse)
A_inv = np.linalg.inv(A)
print(A_inv)
# [[-2. 1. ]
# [ 1.5 -0.5]]
# ๊ฒ์ฆ: A @ A_inv = I
print(A @ A_inv)
# [[1. 0.]
# [0. 1.]]
# ๊ณ ์ ๊ฐ๊ณผ ๊ณ ์ ๋ฒกํฐ
eigenvalues, eigenvectors = np.linalg.eig(A)
print("๊ณ ์ ๊ฐ:", eigenvalues)
print("๊ณ ์ ๋ฒกํฐ:\n", eigenvectors)
# ํน์ด๊ฐ ๋ถํด (SVD)
U, S, Vt = np.linalg.svd(A)
print("U:\n", U)
print("S:", S)
print("Vt:\n", Vt)
# QR ๋ถํด
Q, R = np.linalg.qr(A)
# ์ด๋ ์คํค ๋ถํด (๋์นญ ์์ ์น ํ๋ ฌ)
B = np.array([[4, 2], [2, 5]])
L = np.linalg.cholesky(B)
1.3 선형 방정식 풀이¶
# Ax = b ํํ์ ์ ํ ์์คํ
# 2x + y = 5
# x + 3y = 6
A = np.array([[2, 1], [1, 3]])
b = np.array([5, 6])
# ํด ๊ตฌํ๊ธฐ
x = np.linalg.solve(A, b)
print(x) # [1.8 1.4]
# ๊ฒ์ฆ
print(A @ x) # [5. 6.]
# ์ต์ ์์น๋ฒ (Least Squares)
A = np.array([[1, 1], [1, 2], [1, 3], [1, 4]])
b = np.array([2, 3, 4.5, 5])
x, residuals, rank, s = np.linalg.lstsq(A, b, rcond=None)
print("계수:", x) # [1.   1.05] (절편 1.0, 기울기 1.05)
1.4 행렬 노름과 조건수¶
A = np.array([[1, 2], [3, 4]])
# 프로베니우스 노름 (Frobenius norm)
fro_norm = np.linalg.norm(A, 'fro')
# L2 노름 (스펙트럴 노름)
l2_norm = np.linalg.norm(A, 2)
# L1 노름
l1_norm = np.linalg.norm(A, 1)
# 무한대 노름
inf_norm = np.linalg.norm(A, np.inf)
# 벡터 노름
v = np.array([3, 4])
print(np.linalg.norm(v)) # 5.0 (์ ํด๋ฆฌ๋ ๊ฑฐ๋ฆฌ)
# ์กฐ๊ฑด์ (Condition Number)
cond = np.linalg.cond(A)
print("์กฐ๊ฑด์:", cond)
1.5 행렬 랭크와 트레이스¶
A = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# ๋ญํฌ
rank = np.linalg.matrix_rank(A)
print("๋ญํฌ:", rank) # 2
# ํธ๋ ์ด์ค (๋๊ฐ์ ํฉ)
trace = np.trace(A)
print("ํธ๋ ์ด์ค:", trace) # 15
2. 통계 함수¶
2.1 기술 통계¶
data = np.array([23, 45, 67, 89, 12, 34, 56, 78, 90, 11])
# ๊ธฐ๋ณธ ํต๊ณ
print("ํ๊ท :", np.mean(data)) # 50.5
print("์ค์๊ฐ:", np.median(data)) # 50.5
print("표준편차:", np.std(data)) # 28.64
print("분산:", np.var(data)) # 820.25
print("์ต์:", np.min(data)) # 11
print("์ต๋:", np.max(data)) # 90
print("๋ฒ์:", np.ptp(data)) # 79 (peak to peak)
# ๋ฐฑ๋ถ์์
print("25%:", np.percentile(data, 25))
print("50%:", np.percentile(data, 50))
print("75%:", np.percentile(data, 75))
# ๋ถ์์
print("1์ฌ๋ถ์:", np.quantile(data, 0.25))
print("3์ฌ๋ถ์:", np.quantile(data, 0.75))
2.2 상관계수와 공분산¶
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 5, 4, 5])
# ์๊ด๊ณ์ ํ๋ ฌ
corr_matrix = np.corrcoef(x, y)
print(corr_matrix)
# [[1. 0.77]
# [0.77 1. ]]
# ๊ณต๋ถ์ฐ ํ๋ ฌ
cov_matrix = np.cov(x, y)
print(cov_matrix)
# [[2.5 1.5]
# [1.5 1.5]]
# ๋ค๋ณ๋ ๋ฐ์ดํฐ
data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(np.corrcoef(data)) # ๋ณ์ ๊ฐ ์๊ด๊ณ์
2.3 히스토그램과 빈도¶
data = np.random.randn(1000)
# ํ์คํ ๊ทธ๋จ ๊ณ์ฐ
counts, bin_edges = np.histogram(data, bins=10)
print("๋น๋:", counts)
print("๊ตฌ๊ฐ:", bin_edges)
# ๋น ์ง์
counts, bin_edges = np.histogram(data, bins=[-3, -2, -1, 0, 1, 2, 3])
# ๊ณ ์ ๊ฐ๊ณผ ๋น๋
arr = np.array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])
unique, counts = np.unique(arr, return_counts=True)
print("๊ณ ์ ๊ฐ:", unique) # [1 2 3 4]
print("๋น๋:", counts) # [1 2 3 4]
3. 난수 생성¶
3.1 기본 난수 생성¶
# ์๋ก์ด ๋ฐฉ์ (NumPy 1.17+)
rng = np.random.default_rng(seed=42)
# ๊ท ์ผ ๋ถํฌ [0, 1)
print(rng.random(5))
# ์ ์ ๋์
print(rng.integers(1, 100, size=10))
# ๊ท ์ผ ๋ถํฌ [low, high)
print(rng.uniform(0, 10, size=5))
# ๋ ๊ฑฐ์ ๋ฐฉ์
np.random.seed(42)
print(np.random.rand(5)) # [0, 1) ๊ท ์ผ ๋ถํฌ
print(np.random.randint(1, 10, 5)) # ์ ์ ๋์
3.2 확률 분포¶
rng = np.random.default_rng(42)
# ์ ๊ท ๋ถํฌ (๊ฐ์ฐ์์)
normal = rng.normal(loc=0, scale=1, size=1000) # ํ๊ท 0, ํ์คํธ์ฐจ 1
print(f"ํ๊ท : {normal.mean():.3f}, ํ์คํธ์ฐจ: {normal.std():.3f}")
# ํ์ค ์ ๊ท ๋ถํฌ
standard_normal = rng.standard_normal(1000)
# ์ดํญ ๋ถํฌ
binomial = rng.binomial(n=10, p=0.5, size=1000) # n๋ฒ ์ํ, ์ฑ๊ณต ํ๋ฅ p
# ํฌ์์ก ๋ถํฌ
poisson = rng.poisson(lam=5, size=1000) # ํ๊ท 5
# ์ง์ ๋ถํฌ
exponential = rng.exponential(scale=2, size=1000)
# ๋ฒ ํ ๋ถํฌ
beta = rng.beta(a=2, b=5, size=1000)
# ๊ฐ๋ง ๋ถํฌ
gamma = rng.gamma(shape=2, scale=1, size=1000)
# ์นด์ด์ ๊ณฑ ๋ถํฌ
chisquare = rng.chisquare(df=5, size=1000)
# t ๋ถํฌ
t = rng.standard_t(df=10, size=1000)
3.3 랜덤 샘플링¶
rng = np.random.default_rng(42)
arr = np.array([10, 20, 30, 40, 50])
# ๋๋ค ์ ํ
sample = rng.choice(arr, size=3, replace=False) # ๋น๋ณต์ ์ถ์ถ
print(sample)
# ํ๋ฅ ๊ฐ์ค์น
weights = [0.1, 0.1, 0.3, 0.3, 0.2]
sample = rng.choice(arr, size=10, p=weights)
# 배열 셔플
arr_copy = arr.copy()
rng.shuffle(arr_copy)
print(arr_copy)
# ์์ด (์ ๋ฐฐ์ด ๋ฐํ)
permuted = rng.permutation(arr)
print(permuted)
4. 구조화된 배열¶
4.1 구조화된 dtype¶
# ๊ตฌ์กฐํ๋ ๋ฐฐ์ด ์ ์
dt = np.dtype([
('name', 'U20'), # ์ ๋์ฝ๋ ๋ฌธ์์ด (์ต๋ 20์)
('age', 'i4'), # 32๋นํธ ์ ์
('height', 'f8'), # 64๋นํธ ์ค์
('is_student', '?') # ๋ถ๋ฆฌ์ธ
])
# ๋ฐ์ดํฐ ์์ฑ
data = np.array([
('Alice', 25, 165.5, True),
('Bob', 30, 178.2, False),
('Charlie', 22, 172.0, True)
], dtype=dt)
# ํ๋ ์ ๊ทผ
print(data['name']) # ['Alice' 'Bob' 'Charlie']
print(data['age']) # [25 30 22]
print(data[0]) # ('Alice', 25, 165.5, True)
print(data[0]['name']) # Alice
# ์กฐ๊ฑด ํํฐ๋ง
students = data[data['is_student']]
print(students['name'])
4.2 레코드 배열¶
# recarray๋ก ๋ณํ (์์ฑ ์ ๊ทผ ๊ฐ๋ฅ)
rec = data.view(np.recarray)
print(rec.name) # ['Alice' 'Bob' 'Charlie']
print(rec.age) # [25 30 22]
print(rec[0].name) # Alice
5. 메모리 레이아웃과 성능¶
5.1 C-order vs Fortran-order¶
# C-order (ํ ์ฐ์ ): ๊ธฐ๋ณธ๊ฐ
c_arr = np.array([[1, 2, 3], [4, 5, 6]], order='C')
print(c_arr.flags['C_CONTIGUOUS']) # True
# Fortran-order (์ด ์ฐ์ )
f_arr = np.array([[1, 2, 3], [4, 5, 6]], order='F')
print(f_arr.flags['F_CONTIGUOUS']) # True
# ๋ฉ๋ชจ๋ฆฌ ๋ ์ด์์ ํ์ธ
print(c_arr.strides) # (24, 8) - ํ ์ด๋ 24๋ฐ์ดํธ, ์ด ์ด๋ 8๋ฐ์ดํธ
print(f_arr.strides) # (8, 16)
5.2 뷰와 복사 성능¶
import time
arr = np.arange(10000000)
# ์ฌ๋ผ์ด์ฑ (๋ทฐ) - ๋น ๋ฆ
start = time.time()
view = arr[::2]
print(f"๋ทฐ ์์ฑ: {time.time() - start:.6f}์ด")
# ๋ณต์ฌ - ๋๋ฆผ
start = time.time()
copy = arr[::2].copy()
print(f"๋ณต์ฌ: {time.time() - start:.6f}์ด")
5.3 벡터화 vs 루프¶
import time
n = 1000000
arr = np.random.rand(n)
# ํ์ด์ฌ ๋ฃจํ (๋๋ฆผ)
start = time.time()
result = []
for x in arr:
result.append(x ** 2)
print(f"ํ์ด์ฌ ๋ฃจํ: {time.time() - start:.4f}์ด")
# NumPy ๋ฒกํฐํ (๋น ๋ฆ)
start = time.time()
result = arr ** 2
print(f"NumPy ๋ฒกํฐํ: {time.time() - start:.4f}์ด")
5.4 Universal Functions 최적화¶
# where ์ฌ์ฉ
arr = np.array([1, -2, 3, -4, 5])
result = np.where(arr > 0, arr, 0) # ์์๋ ์ ์ง, ์์๋ 0
print(result) # [1 0 3 0 5]
# select ์ฌ์ฉ (๋ค์ค ์กฐ๊ฑด)
conditions = [arr < 0, arr == 0, arr > 0]
choices = [-1, 0, 1]
result = np.select(conditions, choices)
print(result) # [ 1 -1 1 -1 1]
# clip ์ฌ์ฉ
arr = np.array([-5, -2, 0, 3, 7, 10])
result = np.clip(arr, 0, 5) # 0๊ณผ 5 ์ฌ์ด๋ก ์ ํ
print(result) # [0 0 0 3 5 5]
6. 고급 인덱싱과 마스킹¶
6.1 np.where 활용¶
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# ์กฐ๊ฑด์ ๋ง์กฑํ๋ ์ธ๋ฑ์ค
indices = np.where(arr > 5)
print(indices) # (array([1, 2, 2, 2]), array([2, 0, 1, 2]))
# ์กฐ๊ฑด๋ถ ๊ฐ ํ ๋น
result = np.where(arr % 2 == 0, 'even', 'odd')
print(result)
6.2 np.take์ np.put¶
arr = np.array([10, 20, 30, 40, 50])
# take: ์ธ๋ฑ์ค๋ก ์์ ๊ฐ์ ธ์ค๊ธฐ
indices = [0, 2, 4]
print(np.take(arr, indices)) # [10 30 50]
# put: ์ธ๋ฑ์ค ์์น์ ๊ฐ ๋ฃ๊ธฐ
np.put(arr, [0, 2, 4], [100, 300, 500])
print(arr) # [100 20 300 40 500]
6.3 마스크 배열¶
# ๋ง์คํฌ ๋ฐฐ์ด ์์ฑ
data = np.array([1, 2, -999, 4, -999, 6])
mask = (data == -999)
masked_arr = np.ma.masked_array(data, mask)
print(masked_arr) # [1 2 -- 4 -- 6]
print(masked_arr.mean()) # 3.25 (๋ง์คํฌ๋ ๊ฐ ์ ์ธ)
# ๋ง์คํฌ๋ ๊ฐ ์ฑ์ฐ๊ธฐ
filled = masked_arr.filled(0)
print(filled) # [1 2 0 4 0 6]
7. 배열 저장과 로딩¶
7.1 바이너리 형식¶
arr = np.array([[1, 2, 3], [4, 5, 6]])
# ๋จ์ผ ๋ฐฐ์ด ์ ์ฅ/๋ก๋ฉ
np.save('array.npy', arr)
loaded = np.load('array.npy')
# ์ฌ๋ฌ ๋ฐฐ์ด ์ ์ฅ/๋ก๋ฉ
np.savez('arrays.npz', arr1=arr, arr2=arr*2)
data = np.load('arrays.npz')
print(data['arr1'])
print(data['arr2'])
# ์์ถ ์ ์ฅ
np.savez_compressed('arrays_compressed.npz', arr1=arr)
7.2 텍스트 형식¶
arr = np.array([[1, 2, 3], [4, 5, 6]])
# CSV ์ ์ฅ
np.savetxt('array.csv', arr, delimiter=',', fmt='%d')
# CSV ๋ก๋ฉ
loaded = np.loadtxt('array.csv', delimiter=',')
# ํค๋์ ํจ๊ป ์ ์ฅ
np.savetxt('array_header.csv', arr, delimiter=',',
header='col1,col2,col3', comments='')
# genfromtxt (๊ฒฐ์ธก๊ฐ ์ฒ๋ฆฌ ๊ฐ๋ฅ)
data = np.genfromtxt('array.csv', delimiter=',',
missing_values='NA', filling_values=0)
8. 메모리 매핑¶
대용량 파일을 메모리에 전부 로딩하지 않고 처리할 때 유용합니다.
# ๋ฉ๋ชจ๋ฆฌ ๋งคํ๋ ๋ฐฐ์ด ์์ฑ
shape = (10000, 10000)
dtype = np.float64
# ํ์ผ ๊ธฐ๋ฐ ๋ฉ๋ชจ๋ฆฌ ๋งคํ
mmap = np.memmap('large_array.dat', dtype=dtype, mode='w+', shape=shape)
mmap[:100, :100] = np.random.rand(100, 100)
mmap.flush() # ๋์คํฌ์ ์ฐ๊ธฐ
# ์ฝ๊ธฐ ์ ์ฉ ๋ก๋ฉ
mmap_read = np.memmap('large_array.dat', dtype=dtype, mode='r', shape=shape)
print(mmap_read[:10, :10])
연습 문제¶
문제 1: 선형 회귀¶
다음 데이터에 대해 최소 자승법으로 선형 회귀 계수를 구하세요.
x = np.array([1, 2, 3, 4, 5])
y = np.array([2.2, 2.8, 3.6, 4.5, 5.1])
# ํ์ด
A = np.vstack([x, np.ones(len(x))]).T
m, c = np.linalg.lstsq(A, y, rcond=None)[0]
print(f"๊ธฐ์ธ๊ธฐ: {m:.3f}, ์ ํธ: {c:.3f}")
문제 2: 공분산 행렬의 고유값 분해¶
3개의 변수를 가진 데이터의 공분산 행렬을 구하고 고유값 분해하세요.
data = np.random.randn(100, 3)
data[:, 1] = data[:, 0] * 2 + np.random.randn(100) * 0.1 # ์๊ด๊ด๊ณ ์ถ๊ฐ
# ํ์ด
cov_matrix = np.cov(data.T)
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
print("๊ณ ์ ๊ฐ:", eigenvalues)
print("๊ณ ์ ๋ฒกํฐ:\n", eigenvectors)
문제 3: 몬테카를로 시뮬레이션¶
난수를 이용해 원의 넓이(π)를 추정하세요.
# ํ์ด
n = 1000000
rng = np.random.default_rng(42)
x = rng.uniform(-1, 1, n)
y = rng.uniform(-1, 1, n)
inside = (x**2 + y**2) <= 1
pi_estimate = 4 * inside.sum() / n
print(f"ฯ ์ถ์ ๊ฐ: {pi_estimate:.6f}")
요약¶
| 기능 | 함수/메서드 |
|---|---|
| 행렬 곱셈 | np.dot(), @, np.matmul() |
| 선형대수 | np.linalg.inv(), solve(), eig(), svd() |
| 통계 | np.mean(), np.std(), np.corrcoef(), np.cov() |
| 난수 | np.random.default_rng(), random(), normal(), choice() |
| 저장/로딩 | np.save(), np.load(), np.savetxt(), np.loadtxt() |
| 성능 | 벡터화 연산, np.where(), 메모리 매핑 |