8. Data Visualization Basics (Matplotlib)
8. Data Visualization Basics (Matplotlib)¶
Previous: Descriptive Stats & EDA | Next: Data Visualization Advanced
Overview¶
Matplotlib is Python's representative visualization library. This covers various chart types and customization methods.
1. Matplotlib ๊ธฐ์ด¶
1.1 ๊ธฐ๋ณธ ํ๋กฏ ์์ฑ¶
import matplotlib.pyplot as plt
import numpy as np
# ๋ฐ์ดํฐ ์ค๋น
x = np.linspace(0, 10, 100)
y = np.sin(x)
# Basic ํ๋กฏ
plt.plot(x, y)
plt.show()
# ์ ๋ชฉ๊ณผ ๋ ์ด๋ธ ์ถ๊ฐ
plt.plot(x, y)
plt.title('Sine Wave')
plt.xlabel('X axis')
plt.ylabel('Y axis')
plt.show()
# ์ ์ฅ
plt.plot(x, y)
plt.savefig('plot.png', dpi=300, bbox_inches='tight')
plt.close()
1.2 Figure์ Axes¶
# ๊ฐ์ฒด ์งํฅ ๋ฐฉ์ (๊ถ์ฅ)
fig, ax = plt.subplots(figsize=(10, 6))
x = np.linspace(0, 10, 100)
ax.plot(x, np.sin(x), label='sin')
ax.plot(x, np.cos(x), label='cos')
ax.set_title('Trigonometric Functions', fontsize=14)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
1.3 ์ฌ๋ฌ ํ๋กฏ (Subplots)¶
# 2x2 ์๋ธํ๋กฏ
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
x = np.linspace(0, 10, 100)
axes[0, 0].plot(x, np.sin(x))
axes[0, 0].set_title('Sine')
axes[0, 1].plot(x, np.cos(x))
axes[0, 1].set_title('Cosine')
axes[1, 0].plot(x, np.exp(-x/5) * np.sin(x))
axes[1, 0].set_title('Damped Sine')
axes[1, 1].plot(x, np.tan(x))
axes[1, 1].set_ylim(-5, 5)
axes[1, 1].set_title('Tangent')
plt.tight_layout()
plt.show()
# ๋ค๋ฅธ ํฌ๊ธฐ์ ์๋ธํ๋กฏ
fig = plt.figure(figsize=(12, 6))
ax1 = fig.add_subplot(1, 2, 1) # 1ํ 2์ด์ 1๋ฒ์งธ
ax2 = fig.add_subplot(2, 2, 2) # 2ํ 2์ด์ 2๋ฒ์งธ
ax3 = fig.add_subplot(2, 2, 4) # 2ํ 2์ด์ 4๋ฒ์งธ
ax1.plot(x, np.sin(x))
ax2.plot(x, np.cos(x))
ax3.plot(x, np.tan(x))
plt.tight_layout()
plt.show()
2. ์ ๊ทธ๋ํ (Line Plot)¶
2.1 ๊ธฐ๋ณธ ์ ๊ทธ๋ํ¶
x = np.arange(1, 11)
y1 = x ** 2
y2 = x ** 1.5
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(x, y1, label='xยฒ')
ax.plot(x, y2, label='x^1.5')
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Power Functions')
ax.legend()
ax.grid(True, alpha=0.3)
plt.show()
2.2 ์ ์คํ์ผ ์ปค์คํฐ๋ง์ด์ง¶
x = np.linspace(0, 10, 50)
fig, ax = plt.subplots(figsize=(12, 6))
# ๋ค์ํ ์คํ์ผ
ax.plot(x, np.sin(x), 'b-', linewidth=2, label='์ค์ ')
ax.plot(x, np.sin(x + 1), 'r--', linewidth=2, label='์ ์ ')
ax.plot(x, np.sin(x + 2), 'g-.', linewidth=2, label='์ ์ +์ค์ ')
ax.plot(x, np.sin(x + 3), 'm:', linewidth=2, label='์ ')
# ๋ง์ปค ์ถ๊ฐ
ax.plot(x[::5], np.sin(x[::5] + 4), 'ko-', markersize=8, label='๋ง์ปค')
ax.legend()
ax.set_title('Line Styles')
plt.show()
# ์ ์คํ์ผ ์ต์
# '-': ์ค์ , '--': ์ ์ , '-.': ์ ์ +์ค์ , ':': ์
# ์์: 'b'(blue), 'g'(green), 'r'(red), 'c'(cyan), 'm'(magenta), 'y'(yellow), 'k'(black), 'w'(white)
# ๋ง์ปค: 'o'(์), 's'(์ฌ๊ฐ), '^'(์ผ๊ฐ), 'd'(๋ค์ด์๋ชฌ๋), 'x', '+', '*'
2.3 ์๊ณ์ด ๊ทธ๋ํ¶
import pandas as pd
# ์๊ณ์ด ๋ฐ์ดํฐ
dates = pd.date_range('2023-01-01', periods=365, freq='D')
values = np.cumsum(np.random.randn(365)) + 100
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(dates, values, 'b-', linewidth=1)
ax.fill_between(dates, values, alpha=0.3)
ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.set_title('Time Series Plot')
# x์ถ ๋ ์ง ํฌ๋งท
import matplotlib.dates as mdates
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
3. ๋ง๋ ๊ทธ๋ํ (Bar Chart)¶
3.1 ์์ง ๋ง๋ ๊ทธ๋ํ¶
categories = ['A', 'B', 'C', 'D', 'E']
values = [23, 45, 56, 78, 32]
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(categories, values, color='steelblue', edgecolor='black')
# ๊ฐ ๋ ์ด๋ธ ์ถ๊ฐ
for bar, val in zip(bars, values):
ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
str(val), ha='center', va='bottom', fontsize=12)
ax.set_xlabel('Category')
ax.set_ylabel('Value')
ax.set_title('Vertical Bar Chart')
plt.show()
3.2 ์ํ ๋ง๋ ๊ทธ๋ํ¶
categories = ['Very Long Category A', 'Category B', 'Category C', 'Category D']
values = [45, 32, 67, 54]
fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.barh(categories, values, color='coral', edgecolor='black')
# ๊ฐ ๋ ์ด๋ธ
for bar, val in zip(bars, values):
ax.text(bar.get_width() + 1, bar.get_y() + bar.get_height()/2,
str(val), ha='left', va='center')
ax.set_xlabel('Value')
ax.set_title('Horizontal Bar Chart')
plt.show()
3.3 ๊ทธ๋ฃน ๋ง๋ ๊ทธ๋ํ¶
categories = ['Q1', 'Q2', 'Q3', 'Q4']
series1 = [20, 35, 30, 35]
series2 = [25, 32, 34, 20]
series3 = [22, 28, 36, 25]
x = np.arange(len(categories))
width = 0.25
fig, ax = plt.subplots(figsize=(12, 6))
bars1 = ax.bar(x - width, series1, width, label='2021', color='steelblue')
bars2 = ax.bar(x, series2, width, label='2022', color='coral')
bars3 = ax.bar(x + width, series3, width, label='2023', color='green')
ax.set_xlabel('Quarter')
ax.set_ylabel('Sales')
ax.set_title('Quarterly Sales Comparison')
ax.set_xticks(x)
ax.set_xticklabels(categories)
ax.legend()
plt.tight_layout()
plt.show()
3.4 ์คํ ๋ง๋ ๊ทธ๋ํ¶
categories = ['A', 'B', 'C', 'D']
values1 = [20, 35, 30, 35]
values2 = [25, 32, 34, 20]
values3 = [15, 25, 20, 30]
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(categories, values1, label='Series 1', color='steelblue')
ax.bar(categories, values2, bottom=values1, label='Series 2', color='coral')
ax.bar(categories, values3, bottom=np.array(values1) + np.array(values2),
label='Series 3', color='green')
ax.set_xlabel('Category')
ax.set_ylabel('Value')
ax.set_title('Stacked Bar Chart')
ax.legend()
plt.show()
4. ํ์คํ ๊ทธ๋จ (Histogram)¶
# ์ ๊ท๋ถํฌ ๋ฐ์ดํฐ
np.random.seed(42)
data = np.random.randn(1000)
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# Basic ํ์คํ ๊ทธ๋จ
axes[0, 0].hist(data, bins=30, edgecolor='black', alpha=0.7)
axes[0, 0].set_title('Basic Histogram')
# ๋ฐ๋ ํ์คํ ๊ทธ๋จ
axes[0, 1].hist(data, bins=30, density=True, edgecolor='black', alpha=0.7)
# ์ ๊ท๋ถํฌ ๊ณก์ ์ถ๊ฐ
x = np.linspace(-4, 4, 100)
from scipy import stats
axes[0, 1].plot(x, stats.norm.pdf(x), 'r-', linewidth=2)
axes[0, 1].set_title('Density Histogram with Normal Curve')
# ๋์ ํ์คํ ๊ทธ๋จ
axes[1, 0].hist(data, bins=30, cumulative=True, edgecolor='black', alpha=0.7)
axes[1, 0].set_title('Cumulative Histogram')
# ์ฌ๋ฌ ๋ฐ์ดํฐ ๋น๊ต
data1 = np.random.randn(1000)
data2 = np.random.randn(1000) + 2
axes[1, 1].hist(data1, bins=30, alpha=0.5, label='Data 1', edgecolor='black')
axes[1, 1].hist(data2, bins=30, alpha=0.5, label='Data 2', edgecolor='black')
axes[1, 1].legend()
axes[1, 1].set_title('Overlapping Histograms')
plt.tight_layout()
plt.show()
5. ์ฐ์ ๋ (Scatter Plot)¶
5.1 ๊ธฐ๋ณธ ์ฐ์ ๋¶
np.random.seed(42)
x = np.random.randn(100)
y = 2 * x + np.random.randn(100) * 0.5
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(x, y, alpha=0.7, edgecolors='black', s=50)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Basic Scatter Plot')
# ์ถ์ธ์ ์ถ๊ฐ
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
ax.plot(x, p(x), "r--", linewidth=2, label=f'Trend: y={z[0]:.2f}x+{z[1]:.2f}')
ax.legend()
plt.show()
5.2 ๋ฒ๋ธ ์ฐจํธ¶
np.random.seed(42)
x = np.random.rand(50)
y = np.random.rand(50)
sizes = np.random.rand(50) * 500
colors = np.random.rand(50)
fig, ax = plt.subplots(figsize=(10, 8))
scatter = ax.scatter(x, y, s=sizes, c=colors, alpha=0.6,
cmap='viridis', edgecolors='black')
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Bubble Chart')
# ์ปฌ๋ฌ๋ฐ ์ถ๊ฐ
cbar = plt.colorbar(scatter)
cbar.set_label('Color Value')
plt.show()
5.3 ์นดํ ๊ณ ๋ฆฌ๋ณ ์ฐ์ ๋¶
np.random.seed(42)
categories = ['A', 'B', 'C']
colors = ['red', 'blue', 'green']
fig, ax = plt.subplots(figsize=(10, 6))
for cat, color in zip(categories, colors):
x = np.random.randn(30) + ord(cat) - 65 # A=0, B=1, C=2
y = np.random.randn(30) + ord(cat) - 65
ax.scatter(x, y, c=color, label=cat, alpha=0.7, s=50, edgecolors='black')
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Scatter Plot by Category')
ax.legend()
plt.show()
6. ํ์ด ์ฐจํธ (Pie Chart)¶
labels = ['Product A', 'Product B', 'Product C', 'Product D', 'Others']
sizes = [30, 25, 20, 15, 10]
colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#c2c2f0']
explode = (0.05, 0, 0, 0, 0) # ์ฒซ ๋ฒ์งธ ์กฐ๊ฐ ๋ถ๋ฆฌ
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
# Basic ํ์ด ์ฐจํธ
axes[0].pie(sizes, labels=labels, colors=colors, explode=explode,
autopct='%1.1f%%', shadow=True, startangle=90)
axes[0].set_title('Basic Pie Chart')
# ๋๋ ์ฐจํธ
wedges, texts, autotexts = axes[1].pie(sizes, colors=colors, explode=explode,
autopct='%1.1f%%', startangle=90,
pctdistance=0.85)
centre_circle = plt.Circle((0,0), 0.70, fc='white')
axes[1].add_artist(centre_circle)
axes[1].legend(wedges, labels, loc='center left', bbox_to_anchor=(1, 0.5))
axes[1].set_title('Donut Chart')
plt.tight_layout()
plt.show()
7. ๋ฐ์ค ํ๋กฏ (Box Plot)¶
np.random.seed(42)
data = [np.random.normal(0, std, 100) for std in range(1, 5)]
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
# Basic ๋ฐ์คํ๋กฏ
bp = axes[0].boxplot(data, labels=['A', 'B', 'C', 'D'])
axes[0].set_title('Basic Box Plot')
axes[0].set_ylabel('Value')
# ์ปค์คํฐ๋ง์ด์ง๋ ๋ฐ์คํ๋กฏ
bp = axes[1].boxplot(data, labels=['A', 'B', 'C', 'D'],
patch_artist=True, # ๋ฐ์ค ์์ ์ฑ์ฐ๊ธฐ
notch=True, # ๋
ธ์น (์ ๋ขฐ๊ตฌ๊ฐ)
showmeans=True, # ํ๊ท ํ์
meanline=True) # ํ๊ท ์
colors = ['pink', 'lightblue', 'lightgreen', 'lightyellow']
for patch, color in zip(bp['boxes'], colors):
patch.set_facecolor(color)
axes[1].set_title('Customized Box Plot')
axes[1].set_ylabel('Value')
plt.tight_layout()
plt.show()
# ์ํ ๋ฐ์คํ๋กฏ
fig, ax = plt.subplots(figsize=(10, 6))
ax.boxplot(data, labels=['A', 'B', 'C', 'D'], vert=False)
ax.set_title('Horizontal Box Plot')
plt.show()
8. ํํธ๋งต (Heatmap)¶
# ์๊ดํ๋ ฌ ํํธ๋งต
np.random.seed(42)
data = np.random.randn(10, 5)
df = pd.DataFrame(data, columns=['A', 'B', 'C', 'D', 'E'])
correlation = df.corr()
fig, ax = plt.subplots(figsize=(10, 8))
im = ax.imshow(correlation, cmap='coolwarm', aspect='auto', vmin=-1, vmax=1)
# ์ถ ๋ ์ด๋ธ
ax.set_xticks(range(len(correlation.columns)))
ax.set_yticks(range(len(correlation.columns)))
ax.set_xticklabels(correlation.columns)
ax.set_yticklabels(correlation.columns)
# ๊ฐ ํ์
for i in range(len(correlation)):
for j in range(len(correlation)):
text = ax.text(j, i, f'{correlation.iloc[i, j]:.2f}',
ha='center', va='center', color='black')
# ์ปฌ๋ฌ๋ฐ
cbar = plt.colorbar(im)
cbar.set_label('Correlation')
ax.set_title('Correlation Heatmap')
plt.tight_layout()
plt.show()
9. ์คํ์ผ๊ณผ ํ ๋ง¶
# Usage ๊ฐ๋ฅํ ์คํ์ผ ํ์ธ
print(plt.style.available)
# ์คํ์ผ ์ ์ฉ
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
styles = ['default', 'seaborn-v0_8', 'ggplot', 'dark_background']
x = np.linspace(0, 10, 100)
for ax, style in zip(axes.flat, styles):
with plt.style.context(style):
ax.plot(x, np.sin(x), label='sin')
ax.plot(x, np.cos(x), label='cos')
ax.set_title(f'Style: {style}')
ax.legend()
plt.tight_layout()
plt.show()
# ์ ์ญ ์คํ์ผ ์ค์
# plt.style.use('seaborn-v0_8')
10. ๊ทธ๋ํ ์ปค์คํฐ๋ง์ด์ง¶
# ํฐํธ ์ค์
plt.rcParams['font.family'] = 'DejaVu Sans'
plt.rcParams['font.size'] = 12
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['axes.labelsize'] = 12
# ๊ทธ๋ํ ์์ ์ปค์คํฐ๋ง์ด์ง
fig, ax = plt.subplots(figsize=(12, 6))
x = np.linspace(0, 10, 100)
ax.plot(x, np.sin(x), linewidth=2, color='navy', label='sin(x)')
# ์ถ ๋ฒ์
ax.set_xlim(0, 10)
ax.set_ylim(-1.5, 1.5)
# ๋๊ธ
ax.set_xticks(np.arange(0, 11, 2))
ax.set_yticks(np.arange(-1, 1.5, 0.5))
# ๊ทธ๋ฆฌ๋
ax.grid(True, linestyle='--', alpha=0.5)
# ์ฃผ์
ax.annotate('Peak', xy=(np.pi/2, 1), xytext=(np.pi/2 + 1, 1.3),
arrowprops=dict(facecolor='black', shrink=0.05),
fontsize=12)
# ํ
์คํธ
ax.text(5, -1.3, 'Note: This is a sine wave', fontsize=10, style='italic')
# ์ ๋ชฉ๊ณผ ๋ ์ด๋ธ
ax.set_title('Customized Sine Wave Plot', fontsize=16, fontweight='bold')
ax.set_xlabel('X axis', fontsize=12)
ax.set_ylabel('Y axis', fontsize=12)
# ๋ฒ๋ก
ax.legend(loc='upper right', frameon=True, shadow=True)
# ์คํ์ธ (ํ
๋๋ฆฌ)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
plt.tight_layout()
plt.show()
Summary¶
| ์ฐจํธ ์ ํ | ํจ์ | ์ฉ๋ |
|---|---|---|
| ์ ๊ทธ๋ํ | plot() |
์๊ณ์ด, ์ฐ์ ๋ฐ์ดํฐ |
| ๋ง๋ ๊ทธ๋ํ | bar(), barh() |
๋ฒ์ฃผํ ๋น๊ต |
| ํ์คํ ๊ทธ๋จ | hist() |
๋ถํฌ ํ์ธ |
| ์ฐ์ ๋ | scatter() |
๋ ๋ณ์ ๊ด๊ณ |
| ํ์ด ์ฐจํธ | pie() |
๋น์จ, ๊ตฌ์ฑ |
| ๋ฐ์ค ํ๋กฏ | boxplot() |
๋ถํฌ, ์ด์์น |
| ํํธ๋งต | imshow() |
ํ๋ ฌ ๋ฐ์ดํฐ |
| ์ปค์คํฐ๋ง์ด์ง | ๋ฉ์๋ |
|---|---|
| ์ ๋ชฉ/๋ ์ด๋ธ | set_title(), set_xlabel(), set_ylabel() |
| ๋ฒ์ | set_xlim(), set_ylim() |
| ๋๊ธ | set_xticks(), set_yticks() |
| ๋ฒ๋ก | legend() |
| ๊ทธ๋ฆฌ๋ | grid() |
| ์ ์ฅ | savefig() |