8. Data Visualization Basics (Matplotlib)

8. Data Visualization Basics (Matplotlib)

Previous: Descriptive Stats & EDA | Next: Data Visualization Advanced

Overview

Matplotlib is Python's most widely used visualization library. This lesson covers its common chart types and the methods used to customize them.


1. Matplotlib Basics

1.1 Creating a Basic Plot

import matplotlib.pyplot as plt
import numpy as np

# Sample data: sin(x) evaluated at 100 evenly spaced points on [0, 10]
x = np.linspace(start=0, stop=10, num=100)
y = np.sin(x)

# 1) Bare line plot through the implicit pyplot interface
plt.plot(x, y)
plt.show()

# 2) The same curve with axis labels and a title
plt.plot(x, y)
plt.xlabel('X axis')
plt.ylabel('Y axis')
plt.title('Sine Wave')
plt.show()

# 3) Write the figure to a file instead of displaying it
plt.plot(x, y)
plt.savefig('plot.png', bbox_inches='tight', dpi=300)
plt.close()

1.2 Figure and Axes

# Object-oriented interface (recommended): draw on explicit Figure/Axes objects
fig, ax = plt.subplots(figsize=(10, 6))

x = np.linspace(0, 10, 100)
for name, func in (('sin', np.sin), ('cos', np.cos)):
    ax.plot(x, func(x), label=name)

ax.set_title('Trigonometric Functions', fontsize=14)
ax.set(xlabel='X', ylabel='Y')
ax.grid(True, alpha=0.3)
ax.legend()

fig.tight_layout()
plt.show()

1.3 Multiple Plots (Subplots)

# 2x2 grid of subplots, one curve per panel
x = np.linspace(0, 10, 100)
panels = (
    ('Sine', np.sin(x)),
    ('Cosine', np.cos(x)),
    ('Damped Sine', np.exp(-x/5) * np.sin(x)),
    ('Tangent', np.tan(x)),
)

fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# axes.flat walks the grid row by row, matching the order of `panels`
for panel_ax, (panel_title, curve) in zip(axes.flat, panels):
    panel_ax.plot(x, curve)
    panel_ax.set_title(panel_title)

# Only the tangent panel (bottom right) gets a fixed y-range
axes[1, 1].set_ylim(-5, 5)

fig.tight_layout()
plt.show()

# Subplots of different sizes: combine different grid specs in one figure
fig = plt.figure(figsize=(12, 6))

# Defined here so the snippet runs on its own
x = np.linspace(0, 10, 100)

ax1 = fig.add_subplot(1, 2, 1)  # cell 1 of a 1-row x 2-column grid
ax2 = fig.add_subplot(2, 2, 2)  # cell 2 of a 2-row x 2-column grid
ax3 = fig.add_subplot(2, 2, 4)  # cell 4 of a 2-row x 2-column grid

ax1.plot(x, np.sin(x))
ax2.plot(x, np.cos(x))
ax3.plot(x, np.tan(x))
# tan(x) grows very large near its asymptotes; clip the view so the
# curve stays readable
ax3.set_ylim(-5, 5)

plt.tight_layout()
plt.show()

2. Line Plot

2.1 Basic Line Plot

# Two power functions of x = 1..10 on shared axes
x = np.arange(1, 11)
y1 = x ** 2
y2 = x ** 1.5

fig, ax = plt.subplots(figsize=(10, 6))

# '\u00b2' is the superscript-two character (x squared); writing it as an
# escape keeps this line ASCII-only so it cannot be mis-encoded again
ax.plot(x, y1, label='x\u00b2')
ax.plot(x, y2, label='x^1.5')

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Power Functions')
ax.legend()
ax.grid(True, alpha=0.3)

plt.show()

2.2 Line Style Customization

x = np.linspace(0, 10, 50)

fig, ax = plt.subplots(figsize=(12, 6))

# One phase-shifted sine curve per line style. The format string combines
# a colour letter with a line-style code (see the reference list below).
# Legend labels are plain ASCII: the previous labels were mis-encoded text.
ax.plot(x, np.sin(x), 'b-', linewidth=2, label='solid')
ax.plot(x, np.sin(x + 1), 'r--', linewidth=2, label='dashed')
ax.plot(x, np.sin(x + 2), 'g-.', linewidth=2, label='dash-dot')
ax.plot(x, np.sin(x + 3), 'm:', linewidth=2, label='dotted')

# Add markers: every fifth sample, black circles joined by a solid line
ax.plot(x[::5], np.sin(x[::5] + 4), 'ko-', markersize=8, label='markers')

ax.legend()
ax.set_title('Line Styles')
plt.show()

# Line style options
# '-': solid, '--': dashed, '-.': dash-dot, ':': dotted
# Colors: 'b'(blue), 'g'(green), 'r'(red), 'c'(cyan), 'm'(magenta), 'y'(yellow), 'k'(black), 'w'(white)
# Markers: 'o'(circle), 's'(square), '^'(triangle), 'd'(diamond), 'x', '+', '*'

2.3 Time Series Plot

import matplotlib.dates as mdates
import pandas as pd

# Time series data: a random walk offset by 100, one value per day.
# The seed is fixed so the figure is the same on every run.
np.random.seed(42)
n_days = 365
dates = pd.date_range('2023-01-01', periods=n_days, freq='D')
values = np.cumsum(np.random.randn(n_days)) + 100

fig, ax = plt.subplots(figsize=(14, 6))

ax.plot(dates, values, 'b-', linewidth=1)
ax.fill_between(dates, values, alpha=0.3)

ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.set_title('Time Series Plot')

# x-axis date format: one major tick per month, labelled as YYYY-MM
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
# Rotate the tick labels on this Axes explicitly rather than going
# through pyplot's "current axes" state
ax.tick_params(axis='x', labelrotation=45)

plt.tight_layout()
plt.show()

3. Bar Chart

3.1 Vertical Bar Chart

categories = ['A', 'B', 'C', 'D', 'E']
values = [23, 45, 56, 78, 32]

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(categories, values, color='steelblue', edgecolor='black')

# Value labels: centred over each bar, one data unit above its top edge
for rect, value in zip(bars, values):
    x_center = rect.get_x() + rect.get_width()/2
    y_above = rect.get_height() + 1
    ax.text(x_center, y_above, str(value),
            ha='center', va='bottom', fontsize=12)

ax.set(xlabel='Category', ylabel='Value', title='Vertical Bar Chart')

plt.show()

3.2 Horizontal Bar Chart

categories = ['Very Long Category A', 'Category B', 'Category C', 'Category D']
values = [45, 32, 67, 54]

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.barh(categories, values, color='coral', edgecolor='black')

# Value labels: vertically centred on each bar, one data unit past its end
for rect, value in zip(bars, values):
    x_after = rect.get_width() + 1
    y_center = rect.get_y() + rect.get_height()/2
    ax.text(x_after, y_center, str(value), ha='left', va='center')

ax.set(xlabel='Value', title='Horizontal Bar Chart')

plt.show()

3.3 Grouped Bar Chart

categories = ['Q1', 'Q2', 'Q3', 'Q4']
series1 = [20, 35, 30, 35]
series2 = [25, 32, 34, 20]
series3 = [22, 28, 36, 25]

width = 0.25
x = np.arange(len(categories))

fig, ax = plt.subplots(figsize=(12, 6))

# Three bars per quarter: each series is shifted by one bar width so the
# group sits left / centre / right of its tick position
groups = (
    (x - width, series1, '2021', 'steelblue'),
    (x, series2, '2022', 'coral'),
    (x + width, series3, '2023', 'green'),
)
bars1, bars2, bars3 = [
    ax.bar(positions, heights, width, label=year, color=fill)
    for positions, heights, year, fill in groups
]

ax.set(xlabel='Quarter', ylabel='Sales', title='Quarterly Sales Comparison')
ax.set_xticks(x)
ax.set_xticklabels(categories)
ax.legend()

fig.tight_layout()
plt.show()

3.4 Stacked Bar Chart

categories = ['A', 'B', 'C', 'D']
values1 = [20, 35, 30, 35]
values2 = [25, 32, 34, 20]
values3 = [15, 25, 20, 30]

fig, ax = plt.subplots(figsize=(10, 6))

# Each series starts where the running total of the series below it ends
layers = (
    (values1, 'Series 1', 'steelblue'),
    (values2, 'Series 2', 'coral'),
    (values3, 'Series 3', 'green'),
)
baseline = np.zeros(len(categories))
for heights, name, fill in layers:
    ax.bar(categories, heights, bottom=baseline, label=name, color=fill)
    baseline = baseline + np.array(heights)

ax.set(xlabel='Category', ylabel='Value', title='Stacked Bar Chart')
ax.legend()

plt.show()

4. Histogram

from scipy import stats

# Normally distributed sample (seeded)
np.random.seed(42)
data = np.random.randn(1000)

fig, axes = plt.subplots(2, 2, figsize=(12, 10))
ax_basic, ax_density, ax_cumulative, ax_overlay = axes.flat

# Keyword arguments shared by the three single-sample histograms
hist_style = dict(bins=30, edgecolor='black', alpha=0.7)

# Basic histogram (raw counts)
ax_basic.hist(data, **hist_style)
ax_basic.set_title('Basic Histogram')

# Density histogram with the standard normal PDF drawn on top
ax_density.hist(data, density=True, **hist_style)
x = np.linspace(-4, 4, 100)
ax_density.plot(x, stats.norm.pdf(x), 'r-', linewidth=2)
ax_density.set_title('Density Histogram with Normal Curve')

# Cumulative histogram
ax_cumulative.hist(data, cumulative=True, **hist_style)
ax_cumulative.set_title('Cumulative Histogram')

# Comparing two samples; the second one is shifted by +2
data1 = np.random.randn(1000)
data2 = np.random.randn(1000) + 2
for sample, name in ((data1, 'Data 1'), (data2, 'Data 2')):
    ax_overlay.hist(sample, bins=30, alpha=0.5, label=name, edgecolor='black')
ax_overlay.legend()
ax_overlay.set_title('Overlapping Histograms')

fig.tight_layout()
plt.show()

5. Scatter Plot

5.1 Basic Scatter Plot

# Noisy linear relationship: y = 2x plus Gaussian noise scaled by 0.5
np.random.seed(42)
x = np.random.randn(100)
y = 2 * x + np.random.randn(100) * 0.5

fig, ax = plt.subplots(figsize=(10, 6))

ax.scatter(x, y, alpha=0.7, edgecolors='black', s=50)

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Basic Scatter Plot')

# Trend line: least-squares fit of a degree-1 polynomial
z = np.polyfit(x, y, 1)  # z[0] is the slope, z[1] the intercept
p = np.poly1d(z)
# x is unsorted, so plotting p(x) against x would retrace the same segment
# back and forth and smear the dash pattern. A straight line only needs
# its two end points.
x_line = np.array([x.min(), x.max()])
# ':+.2f' always prints the sign, so a negative intercept reads 'x-0.12'
# instead of 'x+-0.12'
ax.plot(x_line, p(x_line), "r--", linewidth=2,
        label=f'Trend: y={z[0]:.2f}x{z[1]:+.2f}')
ax.legend()

plt.show()

5.2 Bubble Chart

np.random.seed(42)
n_points = 50
x = np.random.rand(n_points)
y = np.random.rand(n_points)
sizes = np.random.rand(n_points) * 500
colors = np.random.rand(n_points)

fig, ax = plt.subplots(figsize=(10, 8))

# Marker size is taken from `sizes`; marker colour is `colors` mapped
# through the 'viridis' colormap
scatter = ax.scatter(x, y, s=sizes, c=colors, cmap='viridis',
                     alpha=0.6, edgecolors='black')

ax.set(xlabel='X', ylabel='Y', title='Bubble Chart')

# Colorbar for the colour-mapped values
cbar = fig.colorbar(scatter, ax=ax)
cbar.set_label('Color Value')

plt.show()

5.3 Scatter Plot by Category

np.random.seed(42)

categories = ['A', 'B', 'C']
colors = ['red', 'blue', 'green']

fig, ax = plt.subplots(figsize=(10, 6))

# One cloud of 30 points per category, shifted along both axes by the
# category's offset
point_style = dict(alpha=0.7, s=50, edgecolors='black')
for cat, color in zip(categories, colors):
    code = ord(cat)  # ord('A') == 65, so code - 65 is 0, 1, 2 for A, B, C
    x = np.random.randn(30) + code - 65
    y = np.random.randn(30) + code - 65
    ax.scatter(x, y, c=color, label=cat, **point_style)

ax.set(xlabel='X', ylabel='Y', title='Scatter Plot by Category')
ax.legend()

plt.show()

6. Pie Chart

labels = ['Product A', 'Product B', 'Product C', 'Product D', 'Others']
sizes = [30, 25, 20, 15, 10]
colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#c2c2f0']
explode = (0.05, 0, 0, 0, 0)  # offset the first slice from the centre

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
ax_pie, ax_donut = axes

# Keyword arguments common to both charts
shared = dict(colors=colors, explode=explode,
              autopct='%1.1f%%', startangle=90)

# Basic pie chart: slice labels drawn next to the wedges
ax_pie.pie(sizes, labels=labels, shadow=True, **shared)
ax_pie.set_title('Basic Pie Chart')

# Donut chart: a pie with a white disc over its centre and the slice
# names moved into a legend beside the chart
wedges, texts, autotexts = ax_donut.pie(sizes, pctdistance=0.85, **shared)
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
ax_donut.add_artist(centre_circle)
ax_donut.legend(wedges, labels, loc='center left', bbox_to_anchor=(1, 0.5))
ax_donut.set_title('Donut Chart')

fig.tight_layout()
plt.show()

7. Box Plot

# Four zero-mean normal samples with standard deviations 1, 2, 3, 4
np.random.seed(42)
data = [np.random.normal(0, std, 100) for std in range(1, 5)]
group_names = ['A', 'B', 'C', 'D']

fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# Basic box plot.
# Group names are set on the axis instead of via boxplot(labels=...):
# Matplotlib 3.9 deprecated that keyword (renamed to tick_labels), while
# set_xticklabels works on old and new releases alike.
axes[0].boxplot(data)
axes[0].set_xticklabels(group_names)
axes[0].set_title('Basic Box Plot')
axes[0].set_ylabel('Value')

# Customized box plot
bp = axes[1].boxplot(data,
                     patch_artist=True,  # fill the boxes with colour
                     notch=True,         # notches (confidence interval)
                     showmeans=True,     # show the mean
                     meanline=True)      # draw the mean as a line
axes[1].set_xticklabels(group_names)

# Fill each box with its own colour
colors = ['pink', 'lightblue', 'lightgreen', 'lightyellow']
for patch, color in zip(bp['boxes'], colors):
    patch.set_facecolor(color)

axes[1].set_title('Customized Box Plot')
axes[1].set_ylabel('Value')

plt.tight_layout()
plt.show()

# Horizontal box plot (reuses `data` from the box plot example above)
fig, ax = plt.subplots(figsize=(10, 6))
ax.boxplot(data, vert=False)
# With horizontal boxes the groups lie along the y-axis. Labels are set on
# the axis rather than through boxplot(labels=...), which Matplotlib 3.9
# deprecated.
ax.set_yticklabels(['A', 'B', 'C', 'D'])
ax.set_title('Horizontal Box Plot')
plt.show()

8. Heatmap

# Correlation-matrix heatmap built from random data (10 rows, 5 columns)
np.random.seed(42)
data = np.random.randn(10, 5)
df = pd.DataFrame(data, columns=['A', 'B', 'C', 'D', 'E'])
correlation = df.corr()

fig, ax = plt.subplots(figsize=(10, 8))

# Colour scale pinned to [-1, 1]
im = ax.imshow(correlation, cmap='coolwarm', aspect='auto', vmin=-1, vmax=1)

# Axis labels: one tick per column name on both axes
names = correlation.columns
tick_positions = range(len(names))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(names)
ax.set_yticklabels(names)

# Print each coefficient in its cell; imshow puts the column index on x
# and the row index on y
for (row, col), coeff in np.ndenumerate(correlation.to_numpy()):
    ax.text(col, row, f'{coeff:.2f}',
            ha='center', va='center', color='black')

# Colorbar
cbar = fig.colorbar(im, ax=ax)
cbar.set_label('Correlation')

ax.set_title('Correlation Heatmap')
fig.tight_layout()
plt.show()

9. Styles and Themes

# List the style sheets available in this Matplotlib installation
print(plt.style.available)

# Apply styles: draw the same two curves once per style
styles = ['default', 'seaborn-v0_8', 'ggplot', 'dark_background']

x = np.linspace(0, 10, 100)

for style in styles:
    # Style names differ between Matplotlib releases; skip unknown ones
    # instead of raising. 'default' is accepted by the style machinery
    # even when it is not listed in plt.style.available.
    if style != 'default' and style not in plt.style.available:
        print(f'Skipping unavailable style: {style}')
        continue

    # Style settings are read when the figure, axes and artists are
    # created. Entering the context after the axes already exist leaves
    # them in the previous style, so each figure is built entirely inside
    # its own context.
    with plt.style.context(style):
        fig, ax = plt.subplots(figsize=(7, 5))
        ax.plot(x, np.sin(x), label='sin')
        ax.plot(x, np.cos(x), label='cos')
        ax.set_title(f'Style: {style}')
        ax.legend()
        fig.tight_layout()

# Display all style figures together
plt.show()

# Global style setting
# plt.style.use('seaborn-v0_8')

10. Plot Customization

# Font settings (global, through rcParams)
plt.rcParams.update({
    'font.family': 'DejaVu Sans',
    'font.size': 12,
    'axes.titlesize': 14,
    'axes.labelsize': 12,
})

# Customizing individual plot elements
fig, ax = plt.subplots(figsize=(12, 6))

x = np.linspace(0, 10, 100)
ax.plot(x, np.sin(x), color='navy', linewidth=2, label='sin(x)')

# Axis ranges
ax.set(xlim=(0, 10), ylim=(-1.5, 1.5))

# Ticks: 0, 2, ..., 10 on x and -1, -0.5, ..., 1 on y
ax.set_xticks(np.arange(0, 11, 2))
ax.set_yticks(np.linspace(-1, 1, 5))

# Grid
ax.grid(True, linestyle='--', alpha=0.5)

# Annotation: arrow pointing at (pi/2, 1), with the text offset from it
peak_x = np.pi/2
ax.annotate('Peak', xy=(peak_x, 1), xytext=(peak_x + 1, 1.3),
            arrowprops=dict(facecolor='black', shrink=0.05),
            fontsize=12)

# Free text placed in data coordinates
ax.text(5, -1.3, 'Note: This is a sine wave', fontsize=10, style='italic')

# Title and axis labels
ax.set_title('Customized Sine Wave Plot', fontsize=16, fontweight='bold')
ax.set_xlabel('X axis', fontsize=12)
ax.set_ylabel('Y axis', fontsize=12)

# Legend
ax.legend(loc='upper right', frameon=True, shadow=True)

# Spines (borders): hide the top and right ones
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

fig.tight_layout()
plt.show()

Summary

| Chart type | Function | Use |
|---|---|---|
| Line plot | plot() | Time series, continuous data |
| Bar chart | bar(), barh() | Comparing categories |
| Histogram | hist() | Inspecting a distribution |
| Scatter plot | scatter() | Relationship between two variables |
| Pie chart | pie() | Proportions, composition |
| Box plot | boxplot() | Distribution, outliers |
| Heatmap | imshow() | Matrix data |
| Customization | Method |
|---|---|
| Title / labels | set_title(), set_xlabel(), set_ylabel() |
| Range | set_xlim(), set_ylim() |
| Ticks | set_xticks(), set_yticks() |
| Legend | legend() |
| Grid | grid() |
| Saving | savefig() |
to navigate between lessons