09. Edge AI - ONNX Runtime
09. Edge AI - ONNX Runtime¶
νμ΅ λͺ©ν¶
- ONNX(Open Neural Network Exchange) κ°μ μ΄ν΄
- ONNX Runtime μ€μΉ λ° μ¬μ©λ² μ΅λ
- λͺ¨λΈ μ΅μ ν κΈ°λ² νμ΅
- λΌμ¦λ² 리νμ΄ λ°°ν¬
- κ°μ²΄ κ²μΆ μμ ꡬν
1. ONNX κ°μ¶
1.1 ONNXλ?¶
ONNX(Open Neural Network Exchange)λ λ€μν ML νλ μμν¬ κ° λͺ¨λΈ νΈνμ±μ μ 곡νλ μ€ν ν¬λ§·μ λλ€.
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
β ONNX μνκ³ β
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ€
β β
β νλ ¨ νλ μμν¬ μΆλ‘ μμ§ β
β ββββββββββββ β
β β PyTorch ββββββ β
β ββββββββββββ β β
β ββββββββββββ β ββββββββββββ ββββββββββββββββ β
β βTensorFlowββββββΌβββββββΆβ ONNX βββββΆβONNX Runtime β β
β ββββββββββββ β β (.onnx) β β(ν¬λ‘μ€νλ«νΌ)β β
β ββββββββββββ β ββββββββββββ ββββββββββββββββ β
β β Keras ββββββ€ β β
β ββββββββββββ β βΌ β
β ββββββββββββ β ββββββββββββββββ β
β β Sklearn ββββββ β λ°°ν¬ λμ β β
β ββββββββββββ β β’ λΌμ¦λ² 리νμ΄β β
β β β’ Windows β β
β β β’ Android β β
β β β’ iOS β β
β ββββββββββββββββ β
β β
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
1.2 ONNX vs TFLite¶
| νΉμ± | ONNX | TFLite |
|---|---|---|
| κ°λ°μ¬ | Microsoft + ννΈλ | Google |
| νλ μμν¬ μ§μ | PyTorch, TF, Sklearn λ± | TensorFlow/Keras |
| ν¬λ§· | .onnx (Protobuf) | .tflite (FlatBuffer) |
| μ΅μ ν | ONNX Runtime | TF Lite Interpreter |
| μμν | μ§μ | μ§μ |
| νλμ¨μ΄ | CPU, GPU, NPU | CPU, GPU, Edge TPU |
1.3 ONNX Runtime νΉμ§¶
# Summary of ONNX Runtime capabilities (values are display strings for the tutorial)
onnx_runtime_features = {
    "ν¬λ‘μ€νλ«νΌ": "Windows, Linux, macOS, Android, iOS",
    "νλμ¨μ΄ κ°μ": "CPU, CUDA, TensorRT, DirectML, OpenVINO",
    "λ€μ€ μΈμ΄": "Python, C++, C#, Java, JavaScript",
    "μ΅μ ν": "κ·Έλν μ΅μ ν, μμν, μ°μ°μ ν¨μ ",
    "μ μ°μ±": "λ€μν νλ μμν¬μμ λ³νλ λͺ¨λΈ μ€ν"
}
2. ONNX Runtime μ€μΉ¶
2.1 λΌμ¦λ² 리νμ΄ μ€μΉ¶
# Base ONNX Runtime package (CPU-only build)
pip install onnxruntime

# ARM64-optimized build (64-bit Raspberry Pi OS)
# pip install onnxruntime --extra-index-url https://aiinfra.pkgs.visualstudio.com/...

# Extra packages used by the examples below
pip install numpy pillow onnx

# Model-conversion tooling (run on a PC, not the Pi)
pip install tf2onnx torch onnx-simplifier
2.2 μ€μΉ νμΈ¶
#!/usr/bin/env python3
"""Verify the ONNX Runtime installation and report available providers."""
import onnxruntime as ort
import numpy as np

# Installed version
print(f"ONNX Runtime λ²μ : {ort.__version__}")

# Execution providers (backends) usable on this machine
providers = ort.get_available_providers()
print(f"μ¬μ© κ°λ₯ν νλ‘λ°μ΄λ: {providers}")

# Quick smoke test: build session options with full graph optimization enabled
# (original comment was split across two lines by a wrap, leaving a stray
# bare-name line that raised NameError — rejoined here)
session_options = ort.SessionOptions()
session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
print("\nONNX Runtime μ μ λμ νμΈ!")
3. λͺ¨λΈ λ³ν¶
3.1 PyTorch to ONNX¶
#!/usr/bin/env python3
"""PyTorch λͺ¨λΈμ ONNXλ‘ λ³ν"""
import torch
import torch.nn as nn
# μμ λͺ¨λΈ
class SimpleNet(nn.Module):
    """Toy 10 -> 50 -> 3 fully-connected network used in the export demos."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 50)
        self.fc2 = nn.Linear(50, 3)

    def forward(self, x):
        # Hidden layer with ReLU, then the linear output head
        hidden = self.fc1(x)
        activated = torch.relu(hidden)
        return self.fc2(activated)
def export_to_onnx(model, output_path: str, input_shape: tuple):
"""PyTorch λͺ¨λΈμ ONNXλ‘ λ΄λ³΄λ΄κΈ°"""
model.eval()
# λλ―Έ μ
λ ₯
dummy_input = torch.randn(*input_shape)
# ONNX λ΄λ³΄λ΄κΈ°
torch.onnx.export(
model,
dummy_input,
output_path,
input_names=['input'],
output_names=['output'],
dynamic_axes={
'input': {0: 'batch_size'},
'output': {0: 'batch_size'}
},
opset_version=13
)
print(f"ONNX λͺ¨λΈ μ μ₯: {output_path}")
# μ¬μ© μ
if __name__ == "__main__":
model = SimpleNet()
export_to_onnx(model, "simple_net.onnx", (1, 10))
3.2 TensorFlow/Keras to ONNX¶
# Convert a TensorFlow SavedModel with tf2onnx (command line)
python -m tf2onnx.convert \
    --saved-model tensorflow_model/ \
    --output model.onnx \
    --opset 13
#!/usr/bin/env python3
"""TensorFlow/Keras λͺ¨λΈμ ONNXλ‘ λ³ν"""
import tensorflow as tf
import tf2onnx
import onnx
def keras_to_onnx(model_path: str, output_path: str):
    """Convert a saved Keras model to ONNX.

    Args:
        model_path: path of the Keras model (.h5 or SavedModel directory).
        output_path: destination .onnx file; tf2onnx writes it directly
            when output_path is supplied.
    """
    # Load the Keras model
    model = tf.keras.models.load_model(model_path)

    # Convert; from_keras returns (model_proto, external_tensor_storage)
    onnx_model, _ = tf2onnx.convert.from_keras(
        model,
        opset=13,
        output_path=output_path
    )
    print(f"λ³ν μλ£: {output_path}")


# Usage example — guarded so importing this module has no side effects,
# consistent with the other snippets in this tutorial
if __name__ == "__main__":
    keras_to_onnx("my_model.h5", "my_model.onnx")
3.3 λͺ¨λΈ κ²μ¦ λ° λ¨μν¶
#!/usr/bin/env python3
"""ONNX λͺ¨λΈ κ²μ¦ λ° λ¨μν"""
import onnx
from onnxsim import simplify
def validate_and_simplify(model_path: str, output_path: str = None):
    """Validate an ONNX model and optionally save a simplified copy.

    Args:
        model_path: path of the .onnx model to check.
        output_path: where to save the simplified model; skip saving if None.

    Returns:
        The simplified model on success, the original model if
        simplification fails, or None if validation fails.
    """
    model = onnx.load(model_path)

    # Structural validation
    try:
        onnx.checker.check_model(model)
        print("λͺ¨λΈ κ²μ¦ ν΅κ³Ό")
    except Exception as e:
        print(f"κ²μ¦ μ€ν¨: {e}")
        return

    # Basic metadata
    print(f"\nλͺ¨λΈ μ 보:")
    print(f" IR λ²μ : {model.ir_version}")
    print(f" Opset: {model.opset_import[0].version}")
    print(f" κ·Έλν μ΄λ¦: {model.graph.name}")

    # Input / output tensor info. A line wrap had split this print's f-string
    # in half (syntax error) — rejoined. 'inp' avoids shadowing builtin input.
    print(f"\nμλ ₯:")
    for inp in model.graph.input:
        print(f" {inp.name}: {inp.type}")
    print(f"\nμΆλ ₯:")
    for out in model.graph.output:
        print(f" {out.name}: {out.type}")

    # Simplify: removes redundant ops, folds constants, optimizes the graph
    simplified_model, check = simplify(model)

    if check:
        print("\nλ¨μν μ±κ³΅")
        if output_path:
            onnx.save(simplified_model, output_path)
            print(f"μ μ₯: {output_path}")

            # Compare on-disk sizes before/after
            import os
            orig_size = os.path.getsize(model_path) / 1024
            new_size = os.path.getsize(output_path) / 1024
            print(f"\nν¬κΈ°: {orig_size:.1f}KB -> {new_size:.1f}KB")
        return simplified_model
    else:
        print("λ¨μν μ€ν¨")
        return model


# Usage example
if __name__ == "__main__":
    validate_and_simplify("model.onnx", "model_simplified.onnx")
4. μΆλ‘ μν¶
4.1 κΈ°λ³Έ μΆλ‘ ¶
#!/usr/bin/env python3
"""ONNX Runtime κΈ°λ³Έ μΆλ‘ """
import onnxruntime as ort
import numpy as np
class ONNXModel:
    """Thin wrapper around an ONNX Runtime inference session."""

    def __init__(self, model_path: str, providers: list = None):
        """Load the model.

        Args:
            model_path: path of the .onnx file.
            providers: execution providers in priority order
                (defaults to CPU only).
        """
        if providers is None:
            providers = ['CPUExecutionProvider']

        # Session options: full graph optimization, 4 intra-op threads.
        # (Several comments in this block had been split mid-word by line
        # wraps, leaving stray bare-name lines — rejoined here.)
        sess_options = ort.SessionOptions()
        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
        sess_options.intra_op_num_threads = 4

        # Create the session
        self.session = ort.InferenceSession(
            model_path,
            sess_options=sess_options,
            providers=providers
        )

        # Cache input/output tensor names and the declared input shape
        self.input_name = self.session.get_inputs()[0].name
        self.input_shape = self.session.get_inputs()[0].shape
        self.output_name = self.session.get_outputs()[0].name

    def get_input_shape(self):
        """Return the model's declared input shape (may contain symbolic dims)."""
        return self.input_shape

    def predict(self, input_data: np.ndarray) -> np.ndarray:
        """Run inference on one input tensor and return the first output."""
        outputs = self.session.run(
            [self.output_name],
            {self.input_name: input_data}
        )
        return outputs[0]


# Usage example
if __name__ == "__main__":
    model = ONNXModel("model.onnx")
    print(f"μλ ₯ νν: {model.get_input_shape()}")

    # Dummy input
    input_data = np.random.randn(1, 10).astype(np.float32)
    output = model.predict(input_data)
    print(f"μΆλ ₯ νν: {output.shape}")
    print(f"μΆλ ₯ κ°: {output}")
4.2 λ°°μΉ μΆλ‘ ¶
#!/usr/bin/env python3
"""ONNX Runtime λ°°μΉ μΆλ‘ """
import onnxruntime as ort
import numpy as np
import time
def batch_inference(model_path: str, data: np.ndarray,
                    batch_size: int = 32) -> np.ndarray:
    """Run inference over `data` in mini-batches and concatenate the outputs.

    Args:
        model_path: path of the .onnx model.
        data: input samples; the first axis is the sample axis.
        batch_size: number of samples per session.run call.

    Returns:
        Model outputs for all samples, concatenated along axis 0
        (an empty array when `data` has no samples).
    """
    session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
    input_name = session.get_inputs()[0].name
    output_name = session.get_outputs()[0].name

    results = []
    for i in range(0, len(data), batch_size):
        batch = data[i:i + batch_size]
        results.append(session.run([output_name], {input_name: batch})[0])

    # np.concatenate raises ValueError on an empty sequence — handle the
    # zero-sample input explicitly instead of crashing
    if not results:
        return np.empty((0,), dtype=data.dtype)
    return np.concatenate(results, axis=0)
# Performance measurement
def benchmark_batch_sizes(model_path: str, input_shape: tuple):
    """Compare inference throughput across batch sizes.

    Args:
        model_path: path of the .onnx model.
        input_shape: full model input shape; the leading (batch) dim is ignored.
    """
    session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
    input_name = session.get_inputs()[0].name

    total_samples = 1000
    # Generate the test data once — it does not depend on the batch size,
    # so regenerating it inside the loop skewed the comparison
    data = np.random.randn(total_samples, *input_shape[1:]).astype(np.float32)

    for batch_size in [1, 8, 16, 32, 64]:
        start = time.perf_counter()
        for i in range(0, total_samples, batch_size):
            batch = data[i:i + batch_size]
            _ = session.run(None, {input_name: batch})
        elapsed = time.perf_counter() - start

        throughput = total_samples / elapsed
        print(f"λ°°μΉ ν¬κΈ° {batch_size:2d}: {throughput:.1f} samples/sec")
4.3 μμν μΆλ‘ ¶
#!/usr/bin/env python3
"""ONNX Runtime μμν"""
import onnxruntime as ort
from onnxruntime.quantization import quantize_dynamic, QuantType
def quantize_model(model_path: str, output_path: str):
    """Apply dynamic INT8 quantization and report the size reduction.

    Args:
        model_path: path of the float32 .onnx model.
        output_path: destination for the quantized model.
    """
    quantize_dynamic(
        model_input=model_path,
        model_output=output_path,
        weight_type=QuantType.QInt8
    )

    # Compare on-disk sizes in MB
    import os
    orig_size = os.path.getsize(model_path) / (1024 * 1024)
    new_size = os.path.getsize(output_path) / (1024 * 1024)

    print(f"μλ³Έ: {orig_size:.2f} MB")
    print(f"μμν: {new_size:.2f} MB")
    print(f"μμΆλ₯ : {orig_size / new_size:.1f}x")


# Usage example — guarded so importing this module has no side effects,
# consistent with the other snippets in this tutorial
if __name__ == "__main__":
    quantize_model("model.onnx", "model_quantized.onnx")
5. κ°μ²΄ κ²μΆ μμ ¶
5.1 YOLO ONNX λͺ¨λΈ μ¬μ©¶
#!/usr/bin/env python3
"""YOLOv5 ONNX κ°μ²΄ κ²μΆ"""
import onnxruntime as ort
import numpy as np
from PIL import Image
import cv2
class YOLODetector:
    """YOLOv5 ONNX object detector running on the CPU execution provider."""

    # COCO class names, indexed by class id
    CLASSES = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
        'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv',
        'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
        'scissors', 'teddy bear', 'hair drier', 'toothbrush'
    ]

    def __init__(self, model_path: str, conf_threshold: float = 0.5,
                 iou_threshold: float = 0.45):
        """Load the ONNX model and cache input tensor metadata.

        Args:
            model_path: path of the YOLOv5 .onnx model.
            conf_threshold: minimum objectness/class score to keep a box.
            iou_threshold: IoU threshold used for NMS.
        """
        self.session = ort.InferenceSession(
            model_path,
            providers=['CPUExecutionProvider']
        )
        self.conf_threshold = conf_threshold
        self.iou_threshold = iou_threshold

        # Input tensor info (original comment was split by a line wrap,
        # leaving a stray invalid line — rejoined). Assumes NCHW layout
        # with static H/W; symbolic dims here would break preprocess —
        # TODO confirm against the exported model.
        input_info = self.session.get_inputs()[0]
        self.input_name = input_info.name
        self.input_shape = input_info.shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def preprocess(self, image: np.ndarray) -> tuple:
        """Resize/normalize a BGR image; return (input tensor, scale factors)."""
        orig_height, orig_width = image.shape[:2]

        # Resize to the network input size
        resized = cv2.resize(image, (self.input_width, self.input_height))

        # BGR -> RGB, HWC -> CHW
        input_data = resized[:, :, ::-1].transpose(2, 0, 1)

        # Normalize to [0, 1]
        input_data = input_data.astype(np.float32) / 255.0

        # Add the batch dimension
        input_data = np.expand_dims(input_data, axis=0)

        # Factors mapping network coordinates back to the original image
        scale = (orig_width / self.input_width, orig_height / self.input_height)

        return input_data, scale

    def postprocess(self, output: np.ndarray, scale: tuple) -> list:
        """Filter raw YOLO predictions, apply NMS, return detection dicts."""
        predictions = output[0]

        boxes = []
        scores = []
        class_ids = []

        for pred in predictions:
            confidence = pred[4]  # objectness score
            if confidence > self.conf_threshold:
                class_probs = pred[5:]
                class_id = np.argmax(class_probs)
                class_score = class_probs[class_id]

                if class_score > self.conf_threshold:
                    # Box is (center_x, center_y, width, height)
                    cx, cy, w, h = pred[:4]

                    # Convert to corner coordinates in original-image scale
                    x1 = int((cx - w / 2) * scale[0])
                    y1 = int((cy - h / 2) * scale[1])
                    x2 = int((cx + w / 2) * scale[0])
                    y2 = int((cy + h / 2) * scale[1])

                    boxes.append([x1, y1, x2, y2])
                    scores.append(float(confidence * class_score))
                    class_ids.append(int(class_id))

        # NMS (Non-Maximum Suppression)
        if boxes:
            indices = cv2.dnn.NMSBoxes(
                boxes, scores, self.conf_threshold, self.iou_threshold
            )
            results = []
            for i in indices:
                # Older OpenCV returns [[i], ...]; newer returns flat [i, ...]
                idx = i[0] if isinstance(i, (list, np.ndarray)) else i
                results.append({
                    'box': boxes[idx],
                    'score': scores[idx],
                    'class_id': class_ids[idx],
                    'class_name': self.CLASSES[class_ids[idx]]
                })
            return results
        return []

    def detect(self, image: np.ndarray) -> list:
        """Run the full pipeline: preprocess -> inference -> postprocess."""
        input_data, scale = self.preprocess(image)
        outputs = self.session.run(None, {self.input_name: input_data})
        detections = self.postprocess(outputs[0], scale)
        return detections

    def draw_detections(self, image: np.ndarray, detections: list) -> np.ndarray:
        """Return a copy of `image` with boxes and score labels drawn."""
        result = image.copy()
        for det in detections:
            x1, y1, x2, y2 = det['box']
            label = f"{det['class_name']}: {det['score']:.2f}"

            # Bounding box
            cv2.rectangle(result, (x1, y1), (x2, y2), (0, 255, 0), 2)

            # Filled label background plus black text
            (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            cv2.rectangle(result, (x1, y1 - 20), (x1 + w, y1), (0, 255, 0), -1)
            cv2.putText(result, label, (x1, y1 - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
        return result
# Usage example
if __name__ == "__main__":
    detector = YOLODetector("yolov5s.onnx")

    # Load the test image (BGR ndarray)
    image = cv2.imread("test_image.jpg")
    # NOTE(review): cv2.imread returns None on a missing file and detect()
    # would then fail — confirm test_image.jpg exists before running.

    # Run detection
    detections = detector.detect(image)
    print(f"κ²μΆλ κ°μ²΄: {len(detections)}κ°")
    for det in detections:
        print(f" {det['class_name']}: {det['score']:.2f}")

    # Save the annotated result
    result_image = detector.draw_detections(image, detections)
    cv2.imwrite("result.jpg", result_image)
5.2 μ€μκ° κ°μ²΄ κ²μΆ¶
#!/usr/bin/env python3
"""μ€μκ° κ°μ²΄ κ²μΆ (Pi Camera + ONNX)"""
import numpy as np
import cv2
import time
try:
from picamera2 import Picamera2
HAS_CAMERA = True
except ImportError:
HAS_CAMERA = False
# YOLODetector ν΄λμ€λ μμ λμΌ
class RealtimeDetector:
    """Real-time detector: Pi camera frames fed through the YOLO ONNX model."""

    def __init__(self, model_path: str):
        # YOLODetector is defined in the previous section of this tutorial
        self.detector = YOLODetector(model_path)

        if HAS_CAMERA:
            # 640x480 RGB preview stream from the Pi camera
            self.camera = Picamera2()
            config = self.camera.create_preview_configuration(
                main={"size": (640, 480), "format": "RGB888"}
            )
            self.camera.configure(config)

    def run(self, duration: float = 60, display: bool = False):
        """Capture and detect for `duration` seconds.

        Args:
            duration: how long to run, in seconds.
            display: if True, show annotated frames in an OpenCV window
                (press 'q' to quit early).
        """
        if not HAS_CAMERA:
            print("μΉ΄λ©λΌ μμ")
            return

        self.camera.start()
        print(f"μ€μκ° κ²μΆ μμ ({duration}μ΄)")

        start_time = time.time()
        frame_count = 0
        fps_time = time.time()

        try:
            while time.time() - start_time < duration:
                # Capture a frame (RGB, per the configuration above)
                frame = self.camera.capture_array()

                # Convert to BGR (OpenCV / detector preprocess convention)
                frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

                # Detect
                detections = self.detector.detect(frame_bgr)
                frame_count += 1

                # Report FPS every 10 frames
                if frame_count % 10 == 0:
                    elapsed = time.time() - fps_time
                    fps = 10 / elapsed
                    fps_time = time.time()
                    print(f"\rFPS: {fps:.1f}, κ²μΆ: {len(detections)}κ°", end="")
                    for det in detections:
                        print(f" | {det['class_name']}", end="")

                # Optional live display
                if display:
                    result = self.detector.draw_detections(frame_bgr, detections)
                    cv2.imshow("Detection", result)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
        except KeyboardInterrupt:
            pass
        finally:
            # Always release the camera, even on interrupt
            self.camera.stop()
            if display:
                cv2.destroyAllWindows()

        total_time = time.time() - start_time
        avg_fps = frame_count / total_time
        print(f"\n\nνκ· FPS: {avg_fps:.1f}")


if __name__ == "__main__":
    detector = RealtimeDetector("yolov5s.onnx")
    detector.run(duration=30, display=False)
5.3 κ²μΆ κ²°κ³Ό MQTT λ°ν¶
#!/usr/bin/env python3
"""κ°μ²΄ κ²μΆ κ²°κ³Ό MQTT λ°ν"""
import paho.mqtt.client as mqtt
import json
import time
class DetectionPublisher:
    """Publishes object-detection results for an image over MQTT as JSON."""

    def __init__(self, model_path: str, mqtt_broker: str = "localhost"):
        # YOLODetector is defined in the earlier detection section
        self.detector = YOLODetector(model_path)

        # Connect to the broker and start the background network loop
        self.mqtt_client = mqtt.Client()
        self.mqtt_client.connect(mqtt_broker, 1883)
        self.mqtt_client.loop_start()

        self.node_id = "detector_01"

    def process_and_publish(self, image_path: str):
        """Run detection on an image file and publish the result.

        Returns the published payload dict, or None if the image
        could not be loaded.
        """
        import cv2

        image = cv2.imread(image_path)
        if image is None:
            print(f"μ΄λ―Έμ§ λ‘λ μ€ν¨: {image_path}")
            return

        # Detection, timed in milliseconds
        start = time.perf_counter()
        detections = self.detector.detect(image)
        inference_time = (time.perf_counter() - start) * 1000

        # Build the JSON payload
        result = {
            "node_id": self.node_id,
            "image": image_path,
            "detections": [
                {
                    "class": det['class_name'],
                    "score": round(det['score'], 3),
                    "box": det['box']
                }
                for det in detections
            ],
            "count": len(detections),
            "inference_time_ms": round(inference_time, 2),
            "timestamp": time.time()
        }

        # Publish to edge/<node_id>/detection
        topic = f"edge/{self.node_id}/detection"
        self.mqtt_client.publish(topic, json.dumps(result))

        print(f"λ°ν: {topic}")
        print(f" κ²μΆ: {len(detections)}κ°, μκ°: {inference_time:.1f}ms")

        return result

    def shutdown(self):
        """Stop the MQTT network loop and disconnect from the broker."""
        self.mqtt_client.loop_stop()
        self.mqtt_client.disconnect()


if __name__ == "__main__":
    publisher = DetectionPublisher("yolov5s.onnx")
    try:
        publisher.process_and_publish("test_image.jpg")
    finally:
        publisher.shutdown()
μ°μ΅ λ¬Έμ ¶
λ¬Έμ 1: λͺ¨λΈ λ³ν¶
- PyTorch μ΄λ―Έμ§ λΆλ₯ λͺ¨λΈμ ONNXλ‘ λ³ννμΈμ.
- λ³νλ λͺ¨λΈμ κ²μ¦νκ³ λ¨μννμΈμ.
λ¬Έμ 2: μ±λ₯ λΉκ΅¶
- TFLiteμ ONNX Runtimeμ μΆλ‘ μλλ₯Ό λΉκ΅νμΈμ.
- λ°°μΉ ν¬κΈ°λ³ μ²λ¦¬λμ μΈ‘μ νμΈμ.
λ¬Έμ 3: μ€μκ° κ²μΆ¶
- YOLO λͺ¨λΈλ‘ μ€μκ° κ°μ²΄ κ²μΆμ ꡬννμΈμ.
- κ²μΆ κ²°κ³Όλ₯Ό MQTTλ‘ λ°ννμΈμ.
λ€μ λ¨κ³¶
- 10_Home_Automation_Project.md: AI κΈ°λ° μ€λ§νΈν
- 11_Image_Analysis_Project.md: μμ λΆμ νλ‘μ νΈ
μ΅μ’ μ λ°μ΄νΈ: 2026-02-01