1"""
219. OpenCV DNN ๋ชจ๋
3- ๋ฅ๋ฌ๋ ๋ชจ๋ธ ๋ก๋
4- ์ด๋ฏธ์ง ๋ถ๋ฅ
5- ๊ฐ์ฒด ๊ฒ์ถ (YOLO, SSD)
6- ์๋งจํฑ ์ธ๊ทธ๋ฉํ
์ด์
7"""
8
9import cv2
10import numpy as np
11
12
def dnn_module_overview():
    """Print an overview of the OpenCV DNN module.

    Covers: the supported framework file formats, the readNet* loader
    functions, and the backend/target constants. Output-only; returns None.
    """
    divider = "=" * 50
    print(divider)
    print("OpenCV DNN ๋ชจ๋ ๊ฐ์")
    print(divider)

    print("\n1. ์ง์ ํ๋ ์์ํฌ:")
    # (framework name, typical model/config file extensions)
    frameworks = (
        ('Caffe', '.caffemodel, .prototxt'),
        ('TensorFlow', '.pb, .pbtxt'),
        ('Darknet', '.weights, .cfg'),
        ('ONNX', '.onnx'),
        ('Torch', '.t7, .net'),
    )
    for fw_name, fw_files in frameworks:
        print(f" {fw_name}: {fw_files}")

    print("\n2. ๋ชจ๋ธ ๋ก๋ ํจ์:")
    loader_lines = (
        " cv2.dnn.readNet(model, config)",
        " cv2.dnn.readNetFromCaffe(prototxt, caffemodel)",
        " cv2.dnn.readNetFromTensorflow(model, config)",
        " cv2.dnn.readNetFromDarknet(cfg, weights)",
        " cv2.dnn.readNetFromONNX(onnx)",
    )
    for loader in loader_lines:
        print(loader)

    print("\n3. ๋ฐฑ์๋ ๋ฐ ํ๊ฒ:")
    print(" ๋ฐฑ์๋: DNN_BACKEND_OPENCV, DNN_BACKEND_CUDA")
    print(" ํ๊ฒ: DNN_TARGET_CPU, DNN_TARGET_CUDA")
41
42
def blob_creation_demo() -> None:
    """Demonstrate blob creation with cv2.dnn.blobFromImage/blobFromImages.

    Builds a synthetic test image, converts it into a 4-D NCHW blob,
    prints the resulting shapes/dtypes, builds a 2-image batch blob,
    and writes the input image to 'dnn_input.jpg'.
    """
    print("\n" + "=" * 50)
    print("Blob ์์ฑ")
    print("=" * 50)

    # Synthetic test image: gray background with a filled green circle.
    img = np.zeros((480, 640, 3), dtype=np.uint8)
    img[:] = [150, 150, 150]
    cv2.circle(img, (320, 240), 100, (0, 200, 0), -1)

    # blobFromImage parameters:
    # scalefactor: pixel value scaling (commonly 1/255)
    # size: network input size
    # mean: per-channel mean to subtract (BGR order)
    # swapRB: convert BGR -> RGB
    # crop: whether to crop when resizing

    blob = cv2.dnn.blobFromImage(
        img,
        scalefactor=1/255.0,
        size=(224, 224),
        mean=(0, 0, 0),
        swapRB=True,
        crop=False
    )

    print(f"์๋ณธ ์ด๋ฏธ์ง: {img.shape}")
    print(f"Blob shape: {blob.shape}")
    print(f"Blob dtype: {blob.dtype}")

    print("\nblobFromImage ํ๋ผ๋ฏธํฐ:")
    print(" scalefactor: ๋ณดํต 1/255.0 (0-1 ์ ๊ทํ)")
    print(" size: ๋คํธ์ํฌ ์
๋ ฅ ํฌ๊ธฐ (224x224, 416x416 ๋ฑ)")
    print(" mean: ImageNet ํ๊ท (104.0, 117.0, 123.0)")
    print(" swapRB: OpenCV BGR -> ๋ชจ๋ธ RGB")
    print(" crop: True๋ฉด ํฌ๋กญ, False๋ฉด ๋ฆฌ์ฌ์ด์ฆ๋ง")

    # Batch processing: several images stacked into one NCHW blob.
    images = [img, img.copy()]
    blob_batch = cv2.dnn.blobFromImages(
        images,
        scalefactor=1/255.0,
        size=(224, 224),
        mean=(0, 0, 0),
        swapRB=True
    )
    print(f"\nBatch blob shape: {blob_batch.shape}")

    # Side effect: writes the demo input image to the working directory.
    cv2.imwrite('dnn_input.jpg', img)
93
94
def image_classification_demo() -> None:
    """Conceptual image-classification walkthrough.

    Prints representative classification architectures and a cv2.dnn
    inference code template. Nothing is executed against a real model;
    the template is only printed.
    """
    print("\n" + "=" * 50)
    print("์ด๋ฏธ์ง ๋ถ๋ฅ (Image Classification)")
    print("=" * 50)

    print("\n๋ชจ๋ธ ์์:")
    # (architecture name, short description)
    models = [
        ('ResNet', 'Residual Networks, ๊น์ ๋คํธ์ํฌ'),
        ('VGG', 'Visual Geometry Group, ๋จ์ ๊ตฌ์กฐ'),
        ('MobileNet', '๊ฒฝ๋ํ, ๋ชจ๋ฐ์ผ์ฉ'),
        ('EfficientNet', 'ํจ์จ์ ์ค์ผ์ผ๋ง'),
        ('GoogLeNet', 'Inception ๋ชจ๋'),
    ]

    for name, desc in models:
        print(f" {name}: {desc}")

    # Reference-only code template (printed, never executed here).
    # NOTE(review): the template calls np.argmax but imports only cv2;
    # add `import numpy as np` when actually using it.
    code = '''
# ์ด๋ฏธ์ง ๋ถ๋ฅ ์ฝ๋ ํ
ํ๋ฆฟ
import cv2

# ๋ชจ๋ธ ๋ก๋ (์: MobileNet)
net = cv2.dnn.readNetFromCaffe(
    'deploy.prototxt',
    'mobilenet.caffemodel'
)

# ์ด๋ฏธ์ง ์ ์ฒ๋ฆฌ
img = cv2.imread('image.jpg')
blob = cv2.dnn.blobFromImage(
    img, 1/255.0, (224, 224), (104, 117, 123), swapRB=True
)

# ์ถ๋ก
net.setInput(blob)
output = net.forward()

# ๊ฒฐ๊ณผ ํด์
class_id = np.argmax(output)
confidence = output[0][class_id]
print(f"Class: {class_id}, Confidence: {confidence:.2f}")
'''
    print(code)

    print("\n์ฐธ๊ณ : ์ค์ ์คํ์๋ ๋ชจ๋ธ ํ์ผ์ด ํ์ํฉ๋๋ค.")
    print(" MobileNet: https://github.com/shicai/MobileNet-Caffe")
    print(" ONNX Models: https://github.com/onnx/models")
143
144
def object_detection_yolo_demo() -> None:
    """Conceptual YOLO object-detection walkthrough.

    Prints a short YOLO summary, a full cv2.dnn inference template
    (Darknet model load, blob preprocessing, detection parsing, NMS,
    drawing), and model download links. Nothing is executed against a
    real model; the template is only printed.
    """
    print("\n" + "=" * 50)
    print("๊ฐ์ฒด ๊ฒ์ถ - YOLO")
    print("=" * 50)

    print("\nYOLO (You Only Look Once):")
    print(" - ์ค์๊ฐ ๊ฐ์ฒด ๊ฒ์ถ")
    print(" - ๋จ์ผ ๋คํธ์ํฌ๋ก ๊ฒ์ถ + ๋ถ๋ฅ")
    print(" - ๋ฒ์ : YOLOv3, YOLOv4, YOLOv5, YOLOv8")

    # Reference-only code template (printed, never executed here).
    # NOTE(review): `layer_names[i - 1]` assumes OpenCV >= 4.5.4 where
    # getUnconnectedOutLayers() returns flat indices; older versions
    # return nested arrays ([[i]]) -- confirm the target OpenCV version.
    # NOTE(review): `classes` in the template is assumed to be a label
    # list loaded elsewhere (e.g. coco.names).
    code = '''
# YOLO ๊ฐ์ฒด ๊ฒ์ถ ์ฝ๋
import cv2
import numpy as np

# ๋ชจ๋ธ ๋ก๋ (Darknet)
net = cv2.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')

# ์ถ๋ ฅ ๋ ์ด์ด ์ด๋ฆ
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

# ์ด๋ฏธ์ง ์ ์ฒ๋ฆฌ
img = cv2.imread('image.jpg')
blob = cv2.dnn.blobFromImage(
    img, 1/255.0, (416, 416), (0, 0, 0), swapRB=True, crop=False
)

# ์ถ๋ก
net.setInput(blob)
outputs = net.forward(output_layers)

# ๊ฒฐ๊ณผ ์ฒ๋ฆฌ
boxes = []
confidences = []
class_ids = []

for output in outputs:
    for detection in output:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]

        if confidence > 0.5:
            # ๋ฐ์ด๋ฉ ๋ฐ์ค ์ขํ
            center_x = int(detection[0] * img.shape[1])
            center_y = int(detection[1] * img.shape[0])
            w = int(detection[2] * img.shape[1])
            h = int(detection[3] * img.shape[0])

            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)

# NMS (Non-Maximum Suppression)
indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

# ๊ฒฐ๊ณผ ์๊ฐํ
for i in indices.flatten():
    x, y, w, h = boxes[i]
    cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
    label = f"{classes[class_ids[i]]}: {confidences[i]:.2f}"
    cv2.putText(img, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
'''
    print(code)

    print("\n๋ชจ๋ธ ๋ค์ด๋ก๋:")
    print(" YOLOv3: https://pjreddie.com/darknet/yolo/")
    print(" YOLOv4: https://github.com/AlexeyAB/darknet")
218
219
def object_detection_ssd_demo() -> None:
    """Conceptual SSD object-detection walkthrough.

    Prints a short SSD summary, a cv2.dnn inference template
    (TensorFlow frozen graph, 300x300 blob, detection tensor parsing),
    and a model-zoo link. Nothing is executed against a real model;
    the template is only printed.
    """
    print("\n" + "=" * 50)
    print("๊ฐ์ฒด ๊ฒ์ถ - SSD")
    print("=" * 50)

    print("\nSSD (Single Shot Detector):")
    print(" - ๋ค์ค ์ค์ผ์ผ ํน์ง ๋งต ์ฌ์ฉ")
    print(" - ๋น ๋ฅธ ์๋")
    print(" - MobileNet + SSD ์กฐํฉ ์ธ๊ธฐ")

    # Reference-only code template (printed, never executed here).
    code = '''
# SSD ๊ฐ์ฒด ๊ฒ์ถ ์ฝ๋
import cv2

# ๋ชจ๋ธ ๋ก๋ (TensorFlow)
net = cv2.dnn.readNetFromTensorflow(
    'frozen_inference_graph.pb',
    'ssd_mobilenet_v2_coco.pbtxt'
)

# ์ด๋ฏธ์ง ์ ์ฒ๋ฆฌ
img = cv2.imread('image.jpg')
blob = cv2.dnn.blobFromImage(
    img, size=(300, 300), mean=(127.5, 127.5, 127.5),
    scalefactor=1/127.5, swapRB=True
)

# ์ถ๋ก
net.setInput(blob)
detections = net.forward()

# ๊ฒฐ๊ณผ ์ฒ๋ฆฌ
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]

    if confidence > 0.5:
        class_id = int(detections[0, 0, i, 1])
        x1 = int(detections[0, 0, i, 3] * img.shape[1])
        y1 = int(detections[0, 0, i, 4] * img.shape[0])
        x2 = int(detections[0, 0, i, 5] * img.shape[1])
        y2 = int(detections[0, 0, i, 6] * img.shape[0])

        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
'''
    print(code)

    print("\n๋ชจ๋ธ ๋ค์ด๋ก๋:")
    print(" TensorFlow Model Zoo:")
    print(" https://github.com/tensorflow/models/blob/master/research/object_detection/")
270
271
def face_detection_dnn_demo() -> None:
    """DNN face-detection walkthrough (SSD-based Caffe model).

    Prints a summary of the OpenCV DNN face detector and a ready-to-use
    inference template; downloading the res10 Caffe model is required
    to actually run the printed template.
    """
    print("\n" + "=" * 50)
    print("DNN ์ผ๊ตด ๊ฒ์ถ")
    print("=" * 50)

    print("\nOpenCV DNN ์ผ๊ตด ๊ฒ์ถ๊ธฐ:")
    print(" - Caffe ๊ธฐ๋ฐ SSD")
    print(" - 300x300 ์
๋ ฅ")
    print(" - Haar Cascade๋ณด๋ค ์ ํ")

    # Reference-only code template (printed, never executed here).
    # NOTE(review): the template uses np.array but imports only cv2;
    # add `import numpy as np` when actually using it.
    code = '''
# DNN ์ผ๊ตด ๊ฒ์ถ
import cv2

# ๋ชจ๋ธ ๋ก๋
model_file = "res10_300x300_ssd_iter_140000.caffemodel"
config_file = "deploy.prototxt"
net = cv2.dnn.readNetFromCaffe(config_file, model_file)

# ์ด๋ฏธ์ง ์ ์ฒ๋ฆฌ
img = cv2.imread('image.jpg')
h, w = img.shape[:2]
blob = cv2.dnn.blobFromImage(
    img, 1.0, (300, 300), (104.0, 177.0, 123.0)
)

# ์ถ๋ก
net.setInput(blob)
detections = net.forward()

# ๊ฒฐ๊ณผ ์ฒ๋ฆฌ
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]

    if confidence > 0.5:
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        x1, y1, x2, y2 = box.astype(int)
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        label = f"{confidence:.2f}"
        cv2.putText(img, label, (x1, y1-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
'''
    print(code)

    print("\n๋ชจ๋ธ ๋ค์ด๋ก๋:")
    print(" https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector")
319
320
def semantic_segmentation_demo() -> None:
    """Conceptual semantic-segmentation walkthrough.

    Prints segmentation task types, representative models, and a
    cv2.dnn inference template (ENet-style class-map decoding).
    Nothing is executed against a real model.
    """
    print("\n" + "=" * 50)
    print("์๋งจํฑ ์ธ๊ทธ๋ฉํ
์ด์
")
    print("=" * 50)

    print("\n์ธ๊ทธ๋ฉํ
์ด์
 ์ ํ:")
    print(" - Semantic: ํฝ์
๋จ์ ํด๋์ค ๋ถ๋ฅ")
    print(" - Instance: ๊ฐ๋ณ ๊ฐ์ฒด ๊ตฌ๋ถ")
    print(" - Panoptic: Semantic + Instance")

    print("\n์ฃผ์ ๋ชจ๋ธ:")
    # (model name, short description)
    models = [
        ('FCN', 'Fully Convolutional Network'),
        ('U-Net', '์๋ฃ ์ด๋ฏธ์ง์ฉ'),
        ('DeepLab', 'Atrous convolution'),
        ('SegNet', '์ธ์ฝ๋-๋์ฝ๋ ๊ตฌ์กฐ'),
        ('PSPNet', 'Pyramid Pooling'),
    ]

    for name, desc in models:
        print(f" {name}: {desc}")

    # Reference-only code template (printed, never executed here).
    # NOTE(review): `num_classes` in the template is assumed to be
    # defined by the surrounding application code.
    code = '''
# ์๋งจํฑ ์ธ๊ทธ๋ฉํ
์ด์
์ฝ๋
import cv2
import numpy as np

# ๋ชจ๋ธ ๋ก๋ (์: ENet)
net = cv2.dnn.readNet('enet-model.net')

# ์ด๋ฏธ์ง ์ ์ฒ๋ฆฌ
img = cv2.imread('image.jpg')
blob = cv2.dnn.blobFromImage(
    img, 1/255.0, (1024, 512), (0, 0, 0), swapRB=True
)

# ์ถ๋ก
net.setInput(blob)
output = net.forward()

# ๊ฒฐ๊ณผ ์ฒ๋ฆฌ (ํด๋์ค ๋งต)
class_map = np.argmax(output[0], axis=0)

# ์ปฌ๋ฌ ๋งต ์ ์ฉ
colors = np.random.randint(0, 255, (num_classes, 3))
segmentation = colors[class_map]
'''
    print(code)
370
371
def pose_estimation_dnn_demo() -> None:
    """Conceptual human pose-estimation walkthrough.

    Prints pose-estimation task types, representative models, and a
    keypoint index layout. Output-only; no model is loaded.
    """
    print("\n" + "=" * 50)
    print("ํฌ์ฆ ์ถ์ (Pose Estimation)")
    print("=" * 50)

    print("\nํฌ์ฆ ์ถ์ ์ ํ:")
    print(" - 2D: ์ด๋ฏธ์ง์์ ๊ด์ ์์น")
    print(" - 3D: 3์ฐจ์ ๊ณต๊ฐ์ ๊ด์ ์์น")

    print("\n์ฃผ์ ๋ชจ๋ธ:")
    # (model name, short description)
    models = [
        ('OpenPose', 'Bottom-up ๋ฐฉ์, ๋ค์ค ์ธ์'),
        ('PoseNet', '๊ฒฝ๋ํ, ์ค์๊ฐ'),
        ('HRNet', '๊ณ ํด์๋, ์ ํ'),
        ('MediaPipe', 'Google, ๋ชจ๋ฐ์ผ ์ต์ ํ'),
    ]

    for name, desc in models:
        print(f" {name}: {desc}")

    print("\n๊ด์ ํฌ์ธํธ (COCO ๋ฐ์ดํฐ์
):")
    # NOTE(review): this is an 18-point layout including "neck", not the
    # standard 17-point COCO keypoint set -- confirm the intended model
    # (OpenPose's COCO variant uses 18 points with a neck joint).
    keypoints = [
        "0: nose", "1: neck",
        "2: right_shoulder", "3: right_elbow", "4: right_wrist",
        "5: left_shoulder", "6: left_elbow", "7: left_wrist",
        "8: right_hip", "9: right_knee", "10: right_ankle",
        "11: left_hip", "12: left_knee", "13: left_ankle",
        "14: right_eye", "15: left_eye",
        "16: right_ear", "17: left_ear"
    ]
    for kp in keypoints:
        print(f" {kp}")
405
406
def dnn_performance_tips() -> None:
    """Print a checklist of DNN inference optimization techniques.

    Covers GPU backends/targets, input sizing, quantization, batching,
    async inference, model choice, and shows a timing-measurement
    template (the template itself is only printed, not run).
    """
    print("\n" + "=" * 50)
    print("DNN ์ฑ๋ฅ ์ต์ ํ")
    print("=" * 50)

    print("""
1. GPU ๊ฐ์ ์ฌ์ฉ
 net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
 net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

2. ์
๋ ฅ ํฌ๊ธฐ ์กฐ์
 - ์์ ์
๋ ฅ = ๋น ๋ฅธ ์ถ๋ก
 - ์ ํ๋์ ์๋ ํธ๋ ์ด๋์คํ

3. ๋ชจ๋ธ ์ต์ ํ
 - INT8 ์์ํ
 - ๋ชจ๋ธ ํ๋ฃจ๋
 - ์ง์ ์ฆ๋ฅ

4. ๋ฐฐ์น ์ฒ๋ฆฌ
 - ์ฌ๋ฌ ์ด๋ฏธ์ง ๋์ ์ฒ๋ฆฌ
 - blobFromImages() ์ฌ์ฉ

5. ๋น๋๊ธฐ ์ถ๋ก
 - net.forwardAsync()
 - ์ถ๋ก ์ค ๋ค๋ฅธ ์์
์ํ

6. ๋ชจ๋ธ ์ ํ
 - ์๋ ์ค์: MobileNet, EfficientNet-Lite
 - ์ ํ๋ ์ค์: ResNet, EfficientNet

7. ์ถ๋ก ์๊ฐ ์ธก์
""")

    # Timing-measurement example; template assumes a loaded `net` and
    # numpy imported as np in the surrounding code.
    print("์ถ๋ก ์๊ฐ ์ธก์ :")
    code = '''
import time

# ์๋ฐ์
for _ in range(10):
    net.forward()

# ์ธก์
times = []
for _ in range(100):
    start = time.time()
    net.forward()
    times.append(time.time() - start)

print(f"ํ๊ท : {np.mean(times)*1000:.2f}ms")
print(f"FPS: {1/np.mean(times):.2f}")
'''
    print(code)
462
463
def model_download_guide() -> None:
    """Print download sources for the pretrained models referenced by
    the demos in this module (YOLO, SSD, face detector, pose, ENet /
    DeepLab segmentation, ONNX and OpenVINO model zoos)."""
    print("\n" + "=" * 50)
    print("๋ชจ๋ธ ๋ค์ด๋ก๋ ๊ฐ์ด๋")
    print("=" * 50)

    print("""
1. YOLO
 - ๊ณต์: https://pjreddie.com/darknet/yolo/
 - v4: https://github.com/AlexeyAB/darknet
 - v5+: https://github.com/ultralytics/yolov5

2. SSD MobileNet
 - TensorFlow Model Zoo
 - https://github.com/tensorflow/models/

3. ์ผ๊ตด ๊ฒ์ถ
 - OpenCV DNN Face Detector
 - https://github.com/opencv/opencv/tree/master/samples/dnn/face_detector

4. ํฌ์ฆ ์ถ์
 - OpenPose: https://github.com/CMU-Perceptual-Computing-Lab/openpose
 - ๊ฒฝ๋ ๋ฒ์ : https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch

5. ์ธ๊ทธ๋ฉํ
์ด์
 - ENet: https://github.com/e-lab/ENet-training
 - DeepLab: https://github.com/tensorflow/models/tree/master/research/deeplab

6. ONNX Model Zoo
 - https://github.com/onnx/models
 - ๋ค์ํ ์ฌ์ ํ์ต ๋ชจ๋ธ

7. OpenVINO Model Zoo
 - https://github.com/openvinotoolkit/open_model_zoo
 - Intel ์ต์ ํ ๋ชจ๋ธ
""")
500
501
def main():
    """Run every DNN demo section in order, then print a completion
    message. Each entry is output-only except blob_creation_demo,
    which also writes 'dnn_input.jpg'.
    """
    demos = (
        dnn_module_overview,           # DNN module overview
        blob_creation_demo,            # blob creation
        image_classification_demo,     # image classification
        object_detection_yolo_demo,    # YOLO object detection
        object_detection_ssd_demo,     # SSD object detection
        face_detection_dnn_demo,       # DNN face detection
        semantic_segmentation_demo,    # semantic segmentation
        pose_estimation_dnn_demo,      # pose estimation
        dnn_performance_tips,          # performance optimization
        model_download_guide,          # model download guide
    )
    for demo in demos:
        demo()

    print("\nDNN ๋ชจ๋ ๋ฐ๋ชจ ์๋ฃ!")
535
536
# Script entry point: run all demos when executed directly.
if __name__ == '__main__':
    main()