# camera_detection.py
import time
import random

import cv2
import numpy as np  # frames and input tensors are numpy arrays

from edgetpu.detection.engine import DetectionEngine
from config import Config
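
# NOTE: this module assumes Config exposes MODEL_PATH, LABEL_PATH,
# DETECT_THRESHOLD and TOP_K (all used below). A minimal sketch of such a
# config, with placeholder values, might look like:
#
#   class Config:
#       MODEL_PATH = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu.tflite'
#       LABEL_PATH = 'coco_labels.txt'
#       DETECT_THRESHOLD = 0.5
#       TOP_K = 10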
class VideoCamera(object):
    def __init__(self):
        print('starting camera')
        # Parse the label file ("<id> <name>" per line) into {id: name}.
        with open(Config.LABEL_PATH, 'r', encoding='utf-8') as f:
            pairs = (l.strip().split(maxsplit=1) for l in f.readlines())
            self.labels = dict((int(k), v) for k, v in pairs)
        self.engine = DetectionEngine(Config.MODEL_PATH)
        # Using OpenCV to capture from device 0. If you have trouble capturing
        # from a webcam, comment the line below out and use a video file
        # instead.
        self.video = cv2.VideoCapture(0)
        if self.video.isOpened():
            self.video.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
            self.video.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
        # If you decide to use video.mp4, the file must be in the same folder
        # as main.py.
        # self.video = cv2.VideoCapture('video.mp4')
    def __del__(self):
        print('closing camera')
        self.video.release()
    def get_frame(self):
        start_time = time.time()
        font = cv2.FONT_HERSHEY_SIMPLEX
        topLeftCornerOfText = (10, 20)
        fontScale = 0.6
        fontColor = (random.randint(0, 255), random.randint(0, 255),
                     random.randint(0, 255))
        lineType = 1
        annotate_text = ""
        # TFLite input tensors are NHWC: [batch, height, width, channels].
        _, height, width, channels = self.engine.get_input_tensor_shape()
        if not self.video.isOpened():
            print('Camera is not opened')
            return None
        ret, img = self.video.read()
        if not ret:
            print('Camera frame could not be read')
            return None
        # Convert BGR (OpenCV default) to RGB, resize to the model's input
        # size, and flatten to the 1-D tensor the Edge TPU engine expects.
        input_tensor = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_tensor = cv2.resize(input_tensor, (width, height))
        input_tensor = input_tensor.reshape(width * height * channels)
        rows = img.shape[0]
        cols = img.shape[1]
        record_time = time.time()
        elapsed_record = record_time - start_time  # seconds
        # Run inference on the Edge TPU.
        ans = self.engine.DetectWithInputTensor(input_tensor,
                                                threshold=Config.DETECT_THRESHOLD,
                                                top_k=Config.TOP_K)
        detection_time = time.time()
        elapsed_detection = detection_time - record_time  # seconds
        # Draw each detection on the frame.
        if ans:
            for obj in ans:
                # bounding_box holds relative coordinates; flattened it is
                # [x0, y0, x1, y1], scaled here to pixel coordinates.
                box = obj.bounding_box.flatten().tolist()
                x = box[0] * cols
                y = box[1] * rows
                right = box[2] * cols
                bottom = box[3] * rows
                if obj.score > Config.DETECT_THRESHOLD:
                    cv2.rectangle(img, (int(x), int(y)), (int(right), int(bottom)),
                                  fontColor, thickness=1)
                    annotate_text = "%s %.2f" % (
                        self.labels[obj.label_id], obj.score)
                    cv2.putText(img, annotate_text,
                                (int(x), int(bottom)),
                                font,
                                fontScale,
                                fontColor,
                                lineType)
        # Overlay timing stats and encode the annotated frame as JPEG.
        elapsed_frame_ms = (time.time() - start_time) * 1000.0
        frame_rate_text = "FPS: %.2f record: %.2fms detection: %.2fms" % (
            1000.0 / elapsed_frame_ms,
            elapsed_record * 1000.0,
            elapsed_detection * 1000.0)
        cv2.putText(img, frame_rate_text,
                    topLeftCornerOfText,
                    font, fontScale,
                    fontColor, lineType)
        ret, jpeg = cv2.imencode('.jpg', img)
        return jpeg.tobytes()
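
# A minimal smoke test, runnable as `python camera_detection.py`. It assumes
# a camera on device 0 and the model/label files referenced by Config; the
# output filename is a placeholder. The real serving code lives in main.py
# (not shown here).
if __name__ == '__main__':
    camera = VideoCamera()
    frame = camera.get_frame()
    if frame is not None:
        with open('test_frame.jpg', 'wb') as out:
            out.write(frame)
        print('wrote test_frame.jpg')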