#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #object-detection #onnx #onnx-runtime #openvino-onnx-runtime #openvino-execution-provider-for-onnx #tiny-yolov4
Object detection with YOLOv4 in Python using OpenVINO™ Execution Provider
# pip3 install openvino
# Install ONNX Runtime for OpenVINO™ Execution Provider
# pip3 install onnxruntime-openvino==1.11.0
# pip3 install -r requirements.txt
# Running the ONNXRuntime OpenVINO™ Execution Provider sample
# python3 yolov4.py --device CPU_FP32 --video classroom.mp4 --model yolov4.onnx
'''
Copyright (C) 2021-2022, Intel Corporation
SPDX-License-Identifier: Apache-2.0
Major Portions of this code are copyright of their respective authors and released under the Apache License Version 2.0:
- onnx, Copyright 2021-2022. For licensing see https://github.com/onnx/models/blob/master/LICENSE
'''
import cv2
import numpy as np
from onnx import numpy_helper
import onnx
import onnxruntime as rt
import os
from PIL import Image
from scipy import special
import colorsys
import random
import argparse
import sys
import time
import platform
if platform.system() == "Windows":
from openvino import utils
utils.add_openvino_libs_to_path()
def image_preprocess(image, target_size, gt_boxes=None):
    """Letterbox-resize an image to target_size, padding with gray (128).

    The image is scaled with its aspect ratio preserved to fit inside
    (ih, iw), centered on a 128-valued canvas, and normalized to [0, 1].
    When gt_boxes is supplied, the box coordinates are mapped into the
    letterboxed frame and returned alongside the image.
    """
    ih, iw = target_size
    h, w, _ = image.shape

    ratio = min(iw / w, ih / h)
    new_w, new_h = int(ratio * w), int(ratio * h)
    resized = cv2.resize(image, (new_w, new_h))

    # gray canvas, image centered; leftover border is the letterbox padding
    canvas = np.full(shape=[ih, iw, 3], fill_value=128.0)
    pad_x, pad_y = (iw - new_w) // 2, (ih - new_h) // 2
    canvas[pad_y:new_h + pad_y, pad_x:new_w + pad_x, :] = resized
    canvas = canvas / 255.

    if gt_boxes is None:
        return canvas
    # shift/scale ground-truth boxes into the letterboxed coordinate frame
    gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * ratio + pad_x
    gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * ratio + pad_y
    return canvas, gt_boxes
def postprocess_bbbox(pred_bbox):
    """Decode raw YOLOv4 head outputs into absolute-pixel (x, y, w, h) boxes.

    pred_bbox: list of raw output tensors, one per detection scale, each
    shaped (batch, grid, grid, 3, 5 + num_classes).  Decoding uses the
    module-level ANCHORS, STRIDES and XYSCALE constants.  NOTE: the input
    tensors are modified in place before being flattened.
    Returns a single (N, 5 + num_classes) array of all decoded predictions.
    """
    for i, pred in enumerate(pred_bbox):
        conv_shape = pred.shape
        output_size = conv_shape[1]
        conv_raw_dxdy = pred[:, :, :, :, 0:2]
        conv_raw_dwdh = pred[:, :, :, :, 2:4]
        # build a (1, grid, grid, 3, 2) tensor of per-cell grid coordinates
        xy_grid = np.meshgrid(np.arange(output_size), np.arange(output_size))
        xy_grid = np.expand_dims(np.stack(xy_grid, axis=-1), axis=2)
        xy_grid = np.tile(np.expand_dims(xy_grid, axis=0), [1, 1, 1, 3, 1])
        xy_grid = xy_grid.astype(float)
        # YOLOv4 "scaled sigmoid" center decode, converted to pixels via the stride
        pred_xy = ((special.expit(conv_raw_dxdy) * XYSCALE[i]) - 0.5 * (XYSCALE[i] - 1) + xy_grid) * STRIDES[i]
        # width/height decode against the per-scale anchor priors
        pred_wh = (np.exp(conv_raw_dwdh) * ANCHORS[i])
        pred[:, :, :, :, 0:4] = np.concatenate([pred_xy, pred_wh], axis=-1)
    # flatten every scale to (N_scale, 5 + num_classes) and stack them
    pred_bbox = [np.reshape(x, (-1, np.shape(x)[-1])) for x in pred_bbox]
    pred_bbox = np.concatenate(pred_bbox, axis=0)
    return pred_bbox
def postprocess_boxes(pred_bbox, org_img_shape, input_size, score_threshold):
    """Map decoded predictions back to the original image and drop weak boxes.

    pred_bbox: (N, 5 + num_classes) array of (x, y, w, h, conf, probs...).
    org_img_shape: (height, width) of the original image.
    input_size: network input resolution used for the letterbox resize.
    score_threshold: minimum conf * class-probability to keep a box.
    Returns an (M, 6) array of (xmin, ymin, xmax, ymax, score, class).
    """
    predictions = np.array(pred_bbox)
    xywh = predictions[:, 0:4]
    objectness = predictions[:, 4]
    class_probs = predictions[:, 5:]

    # (1) center/size -> corner coordinates
    corners = np.concatenate(
        [xywh[:, :2] - xywh[:, 2:] * 0.5, xywh[:, :2] + xywh[:, 2:] * 0.5],
        axis=-1)

    # (2) undo the letterbox resize back to original-image pixels
    org_h, org_w = org_img_shape
    ratio = min(input_size / org_w, input_size / org_h)
    pad_w = (input_size - ratio * org_w) / 2
    pad_h = (input_size - ratio * org_h) / 2
    corners[:, 0::2] = 1.0 * (corners[:, 0::2] - pad_w) / ratio
    corners[:, 1::2] = 1.0 * (corners[:, 1::2] - pad_h) / ratio

    # (3) clip to the image bounds; boxes turned inside-out are zeroed
    corners = np.concatenate(
        [np.maximum(corners[:, :2], [0, 0]),
         np.minimum(corners[:, 2:], [org_w - 1, org_h - 1])],
        axis=-1)
    degenerate = np.logical_or(corners[:, 0] > corners[:, 2],
                               corners[:, 1] > corners[:, 3])
    corners[degenerate] = 0

    # (4) keep boxes whose geometric-mean side length is strictly positive and finite
    box_scale = np.sqrt(np.multiply.reduce(corners[:, 2:4] - corners[:, 0:2], axis=-1))
    scale_ok = np.logical_and(0 < box_scale, box_scale < np.inf)

    # (5) keep boxes whose best combined score clears the threshold
    best_class = np.argmax(class_probs, axis=-1)
    best_score = objectness * class_probs[np.arange(len(corners)), best_class]
    keep = np.logical_and(scale_ok, best_score > score_threshold)

    return np.concatenate(
        [corners[keep],
         best_score[keep][:, np.newaxis],
         best_class[keep][:, np.newaxis]],
        axis=-1)
def bboxes_iou(boxes1, boxes2):
    """Compute the IoU between two broadcastable sets of corner-format boxes.

    boxes are (..., 4) arrays of (xmin, ymin, xmax, ymax).  The result is
    clamped below by float32 epsilon so callers never see an exact zero.
    """
    b1 = np.array(boxes1)
    b2 = np.array(boxes2)

    area1 = (b1[..., 2] - b1[..., 0]) * (b1[..., 3] - b1[..., 1])
    area2 = (b2[..., 2] - b2[..., 0]) * (b2[..., 3] - b2[..., 1])

    # intersection rectangle, clamped so disjoint boxes give zero overlap
    top_left = np.maximum(b1[..., :2], b2[..., :2])
    bottom_right = np.minimum(b1[..., 2:], b2[..., 2:])
    extent = np.maximum(bottom_right - top_left, 0.0)
    overlap = extent[..., 0] * extent[..., 1]

    union = area1 + area2 - overlap
    return np.maximum(1.0 * overlap / union, np.finfo(np.float32).eps)
def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
    """
    :param bboxes: (xmin, ymin, xmax, ymax, score, class)
    Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
          https://github.com/bharatsingh430/soft-nms
    Performs per-class (soft-)non-maximum suppression and returns the
    surviving boxes as a list of rows.
    """
    keep = []
    for cls in list(set(bboxes[:, 5])):
        remaining = bboxes[bboxes[:, 5] == cls]
        # repeatedly promote the highest-scoring box, then down-weight
        # (or drop) every remaining box that overlaps it too much
        while len(remaining) > 0:
            top = np.argmax(remaining[:, 4])
            winner = remaining[top]
            keep.append(winner)
            remaining = np.concatenate([remaining[:top], remaining[top + 1:]])
            overlap = bboxes_iou(winner[np.newaxis, :4], remaining[:, :4])
            weight = np.ones((len(overlap),), dtype=np.float32)
            assert method in ['nms', 'soft-nms']
            if method == 'nms':
                weight[overlap > iou_threshold] = 0.0
            if method == 'soft-nms':
                weight = np.exp(-(1.0 * overlap ** 2 / sigma))
            remaining[:, 4] = remaining[:, 4] * weight
            remaining = remaining[remaining[:, 4] > 0.]
    return keep
def read_class_names(class_file_name):
    """Load class names from a text file, one name per line.

    Returns a dict mapping the zero-based line index to the class name.
    """
    with open(class_file_name, 'r') as handle:
        return {idx: line.strip('\n') for idx, line in enumerate(handle)}
def draw_bbox(image, bboxes, classes=None, show_label=True):
    """Draw detection boxes (and optional class/score labels) on image.

    bboxes: [x_min, y_min, x_max, y_max, probability, cls_id] format coordinates.
    classes: dict of {class_id: name}; when omitted it is loaded from
        "coco.names" on first use.  (Previously the file was read at
        import time through the default argument, which crashed module
        import whenever "coco.names" was missing from the working dir.)
    Returns the image with boxes drawn in place.
    """
    if classes is None:
        # lazy default: only read the label file when actually needed
        classes = read_class_names("coco.names")
    num_classes = len(classes)
    image_h, image_w, _ = image.shape
    # one well-spread HSV-derived color per class, shuffled deterministically
    hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
    colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
    random.seed(0)
    random.shuffle(colors)
    random.seed(None)
    for i, bbox in enumerate(bboxes):
        coor = np.array(bbox[:4], dtype=np.int32)
        fontScale = 0.5
        score = bbox[4]
        class_ind = int(bbox[5])
        bbox_color = colors[class_ind]
        # box thickness scales with the image size
        bbox_thick = int(0.6 * (image_h + image_w) / 600)
        c1, c2 = (coor[0], coor[1]), (coor[2], coor[3])
        cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
        if show_label:
            bbox_mess = '%s: %.2f' % (classes[class_ind], score)
            t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick//2)[0]
            # filled rectangle as a background behind the label text
            cv2.rectangle(image, c1, (c1[0] + t_size[0], c1[1] - t_size[1] - 3), bbox_color, -1)
            cv2.putText(image, bbox_mess, (c1[0], c1[1]-2), cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale, (0, 0, 0), bbox_thick//2, lineType=cv2.LINE_AA)
    return image
def get_anchors(anchors_path, tiny=False):
    """Read comma-separated anchor values and return them as a (3, 3, 2) array.

    tiny is accepted for API compatibility but is not used here.
    """
    with open(anchors_path) as handle:
        raw = handle.readline()
    values = np.array(raw.split(','), dtype=np.float32)
    return values.reshape(3, 3, 2)
# Specify the path to the anchors file on your machine
ANCHORS = "./yolov4_anchors.txt"
# feature-map strides for the three YOLOv4 detection scales
STRIDES = [8, 16, 32]
# per-scale center-decode scaling factors ("scaled sigmoid") for YOLOv4
XYSCALE = [1.2, 1.1, 1.05]
# replace the path string with the parsed (3, 3, 2) anchor array
ANCHORS = get_anchors(ANCHORS)
STRIDES = np.array(STRIDES)
def parse_arguments():
    """Build the command-line parser for this sample and return parsed args."""
    parser = argparse.ArgumentParser(
        description='Object Detection using YOLOv4 in OPENCV using OpenVINO Execution Provider for ONNXRuntime')
    parser.add_argument(
        '--device', default='CPU_FP32',
        help="Device to perform inference on 'cpu (MLAS)' or on devices supported by OpenVINO-EP [CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16].")
    parser.add_argument('--image', help='Path to image file.')
    parser.add_argument('--video', help='Path to video file.')
    parser.add_argument('--model', help='Path to model.')
    return parser.parse_args()
def check_model_extension(fp):
    """Validate that fp names an existing .onnx model file; raise otherwise."""
    # extension check first (case-insensitive)
    if os.path.splitext(fp)[-1].lower() != ".onnx":
        raise Exception(fp, "is an unknown file format. Use the model ending with .onnx format")
    # then make sure the file actually exists on disk
    if not os.path.exists(fp):
        raise Exception("[ ERROR ] Path of the onnx model file is Invalid")
def main():
    """Run YOLOv4 detection on an image, video file or webcam stream.

    Reads the source selected on the command line, runs each frame
    through the ONNX model (CPU EP or OpenVINO EP), draws detections
    and writes the annotated output next to the input.
    """
    # Process arguments
    args = parse_arguments()
    # Validate model file path
    check_model_extension(args.model)
    # Process inputs
    win_name = 'Object detection using ONNXRuntime OpenVINO Execution Provider using YoloV4 model'
    cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)
    output_file = "yolo_out_py.avi"
    if (args.image):
        # Open the image file
        if not os.path.isfile(args.image):
            print("Input image file ", args.image, " doesn't exist")
            sys.exit(1)
        cap = cv2.VideoCapture(args.image)
        output_file = args.image[:-4]+'_yolo_out_py.jpg'
    elif (args.video):
        # Open the video file
        if not os.path.isfile(args.video):
            print("Input video file ", args.video, " doesn't exist")
            sys.exit(1)
        cap = cv2.VideoCapture(args.video)
        output_file = args.video[:-4]+'_yolo_out_py.avi'
    else:
        # Webcam input
        cap = cv2.VideoCapture(0)
    # Get the video writer initialized to save the output video
    if (not args.image):
        vid_writer = cv2.VideoWriter(output_file, cv2.VideoWriter_fourcc('M','J','P','G'), 30, (round(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),round(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))
    # Check the device information and create a session
    device = args.device
    so = rt.SessionOptions()
    so.log_severity_level = 3
    if(args.device == 'cpu'):
        print("Device type selected is 'cpu' which is the default CPU Execution Provider (MLAS)")
        # Specify the path to the ONNX model on your machine and register the CPU EP
        sess = rt.InferenceSession(args.model, so, providers=['CPUExecutionProvider'])
    else:
        # Specify the path to the ONNX model on your machine and register the OpenVINO EP
        sess = rt.InferenceSession(args.model, so, providers=['OpenVINOExecutionProvider'], provider_options=[{'device_type' : device}])
        print("Device type selected is: " + device + " using the OpenVINO Execution Provider")
        '''
        other 'device_type' options are: (Any hardware target can be assigned if you have the access to it)
        'CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16'
        '''
    input_name = sess.get_inputs()[0].name
    while cv2.waitKey(1) < 0:
        # get frame from the video
        has_frame, frame = cap.read()
        # Stop the program if reached end of video
        if not has_frame:
            print("Done processing !!!")
            print("Output file is stored as ", output_file)
            has_frame=False
            cv2.waitKey(3000)
            # Release device
            cap.release()
            break
        input_size = 416
        original_image = frame
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        original_image_size = original_image.shape[:2]
        # letterbox-resize and add the batch dimension expected by the model
        image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
        image_data = image_data[np.newaxis, ...].astype(np.float32)
        outputs = sess.get_outputs()
        output_names = list(map(lambda output: output.name, outputs))
        # time only the inference call so the FPS overlay reflects the model
        start = time.time()
        detections = sess.run(output_names, {input_name: image_data})
        end = time.time()
        inference_time = end - start
        # decode the raw outputs, drop weak boxes, then apply per-class NMS
        pred_bbox = postprocess_bbbox(detections)
        bboxes = postprocess_boxes(pred_bbox, original_image_size, input_size, 0.25)
        bboxes = nms(bboxes, 0.213, method='nms')
        image = draw_bbox(original_image, bboxes)
        cv2.putText(image,device,(10,20),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
        cv2.putText(image,'FPS: {}'.format(1.0/inference_time),(10,40),cv2.FONT_HERSHEY_COMPLEX,0.5,(255,255,255),1)
        # Write the frame with the detection boxes
        if (args.image):
            cv2.imwrite(output_file, image.astype(np.uint8))
        else:
            vid_writer.write(image.astype(np.uint8))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        cv2.imshow(win_name, image)
if __name__ == "__main__":
    main()
#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #object-detection #onnx #onnx-runtime #openvino-onnx-runtime #yolov2 #openvino-execution-provider-for-onnx
Object detection with tinyYOLOv2 in Python using OpenVINO™ Execution Provider
# pip3 install openvino
# Install ONNX Runtime for OpenVINO™ Execution Provider
# pip3 install onnxruntime-openvino==1.11.0
# pip3 install -r requirements.txt
# How to run the sample
# python3 tiny_yolov2_obj_detection_sample.py --h
# Running the ONNXRuntime OpenVINO™ Execution Provider sample
# python3 tiny_yolov2_obj_detection_sample.py --video face-demographics-walking-and-pause.mp4 --model tinyyolov2.onnx --device CPU_FP32
'''
Copyright (C) 2021-2022, Intel Corporation
SPDX-License-Identifier: Apache-2.0
'''
import numpy as np
import onnxruntime as rt
import cv2
import time
import os
import argparse
import platform
if platform.system() == "Windows":
from openvino import utils
utils.add_openvino_libs_to_path()
# color look up table for different classes for object detection sample
# (BGR tuples indexed by predicted class id; some colors repeat)
clut = [(0,0,0),(255,0,0),(255,0,255),(0,0,255),(0,255,0),(0,255,128),
        (128,255,0),(128,128,0),(0,128,255),(128,0,128),
        (255,0,128),(128,0,255),(255,128,128),(128,255,128),(255,255,0),
        (255,128,128),(128,128,255),(255,128,128),(128,255,128),(128,255,128)]
# 20 labels that the tiny-yolov2 model can do the object_detection on
# (PASCAL VOC classes, in the order the model was trained with)
label = ["aeroplane","bicycle","bird","boat","bottle",
         "bus","car","cat","chair","cow","diningtable",
         "dog","horse","motorbike","person","pottedplant",
         "sheep","sofa","train","tvmonitor"]
def parse_arguments():
    """Build the command-line parser for this sample and return parsed args."""
    parser = argparse.ArgumentParser(
        description='Object Detection using YOLOv2 in OPENCV using OpenVINO Execution Provider for ONNXRuntime')
    parser.add_argument(
        '--device', default='CPU_FP32',
        help="Device to perform inference on 'cpu (MLAS)' or on devices supported by OpenVINO-EP [CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16].")
    parser.add_argument('--video', help='Path to video file.')
    parser.add_argument('--model', help='Path to model.')
    return parser.parse_args()
def sigmoid(x, derivative=False):
    """Logistic sigmoid.

    With derivative=True, x is assumed to already be a sigmoid output and
    the derivative x * (1 - x) is returned instead.
    """
    if derivative:
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))
def softmax(x):
    """Numerically stable softmax over axis 0.

    Subtracting the per-column maximum before exponentiating is
    mathematically identical (the factor cancels in the ratio) but avoids
    overflow/NaN for large logits, which the naive form produced.
    """
    scores = np.asarray(x)
    shifted = np.exp(scores - scores.max(0))
    return shifted / shifted.sum(0)
def check_model_extension(fp):
    """Raise if fp is not a path to an existing .onnx model file."""
    _, extension = os.path.splitext(fp)
    # reject anything that is not an .onnx file (case-insensitive)
    if extension.lower() != ".onnx":
        raise Exception(fp, "is an unknown file format. Use the model ending with .onnx format")
    # the model must exist on disk
    if not os.path.exists(fp):
        raise Exception("[ ERROR ] Path of the onnx model file is Invalid")
def check_video_file_extension(fp):
    """Raise if fp is not an existing .mp4/.avi/.mov video file."""
    extension = os.path.splitext(fp)[-1].lower()
    if extension not in (".mp4", ".avi", ".mov"):
        raise Exception(fp, "is an unknown file format. Use the video file ending with .mp4 or .avi or .mov formats")
    if not os.path.exists(fp):
        raise Exception("[ ERROR ] Path of the video file is Invalid")
def image_preprocess(frame):
    """Resize a frame to 416x416 and lay it out as a float32 (1, 3, 416, 416)
    tensor in CHW order, the input layout tiny-YOLOv2 expects."""
    resized = cv2.resize(frame, (416, 416))
    tensor = np.asarray(resized).astype(np.float32)
    # HWC -> CHW
    tensor = tensor.transpose(2, 0, 1)
    # add the leading batch dimension
    return tensor.reshape(1, 3, 416, 416)
def postprocess_output(out, frame, x_scale, y_scale, i):
    """Decode tiny-YOLOv2 raw output and draw boxes/labels on frame in place.

    out: model output list; out[0][0] is the raw detection tensor indexed
        as [channel][cy][cx] over a 13x13 grid with 5 anchors per cell.
    frame: BGR frame the detections are drawn onto (modified in place).
    x_scale / y_scale: frame-size / 416 factors mapping network
        coordinates back to frame pixels.
    i: frame index, used only for the console log message.
    """
    out = out[0][0]
    num_classes = 20
    # five anchor-box (width, height) pairs from the tiny-YOLOv2 config
    anchors = [1.08, 1.19, 3.42, 4.41, 6.63, 11.38, 9.42, 5.11, 16.62, 10.52]
    existing_labels = {l: [] for l in label}
    # Inside this loop we compute the bounding box b for grid cell (cy, cx)
    for cy in range(0,13):
        for cx in range(0,13):
            for b in range(0,5):
                # First we read the tx, ty, width(tw), and height(th) for the bounding box from the out array, as well as the confidence score
                channel = b*(num_classes+5)
                tx = out[channel ][cy][cx]
                ty = out[channel+1][cy][cx]
                tw = out[channel+2][cy][cx]
                th = out[channel+3][cy][cx]
                tc = out[channel+4][cy][cx]
                # 32 = 416 / 13: pixel stride of one grid cell
                x = (float(cx) + sigmoid(tx))*32
                y = (float(cy) + sigmoid(ty))*32
                w = np.exp(tw) * 32 * anchors[2*b]
                h = np.exp(th) * 32 * anchors[2*b+1]
                # calculating the confidence score
                confidence = sigmoid(tc) # The confidence value for the bounding box is given by tc
                classes = np.zeros(num_classes)
                for c in range(0,num_classes):
                    classes[c] = out[channel + 5 +c][cy][cx]
                # we take the softmax to turn the array into a probability distribution. And then we pick the class with the largest score as the winner.
                classes = softmax(classes)
                detected_class = classes.argmax()
                # Now we can compute the final score for this bounding box and we only want to keep the ones whose combined score is over a certain threshold
                if 0.60 < classes[detected_class]*confidence:
                    color =clut[detected_class]
                    # convert center/size to the top-left corner and scale to frame pixels
                    x = (x - w/2)*x_scale
                    y = (y - h/2)*y_scale
                    w *= x_scale
                    h *= y_scale
                    labelX = int((x+x+w)/2)
                    labelY = int((y+y+h)/2)
                    addLabel = True
                    # skip drawing when a same-class label already sits within 100 px
                    lab_threshold = 100
                    for point in existing_labels[label[detected_class]]:
                        if labelX < point[0] + lab_threshold and labelX > point[0] - lab_threshold and \
                           labelY < point[1] + lab_threshold and labelY > point[1] - lab_threshold:
                            addLabel = False
                    # Adding class labels to the output of the frame and also drawing a rectangular bounding box around the object detected.
                    if addLabel:
                        cv2.rectangle(frame, (int(x),int(y)),(int(x+w),int(y+h)),color,2)
                        cv2.rectangle(frame, (int(x),int(y-13)),(int(x)+9*len(label[detected_class]),int(y)),color,-1)
                        cv2.putText(frame,label[detected_class],(int(x)+2,int(y)-3),cv2.FONT_HERSHEY_COMPLEX,0.4,(255,255,255),1)
                        existing_labels[label[detected_class]].append((labelX,labelY))
                        print('{} detected in frame {}'.format(label[detected_class],i))
def show_bbox(device, frame, inference_time):
    """Overlay the device name and FPS on the frame, then display it."""
    white = (255, 255, 255)
    cv2.putText(frame, device, (10, 20), cv2.FONT_HERSHEY_COMPLEX, 0.5, white, 1)
    cv2.putText(frame, 'FPS: {}'.format(1.0/inference_time), (10, 40), cv2.FONT_HERSHEY_COMPLEX, 0.5, white, 1)
    # convert for display, then show in the preview window
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    cv2.imshow('frame', frame)
def main():
    """Run tiny-YOLOv2 detection over a video file and save the result.

    Parses CLI args, creates an ONNX Runtime session on the requested
    device (CPU EP or OpenVINO EP), then loops over the video frames:
    preprocess -> inference -> draw detections -> write/show the frame.
    """
    # Process arguments
    args = parse_arguments()
    # Validate model file path
    check_model_extension(args.model)
    so = rt.SessionOptions()
    so.log_severity_level = 3
    if (args.device == 'cpu'):
        print("Device type selected is 'cpu' which is the default CPU Execution Provider (MLAS)")
        # Specify the path to the ONNX model on your machine and register the CPU EP
        sess = rt.InferenceSession(args.model, so, providers=['CPUExecutionProvider'])
    elif (args.device == 'CPU_FP32' or args.device == 'GPU_FP32' or args.device == 'GPU_FP16' or args.device == 'MYRIAD_FP16' or args.device == 'VADM_FP16'):
        # Specify the path to the ONNX model on your machine and register the OpenVINO EP
        sess = rt.InferenceSession(args.model, so, providers=['OpenVINOExecutionProvider'], provider_options=[{'device_type' : args.device}])
        print("Device type selected is: " + args.device + " using the OpenVINO Execution Provider")
        '''
        other 'device_type' options are: (Any hardware target can be assigned if you have the access to it)
        'CPU_FP32', 'GPU_FP32', 'GPU_FP16', 'MYRIAD_FP16', 'VAD-M_FP16'
        '''
    else:
        raise Exception("Device type selected is not [cpu, CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VADM_FP16]")
    # Get the input name of the model
    input_name = sess.get_inputs()[0].name
    # validate video file input path
    check_video_file_extension(args.video)
    # Path to video file has to be provided
    cap = cv2.VideoCapture(args.video)
    # capturing different metrics of the image from the video
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    x_scale = float(width)/416.0  # In the document of tiny-yolo-v2, input shape of this network is (1,3,416,416).
    y_scale = float(height)/416.0
    # writing the inferencing output as a video to the local disk
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    output_video_name = args.device + "_output.avi"
    output_video = cv2.VideoWriter(output_video_name,fourcc, float(17.0), (640,360))
    # capturing one frame at a time from the video feed and performing the inference
    i = 0
    while cv2.waitKey(1) < 0:
        l_start = time.time()
        ret, frame = cap.read()
        if not ret:
            break
        initial_w = cap.get(3)
        initial_h = cap.get(4)
        # preprocessing the input frame and reshaping it.
        # In the document of tiny-yolo-v2, input shape of this network is (1,3,416,416). so we resize the model frame w.r.t that size.
        preprocessed_image = image_preprocess(frame)
        start = time.time()
        # Running the session by passing in the input data of the model
        out = sess.run(None, {input_name: preprocessed_image})
        end = time.time()
        inference_time = end - start
        # Get the output
        postprocess_output(out, frame, x_scale, y_scale, i)
        # Show the Output
        output_video.write(frame)
        show_bbox(args.device, frame, inference_time)
        # Press 'q' to quit the process
        print('Processed Frame {}'.format(i))
        i += 1
        l_end = time.time()
        print('Loop Time = {}'.format(l_end - l_start))
    output_video.release()
    cv2.destroyAllWindows()
if __name__ == "__main__":
    main()
#python #openvino #openvino-notebooks #live-inference #deeplearning #accelerated-inference #object-detection #pose-estimation
402-pose-estimation-webcam: Live Human Pose Estimation with OpenVINO
# Imports
import collections
import os
import sys
import time
import cv2
import numpy as np
from IPython import display
from numpy.lib.stride_tricks import as_strided
from openvino.runtime import Core
from decoder import OpenPoseDecoder
sys.path.append("../utils")
import notebook_utils as utils
# Download the model
# directory where model will be downloaded
base_model_dir = "model"
# model name as named in Open Model Zoo
model_name = "human-pose-estimation-0001"
# selected precision (FP32, FP16, FP16-INT8)
precision = "FP16-INT8"
model_path = f"model/intel/{model_name}/{precision}/{model_name}.xml"
model_weights_path = f"model/intel/{model_name}/{precision}/{model_name}.bin"
if not os.path.exists(model_path):
download_command = f"omz_downloader " \
f"--name {model_name} " \
f"--precision {precision} " \
f"--output_dir {base_model_dir}"
! $download_command
# Load the model
# initialize inference engine
ie_core = Core()
# read the network and corresponding weights from file
model = ie_core.read_model(model=model_path, weights=model_weights_path)
# load the model on the CPU (you can use GPU or MYRIAD as well)
compiled_model = ie_core.compile_model(model=model, device_name="CPU")
# get input and output names of nodes
input_layer = compiled_model.input(0)
output_layers = list(compiled_model.outputs)
# get input size
height, width = list(input_layer.shape)[2:]
# Processing OpenPoseDecoder
decoder = OpenPoseDecoder()
# Process Results
# 2d pooling in numpy (from: https://stackoverflow.com/a/54966908/1624463)
def pool2d(A, kernel_size, stride, padding, pool_mode="max"):
    """
    2D Pooling

    Parameters:
        A: input 2D array
        kernel_size: int, the size of the window
        stride: int, the stride of the window
        padding: int, implicit zero paddings on both sides of the input
        pool_mode: string, 'max' or 'avg'

    Raises:
        ValueError: if pool_mode is not 'max' or 'avg'.  (Previously an
        unknown mode silently returned None.)
    """
    if pool_mode not in ("max", "avg"):
        raise ValueError(f"pool_mode must be 'max' or 'avg', got {pool_mode!r}")
    # Padding
    A = np.pad(A, padding, mode="constant")
    # Window view of A
    output_shape = (
        (A.shape[0] - kernel_size) // stride + 1,
        (A.shape[1] - kernel_size) // stride + 1,
    )
    kernel_size = (kernel_size, kernel_size)
    # zero-copy sliding windows over A (stride tricks)
    A_w = as_strided(
        A,
        shape=output_shape + kernel_size,
        strides=(stride * A.strides[0], stride * A.strides[1]) + A.strides
    )
    A_w = A_w.reshape(-1, *kernel_size)
    # Return the result of pooling
    if pool_mode == "max":
        return A_w.max(axis=(1, 2)).reshape(output_shape)
    return A_w.mean(axis=(1, 2)).reshape(output_shape)
# non maximum suppression
def heatmap_nms(heatmaps, pooled_heatmaps):
    """Keep only local maxima: zero every heatmap cell whose value differs
    from its max-pooled counterpart (elementwise non-maximum suppression)."""
    peak_mask = heatmaps == pooled_heatmaps
    return heatmaps * peak_mask
# get poses from results
def process_results(img, pafs, heatmaps):
    """Decode network PAFs/heatmaps into poses scaled to img coordinates.

    Relies on the notebook-level `decoder` (OpenPoseDecoder) and
    `compiled_model` (whose output shape determines the scaling factor).
    Returns (poses, scores) where poses[:, :, :2] are pixel coordinates.
    """
    # this processing comes from
    # https://github.com/openvinotoolkit/open_model_zoo/blob/master/demos/common/python/models/open_pose.py
    pooled_heatmaps = np.array(
        [[pool2d(h, kernel_size=3, stride=1, padding=1, pool_mode="max") for h in heatmaps[0]]]
    )
    nms_heatmaps = heatmap_nms(heatmaps, pooled_heatmaps)
    # decode poses
    poses, scores = decoder(heatmaps, nms_heatmaps, pafs)
    output_shape = list(compiled_model.output(index=0).partial_shape)
    # ratio of the source image size to the network output feature-map size
    output_scale = img.shape[1] / output_shape[3].get_length(), img.shape[0] / output_shape[2].get_length()
    # multiply coordinates by scaling factor
    poses[:, :, :2] *= output_scale
    return poses, scores
# Draw Pose Overlays
colors = ((255, 0, 0), (255, 0, 255), (170, 0, 255), (255, 0, 85), (255, 0, 170), (85, 255, 0),
(255, 170, 0), (0, 255, 0), (255, 255, 0), (0, 255, 85), (170, 255, 0), (0, 85, 255),
(0, 255, 170), (0, 0, 255), (0, 255, 255), (85, 0, 255), (0, 170, 255))
default_skeleton = ((15, 13), (13, 11), (16, 14), (14, 12), (11, 12), (5, 11), (6, 12), (5, 6), (5, 7),
(6, 8), (7, 9), (8, 10), (1, 2), (0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (4, 6))
def draw_poses(img, poses, point_score_threshold, skeleton=default_skeleton):
    """Draw keypoints and skeleton limbs for each pose onto img (in place).

    poses: array of (num_points, 3) rows of (x, y, score); points whose
        score is at or below point_score_threshold are skipped.
    skeleton: iterable of (i, j) keypoint-index pairs to connect.
    Returns img with the overlay blended in.
    """
    if poses.size == 0:
        return img
    img_limbs = np.copy(img)
    for pose in poses:
        points = pose[:, :2].astype(np.int32)
        points_scores = pose[:, 2]
        # Draw joints.
        for i, (p, v) in enumerate(zip(points, points_scores)):
            if v > point_score_threshold:
                cv2.circle(img, tuple(p), 1, colors[i], 2)
        # Draw limbs.
        for i, j in skeleton:
            if points_scores[i] > point_score_threshold and points_scores[j] > point_score_threshold:
                cv2.line(img_limbs, tuple(points[i]), tuple(points[j]), color=colors[j], thickness=4)
    # blend the limb layer with the joint layer for a translucent overlay
    cv2.addWeighted(img, 0.4, img_limbs, 0.6, 0, dst=img)
    return img
# Main Processing Function
# main processing function to run pose estimation
def run_pose_estimation(source=0, flip=False, use_popup=False, skip_first_frames=0):
    """Run live pose estimation on a webcam index or video source.

    source: cv2-compatible source (device index, path or URL).
    flip: mirror frames horizontally (useful for webcams).
    use_popup: show results in an OpenCV window instead of the notebook.
    skip_first_frames: number of initial frames to skip in the source.
    """
    pafs_output_key = compiled_model.output("Mconv7_stage2_L1")
    heatmaps_output_key = compiled_model.output("Mconv7_stage2_L2")
    player = None
    try:
        # create video player to play with target fps
        player = utils.VideoPlayer(source, flip=flip, fps=30, skip_first_frames=skip_first_frames)
        # start capturing
        player.start()
        if use_popup:
            title = "Press ESC to Exit"
            cv2.namedWindow(title, cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE)
        processing_times = collections.deque()
        while True:
            # grab the frame
            frame = player.next()
            if frame is None:
                print("Source ended")
                break
            # if frame larger than full HD, reduce size to improve the performance
            scale = 1280 / max(frame.shape)
            if scale < 1:
                frame = cv2.resize(frame, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
            # resize image and change dims to fit neural network input
            # (see https://github.com/openvinotoolkit/open_model_zoo/tree/master/models/intel/human-pose-estimation-0001)
            input_img = cv2.resize(frame, (width, height), interpolation=cv2.INTER_AREA)
            # create batch of images (size = 1)
            input_img = input_img.transpose((2,0,1))[np.newaxis, ...]
            # measure processing time
            start_time = time.time()
            # get results
            results = compiled_model([input_img])
            stop_time = time.time()
            pafs = results[pafs_output_key]
            heatmaps = results[heatmaps_output_key]
            # get poses from network results
            poses, scores = process_results(frame, pafs, heatmaps)
            # draw poses on a frame
            frame = draw_poses(frame, poses, 0.1)
            processing_times.append(stop_time - start_time)
            # use processing times from last 200 frames
            if len(processing_times) > 200:
                processing_times.popleft()
            _, f_width = frame.shape[:2]
            # mean processing time [ms]
            processing_time = np.mean(processing_times) * 1000
            fps = 1000 / processing_time
            cv2.putText(frame, f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)", (20, 40),
                        cv2.FONT_HERSHEY_COMPLEX, f_width / 1000, (0, 0, 255), 1, cv2.LINE_AA)
            # use this workaround if there is flickering
            if use_popup:
                cv2.imshow(title, frame)
                key = cv2.waitKey(1)
                # escape = 27
                if key == 27:
                    break
            else:
                # encode numpy array to jpg
                _, encoded_img = cv2.imencode(".jpg", frame, params=[cv2.IMWRITE_JPEG_QUALITY, 90])
                # create IPython image
                i = display.Image(data=encoded_img)
                # display the image in this notebook
                display.clear_output(wait=True)
                display.display(i)
    # ctrl-c
    except KeyboardInterrupt:
        print("Interrupted")
    # any different error
    except RuntimeError as e:
        print(e)
    finally:
        if player is not None:
            # stop capturing
            player.stop()
        if use_popup:
            cv2.destroyAllWindows()
# Run Live Pose Estimation
run_pose_estimation(source=0, flip=True, use_popup=False)
# Run Pose Estimation on a Video File
video_file = "https://github.com/intel-iot-devkit/sample-videos/blob/master/store-aisle-detection.mp4?raw=true"
run_pose_estimation(video_file, flip=False, use_popup=False, skip_first_frames=500)
#python #openvino #openvino-notebooks #live-inference #deeplearning #accelerated-inference #object-detection
401-object-detection-webcam: Live Object Detection with OpenVINO
# Imports
import collections
import os
import sys
import time
import cv2
import numpy as np
from IPython import display
from openvino.runtime import Core
sys.path.append("../utils")
import notebook_utils as utils
# Download the Model
# directory where model will be downloaded
base_model_dir = "model"
# model name as named in Open Model Zoo
model_name = "ssdlite_mobilenet_v2"
download_command = f"omz_downloader " \
f"--name {model_name} " \
f"--output_dir {base_model_dir} " \
f"--cache_dir {base_model_dir}"
! $download_command
# Convert the Model
precision = "FP16"
# output path for the conversion
converted_model_path = f"model/public/{model_name}/{precision}/{model_name}.xml"
if not os.path.exists(converted_model_path):
convert_command = f"omz_converter " \
f"--name {model_name} " \
f"--download_dir {base_model_dir} " \
f"--precisions {precision}"
! $convert_command
# Load the Model
# initialize inference engine
ie_core = Core()
# read the network and corresponding weights from file
model = ie_core.read_model(model=converted_model_path)
# compile the model for the CPU (you can choose manually CPU, GPU, MYRIAD etc.)
# or let the engine choose the best available device (AUTO)
compiled_model = ie_core.compile_model(model=model, device_name="CPU")
# get input and output nodes
input_layer = compiled_model.input(0)
output_layer = compiled_model.output(0)
# get input size
height, width = list(input_layer.shape)[1:3]
# Process Results
# https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
classes = [
"background", "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
"truck", "boat", "traffic light", "fire hydrant", "street sign", "stop sign",
"parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
"bear", "zebra", "giraffe", "hat", "backpack", "umbrella", "shoe", "eye glasses",
"handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
"baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
"plate", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
"couch", "potted plant", "bed", "mirror", "dining table", "window", "desk", "toilet",
"door", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
"toaster", "sink", "refrigerator", "blender", "book", "clock", "vase", "scissors",
"teddy bear", "hair drier", "toothbrush", "hair brush"
]
# colors for above classes (Rainbow Color Map)
colors = cv2.applyColorMap(
src=np.arange(0, 255, 255 / len(classes), dtype=np.float32).astype(np.uint8),
colormap=cv2.COLORMAP_RAINBOW,
).squeeze()
def process_results(frame, results, thresh=0.6):
    """Convert raw detector output into a filtered list of detections.

    :param frame: original frame; only its size is used to scale coordinates.
    :param results: network output tensor of shape [1, 1, 100, 7] where each
        row is (image_id, label, score, xmin, ymin, xmax, ymax) with
        normalized [0, 1] coordinates.
    :param thresh: minimum confidence for a detection to be kept.
    :return: list of (label, score, (x, y, w, h)) tuples after NMS,
        or an empty list when nothing survives filtering.
    """
    frame_h, frame_w = frame.shape[:2]
    detections = results.squeeze()
    boxes = []
    labels = []
    scores = []
    for detection in detections:
        _, label, score, xmin, ymin, xmax, ymax = detection
        # Scale normalized corners to pixels and convert to (x, y, w, h),
        # the layout cv2.dnn.NMSBoxes expects.
        pixel_box = (
            int(xmin * frame_w),
            int(ymin * frame_h),
            int((xmax - xmin) * frame_w),
            int((ymax - ymin) * frame_h),
        )
        boxes.append(pixel_box)
        labels.append(int(label))
        scores.append(float(score))
    # Non-maximum suppression removes heavily overlapping duplicates and
    # returns the indices of the boxes to keep.
    # See https://paperswithcode.com/method/non-maximum-suppression
    indices = cv2.dnn.NMSBoxes(
        bboxes=boxes, scores=scores, score_threshold=thresh, nms_threshold=0.6
    )
    if len(indices) == 0:
        # nothing passed the score threshold / NMS
        return []
    return [(labels[i], scores[i], boxes[i]) for i in indices.flatten()]
def draw_boxes(frame, boxes):
    """Draw labeled bounding boxes onto *frame* in place and return it.

    :param frame: BGR image to annotate.
    :param boxes: iterable of (label, score, (x, y, w, h)) tuples, as produced
        by process_results().
    :return: the annotated frame.
    """
    for label, score, box in boxes:
        # per-class color from the rainbow palette, as plain Python ints
        box_color = tuple(map(int, colors[label]))
        x_min, y_min, box_w, box_h = box
        bottom_right = (x_min + box_w, y_min + box_h)
        cv2.rectangle(img=frame, pt1=(x_min, y_min), pt2=bottom_right, color=box_color, thickness=3)
        # class name and confidence just inside the top-left corner of the box;
        # font scale tracks the frame width so text stays readable at any size
        cv2.putText(
            img=frame,
            text=f"{classes[label]} {score:.2f}",
            org=(x_min + 10, y_min + 30),
            fontFace=cv2.FONT_HERSHEY_COMPLEX,
            fontScale=frame.shape[1] / 1000,
            color=box_color,
            thickness=1,
            lineType=cv2.LINE_AA,
        )
    return frame
# Main Processing Function
# main processing function to run object detection
def run_object_detection(source=0, flip=False, use_popup=False, skip_first_frames=0):
    """Run the live object-detection loop over a webcam or video file.

    :param source: camera index or video file path for the video player.
    :param flip: flip frames horizontally (useful for front-facing cameras).
    :param use_popup: show results in an OpenCV window instead of the notebook.
    :param skip_first_frames: number of initial frames to skip in the source.

    NOTE(review): relies on notebook-level names not visible in this chunk
    (collections, time, IPython `display`, notebook `utils`) — confirm imports.
    """
    player = None
    try:
        # create video player to play with target fps
        player = utils.VideoPlayer(
            source=source, flip=flip, fps=30, skip_first_frames=skip_first_frames
        )
        # start capturing
        player.start()
        if use_popup:
            title = "Press ESC to Exit"
            cv2.namedWindow(
                winname=title, flags=cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE
            )
        # rolling window of recent per-frame inference times for the FPS overlay
        processing_times = collections.deque()
        while True:
            # grab the frame
            frame = player.next()
            if frame is None:
                print("Source ended")
                break
            # if frame larger than full HD, reduce size to improve the performance
            scale = 1280 / max(frame.shape)
            if scale < 1:
                frame = cv2.resize(
                    src=frame,
                    dsize=None,
                    fx=scale,
                    fy=scale,
                    interpolation=cv2.INTER_AREA,
                )
            # resize image and change dims to fit neural network input
            input_img = cv2.resize(
                src=frame, dsize=(width, height), interpolation=cv2.INTER_AREA
            )
            # create batch of images (size = 1)
            input_img = input_img[np.newaxis, ...]
            # measure processing time (inference only, not pre/post-processing)
            start_time = time.time()
            # get results
            results = compiled_model([input_img])[output_layer]
            stop_time = time.time()
            # get poses from network results
            boxes = process_results(frame=frame, results=results)
            # draw boxes on a frame
            frame = draw_boxes(frame=frame, boxes=boxes)
            processing_times.append(stop_time - start_time)
            # use processing times from last 200 frames
            if len(processing_times) > 200:
                processing_times.popleft()
            # frame.shape[:2] is (height, width); only the width is needed
            _, f_width = frame.shape[:2]
            # mean processing time [ms]
            processing_time = np.mean(processing_times) * 1000
            fps = 1000 / processing_time
            cv2.putText(
                img=frame,
                text=f"Inference time: {processing_time:.1f}ms ({fps:.1f} FPS)",
                org=(20, 40),
                fontFace=cv2.FONT_HERSHEY_COMPLEX,
                fontScale=f_width / 1000,
                color=(0, 0, 255),
                thickness=1,
                lineType=cv2.LINE_AA,
            )
            # use this workaround if there is flickering
            if use_popup:
                cv2.imshow(winname=title, mat=frame)
                key = cv2.waitKey(1)
                # escape = 27
                if key == 27:
                    break
            else:
                # encode numpy array to jpg
                _, encoded_img = cv2.imencode(
                    ext=".jpg", img=frame, params=[cv2.IMWRITE_JPEG_QUALITY, 100]
                )
                # create IPython image
                i = display.Image(data=encoded_img)
                # display the image in this notebook
                display.clear_output(wait=True)
                display.display(i)
    # ctrl-c
    except KeyboardInterrupt:
        print("Interrupted")
    # any different error
    except RuntimeError as e:
        print(e)
    finally:
        if player is not None:
            # stop capturing
            player.stop()
        if use_popup:
            cv2.destroyAllWindows()
# Run Live Object Detection
# source=0 uses the default webcam; flip mirrors the image for a natural view.
# Set use_popup=True only when running locally (not in a hosted notebook).
run_object_detection(source=0, flip=True, use_popup=False)
# Run Object Detection on a Video File
video_file = "../201-vision-monodepth/data/Coco Walking in Berkeley.mp4"
run_object_detection(source=video_file, flip=False, use_popup=False)
#python #openvino #openvino-notebook #detection #object-detection
004-hello-detection: Introduction to Detection in OpenVINO
# Imports
import cv2
import matplotlib.pyplot as plt
import numpy as np
from openvino.runtime import Core
# Load the Model
ie = Core()
model = ie.read_model(model="model/horizontal-text-detection-0001.xml")
compiled_model = ie.compile_model(model=model, device_name="CPU")
input_layer_ir = compiled_model.input(0)
# the model exposes a named output tensor containing the detected boxes
output_layer_ir = compiled_model.output("boxes")
# Load an Image
# Text detection models expects image in BGR format
image = cv2.imread("data/intel_rnb.jpg")
# N,C,H,W = batch size, number of channels, height, width
N, C, H, W = input_layer_ir.shape
# Resize image to meet network expected input sizes
resized_image = cv2.resize(image, (W, H))
# Reshape to network input shape (HWC -> CHW, then add the batch dimension)
input_image = np.expand_dims(resized_image.transpose(2, 0, 1), 0)
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));
# Do Inference
# Create inference request
boxes = compiled_model([input_image])[output_layer_ir]
# Remove zero only boxes: all-zero rows are padding in the fixed-size output
boxes = boxes[~np.all(boxes == 0, axis=1)]
# Visualize Results
# For each detection, the description has the format: [x_min, y_min, x_max, y_max, conf]
# Image passed here is in BGR format with changed width and height. To display it in colors expected by matplotlib we use cvtColor function
def convert_result_to_image(bgr_image, resized_image, boxes, threshold=0.3, conf_labels=True):
    """Render detection boxes (and optionally confidences) on an RGB copy of the image.

    :param bgr_image: original image in BGR layout.
    :param resized_image: network-sized image, used to compute the scale back
        to the original resolution.
    :param boxes: iterable of [x_min, y_min, x_max, y_max, conf] rows.
    :param threshold: minimum confidence for a box to be drawn.
    :param conf_labels: when True, print the confidence above each box.
    :return: annotated image in RGB layout.
    """
    # Colors for the boxes (green) and the confidence text (red).
    palette = {"red": (255, 0, 0), "green": (0, 255, 0)}
    # Ratios mapping network-resolution coordinates back to the original image.
    real_y, real_x = bgr_image.shape[:2]
    resized_y, resized_x = resized_image.shape[:2]
    ratio_x = real_x / resized_x
    ratio_y = real_y / resized_y
    # matplotlib expects RGB, so convert once up front
    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    for box in boxes:
        # confidence factor sits in the last position of each row
        conf = box[-1]
        if conf <= threshold:
            continue
        # Scale each corner to the original resolution. Odd indices are
        # y-coordinates and are clamped to >= 10 so a box at the very top of
        # the image keeps a visible upper bar.
        scaled = []
        for idx, corner_position in enumerate(box[:-1]):
            if idx % 2:
                scaled.append(int(max(corner_position * ratio_y, 10)))
            else:
                scaled.append(int(corner_position * ratio_x))
        x_min, y_min, x_max, y_max = scaled
        # rectangle parameters: image, start_point, end_point, color, thickness
        rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), palette["green"], 3)
        if conf_labels:
            # text parameters: image, text, bottom-left corner, font,
            # font_scale, color, thickness, line_type
            rgb_image = cv2.putText(
                rgb_image,
                f"{conf:.2f}",
                (x_min, y_min - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                palette["red"],
                1,
                cv2.LINE_AA,
            )
    return rgb_image
# Show the annotated detections; axis off for a cleaner figure.
plt.figure(figsize=(10, 6))
plt.axis("off")
plt.imshow(convert_result_to_image(image, resized_image, boxes, conf_labels=False));
#python #openvino #openvino-tensorflow #tensorflow #object-detection
Object Detection with OpenVINO™ integration with TensorFlow
# Upload the required wheel files, models and images in a google drive folder
# Uncomment and run the below command to copy them in your current workspace
#!cp /content/drive/MyDrive/TF-OV/working_dir_files/* .
import os
# Enable these variables for runtime inference optimizations:
# fold TF variables into constants and turn on oneDNN primitives.
os.environ["OPENVINO_TF_CONVERT_VARIABLES_TO_CONSTANTS"] = "1"
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "1"
!python3 -m pip -q install --upgrade pip
!python3 -m pip -q install pillow
!python3 -m pip -q install keras_applications
# Install TensorFlow (v2.8.0) and OpenVINO-TensorFlow (v2.0.0) only if they aren't found
!if python3 -c "import tensorflow"; then echo "Found TensorFlow. Skipping."; else echo "TensorFlow Not Found. Installing."; python3 -m pip -q install tensorflow==2.8.0; fi
!if python3 -c "import openvino_tensorflow"; then echo "Found OpenVINO-TensorFlow. Skipping."; else echo "OpenVINO-TensorFlow Not Found. Installing.";
OVTF_DIR = "/"
RAW_GITHUB_COMMON = "https://raw.githubusercontent.com/openvinotoolkit/openvino_tensorflow/master/examples/common/"
GITHUB_EXAMPLES = "https://github.com/openvinotoolkit/openvino_tensorflow/raw/master/examples/data/"
RAW_GITHUB_EXAMPLES = "https://raw.githubusercontent.com/openvinotoolkit/openvino_tensorflow/master/examples/"
import os
# Fetch helper modules and example data from the openvino_tensorflow repo,
# unless they already exist locally or the repo is checked out under OVTF_DIR.
files = os.listdir('.')
if ('common' not in files or 'examples' not in files) and 'openvino_tensorflow' not in os.listdir(OVTF_DIR):
    !mkdir ./common
    !wget {RAW_GITHUB_COMMON}/post_process.py -O ./common/post_process.py
    !wget {RAW_GITHUB_COMMON}/pre_process.py -O ./common/pre_process.py
    !wget {RAW_GITHUB_COMMON}/utils.py -O ./common/utils.py
    !mkdir -p ./examples/data
    !wget {GITHUB_EXAMPLES}/grace_hopper.jpg -O ./examples/data/grace_hopper.jpg
    !wget {GITHUB_EXAMPLES}/yolov4_anchors.txt -O ./examples/data/yolov4_anchors.txt
    !wget {RAW_GITHUB_EXAMPLES}/convert_yolov4.sh -O ./examples/convert_yolov4.sh
    !wget {RAW_GITHUB_EXAMPLES}/keras_to_tensorflow.patch -O ./examples/keras_to_tensorflow.patch
import sys
# When running from inside the repo checkout, make its examples importable.
if 'openvino_tensorflow' in os.listdir(OVTF_DIR):
    sys_append = os.path.abspath(OVTF_DIR + "/openvino_tensorflow/examples/")
    sys.path.append(sys_append)
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import tensorflow as tf
import openvino_tensorflow as ovtf
from PIL import Image
import cv2
import matplotlib.pyplot as plt
from common.utils import get_input_mode, get_colors, draw_boxes, get_anchors, rename_file
from common.pre_process import preprocess_image_yolov3 as preprocess_image
from common.post_process import yolo3_postprocess_np
# Download and Convert the YoloV4 model
# Locate the examples directory: local checkout first, repo install otherwise.
files = os.listdir('.')
if 'examples' in files:
    path = "examples"
else:
    path = "{0}/openvino_tensorflow/examples/".format(OVTF_DIR)
# Jupyter magic: enter the examples directory and run the conversion script.
%cd {path}
!chmod +x convert_yolov4.sh && bash convert_yolov4.sh
# Once the model conversion is completed; move back to outside of examples directory
%cd ../
def load_coco_names(file_name):
    """Parse a label file with only class names, one per line.

    :param file_name: path to the label file.
    :return: dict mapping class ID (0-based line index) to class name.
    """
    names = {}
    with open(file_name) as f:
        for id_, name in enumerate(f):
            # Fixed: strip the trailing newline so names are clean for display
            # and consistent with load_labels(), which rstrips its lines.
            names[id_] = name.rstrip()
    return names
def load_labels(label_file):
    """Parse the label file, assuming labels are separated with a newline,
    and return the list of labels.
    """
    raw_lines = tf.io.gfile.GFile(label_file).readlines()
    # strip the trailing newline (and any trailing whitespace) from each entry
    return [line.rstrip() for line in raw_lines]
def infer_openvino_tensorflow(model_file, image_file, input_height, input_width,
                              label_file, anchor_file, conf_threshold, iou_threshold):
    """Run object detection with a TF SavedModel and save/print the predictions.

    :param model_file: path to the TensorFlow SavedModel directory.
    :param image_file: path to the input image.
    :param input_height: network input height in pixels.
    :param input_width: network input width in pixels.
    :param label_file: path to the class-names file (one name per line).
    :param anchor_file: path to the YOLO anchors file.
    :param conf_threshold: minimum detection confidence.
    :param iou_threshold: IoU threshold used for non-max suppression.

    NOTE(review): also reads the module-level global ``output_dir`` — it is not
    a parameter. If ``label_file`` or ``anchor_file`` is falsy, ``labels`` /
    ``anchors`` are undefined below and post-processing raises NameError.
    """
    import time  # hoisted from mid-function; only used for the timed run

    print("CREATE MODEL - BEGIN")
    # Fixed: original had a duplicated assignment (`model = model = ...`).
    model = tf.saved_model.load(model_file)
    print("CREATE MODEL - END")

    if label_file:
        classes = load_coco_names(label_file)  # kept for parity; unused below
        labels = load_labels(label_file)
        colors = get_colors(labels)
    if anchor_file:
        anchors = get_anchors(anchor_file)

    print("PREDICTION - BEGIN")
    # Preprocess image
    image = Image.open(image_file)
    img = np.asarray(image)
    image_width, image_height = image.size
    img_resized = tf.convert_to_tensor(preprocess_image(image, (input_height, input_width)))

    # Warmup run: the first call includes one-time graph/compilation overhead.
    detected_boxes = model(img_resized)
    # Timed run
    start = time.time()
    detected_boxes = model(img_resized)
    elapsed = time.time() - start
    print('Inference time in ms: %f' % (elapsed * 1000))
    print("PREDICTION - END")

    image_shape = tuple((image_height, image_width))
    # apply non max suppresion, draw boxes and save updated image
    out_boxes, out_classes, out_scores = yolo3_postprocess_np(
        detected_boxes,
        image_shape,
        anchors,
        len(labels), (input_height, input_width),
        max_boxes=10,
        confidence=conf_threshold,
        iou_threshold=iou_threshold,
        elim_grid_sense=True)
    img_bbox = draw_boxes(img, out_boxes, out_classes, out_scores,
                          labels, colors)
    # `output_dir` is a module-level global configured alongside the thresholds
    if output_dir:
        cv2.imwrite(os.path.join(output_dir, "detections.jpg"), img_bbox)
    else:
        cv2.imwrite("detections.jpg", img_bbox)
    plt.imshow(img)
# Inference configuration: sample image, converted model, labels and anchors.
input_file = "examples/data/grace_hopper.jpg"
model_file = "examples/data/yolo_v4"
label_file = "examples/data/coco.names"
anchor_file = "examples/data/yolov4_anchors.txt"
input_height = 416
input_width = 416
backend_name = "CPU"
output_dir = "."
conf_threshold = 0.6
iou_threshold = 0.5
#Print list of available backends
print('Available Backends:')
backends_list = ovtf.list_backends()
for backend in backends_list:
    print(backend)
# Run once with the OpenVINO backend enabled...
ovtf.set_backend(backend_name)
print("OpenVINO TensorFlow is enabled")
infer_openvino_tensorflow(model_file, input_file, input_height, input_width, label_file, anchor_file, conf_threshold, iou_threshold )
# ...and once with stock TensorFlow, for a timing comparison.
ovtf.disable() ## Disabling OVTF
print("OpenVINO TensorFlow is disabled")
infer_openvino_tensorflow(model_file, input_file, input_height, input_width, label_file, anchor_file, conf_threshold, iou_threshold )
# re-enable OVTF so later cells run accelerated again
ovtf.enable()
Fri Jun 17 2022 09:51:37 GMT+0000 (Coordinated Universal Time)
#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #object-detection #onnx #onnx-runtime #openvino-onnx-runtime #openvino-execution-provider-for-onnx #tiny-yolov4Fri Jun 17 2022 09:49:43 GMT+0000 (Coordinated Universal Time) https://github.com/microsoft/onnxruntime-inference-examples/blob/main/python/OpenVINO_EP/tiny_yolo_v2_object_detection/tiny_yolov2_obj_detection_sample.py
#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #object-detection #onnx #onnx-runtime #openvino-onnx-runtime #yolov2 #openvino-execution-provider-for-onnxFri Jun 17 2022 04:28:34 GMT+0000 (Coordinated Universal Time) https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/402-pose-estimation-webcam/402-pose-estimation.ipynb
#python #openvino #openvino-notebooks #live-inference #deeplearning #accelerated-inference #object-detection #pose-estimationThu Jun 16 2022 14:49:51 GMT+0000 (Coordinated Universal Time) https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/401-object-detection-webcam/401-object-detection.ipynb
#python #openvino #openvino-notebooks #live-inference #deeplearning #accelerated-inference #object-detectionThu Jun 09 2022 16:51:17 GMT+0000 (Coordinated Universal Time) https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/004-hello-detection/004-hello-detection.ipynb
#python #openvino #openvino-notebook #detection #object-detectionThu Jun 09 2022 14:50:09 GMT+0000 (Coordinated Universal Time) https://github.com/openvinotoolkit/openvino_tensorflow/blob/master/examples/notebooks/OpenVINO_TensorFlow_object_detection_example.ipynb
#python #openvino #openvino-tensorflow #tensorflow #object-detection

