-
Official Pre-Trained Models | YOLOX
https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime#download-onnx-models -
Body-Head-Hand Pre-Trained Models | PINTO_model_zoo
https://github.com/PINTO0309/PINTO_model_zoo/tree/main/426_YOLOX-Body-Head-Hand
Last active
December 17, 2023 04:01
-
-
Save UnaNancyOwen/802b724b46977115d94332b5971775b4 to your computer and use it in GitHub Desktop.
Object detection using YOLOX, with inference run by the OpenCV DNN module
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cv2 | |
import numpy as np | |
from yolox import DetectionModel | |
# params
WEIGHTS = "yolox_s.onnx"  # ONNX model file handed to DetectionModel
INPUT_SIZE = (640, 640)  # network input size handed to DetectionModel
NUM_CLASSES = 80  # number of drawing colors generated in main()
SCORE_THRESHOLD = 0.6  # score threshold passed to DetectionModel.detect()
IOU_THRESHOLD = 0.4  # IoU threshold passed to DetectionModel.detect()
# get random colors
def get_colors(num):
    """Return `num` reproducible pseudo-random colors.

    Each color is a list of three Python ints in the range 0..255. The
    sequence is fixed (seed 0), so the same index always maps to the same
    color across runs.

    A private ``RandomState`` is used instead of ``np.random.seed(0)`` so
    that calling this function does not reset the caller's global NumPy
    random state; the generated values are the same as with the global
    seed.
    """
    rng = np.random.RandomState(0)
    # One draw of three channels per color keeps the random stream identical
    # to drawing color by color from a freshly seeded generator.
    return [
        rng.randint(0, 256, [3]).astype(np.uint8).tolist()
        for _ in range(num)
    ]
# main
def main():
    """Detect objects in "dog.jpg" with YOLOX and display the result.

    Reads the image, runs DetectionModel.detect() with the module-level
    thresholds, draws one rectangle per kept detection (colored by class
    id) and shows the image until a key is pressed.

    Raises:
        IOError: if the image cannot be read.
    """
    # read image
    image = cv2.imread("dog.jpg")
    if image is None:
        raise IOError("can't read image!")

    # create detection model class for yolox
    model = DetectionModel(WEIGHTS, INPUT_SIZE)

    # detect objects
    class_ids, _scores, boxes = model.detect(image, SCORE_THRESHOLD, IOU_THRESHOLD)

    # draw objects
    colors = get_colors(NUM_CLASSES)
    thickness = 2
    line_type = cv2.LINE_AA
    for box, class_id in zip(boxes, class_ids):
        # Wrap the index so a model with more classes than NUM_CLASSES
        # reuses colors instead of raising IndexError.
        color = colors[int(class_id) % len(colors)]
        cv2.rectangle(image, box, color, thickness, line_type)

    # show image
    cv2.imshow("image", image)
    cv2.waitKey(0)
    # release the window once the user has pressed a key
    cv2.destroyAllWindows()
# run the demo only when executed as a script, not when imported
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cv2 | |
import numpy as np | |
# detection model class for yolox
class DetectionModel:
    """YOLOX object detector running on the OpenCV DNN module.

    ``input_size`` is indexed as (height, width): element 0 sizes the rows
    of the padded input image and element 1 its columns.

    Typical use::

        model = DetectionModel("yolox_s.onnx", (640, 640))
        class_ids, scores, boxes = model.detect(image, 0.6, 0.4)
    """

    # constructor
    def __init__(self, weight, input_size=(640, 640)):
        """Load the network from `weight` and precompute the decoding grids."""
        self.__initialize(weight, input_size)

    # initialize
    def __initialize(self, weight, input_size):
        """Read the network, select the OpenCV/CPU backend and build grids."""
        self.net = cv2.dnn.readNet(weight)
        self.input_size = input_size
        self.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
        self.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
        strides = [8, 16, 32]
        self.grids, self.expanded_strides = self.__create_grids_and_expanded_strides(strides)

    # create grids and expanded strides
    def __create_grids_and_expanded_strides(self, strides):
        """Build the per-prediction cell offsets and strides.

        Returns:
            (grids, expanded_strides): arrays of shape (1, N, 2) and
            (1, N, 1), where N is the total number of grid cells over all
            strides. ``grids[..., 0]`` is the cell column (x) and
            ``grids[..., 1]`` the cell row (y), enumerated row by row.
        """
        grids = []
        expanded_strides = []
        height, width = self.input_size[0], self.input_size[1]
        for stride in strides:
            hsize, wsize = height // stride, width // stride
            # x must span the width and y the height; passing the ranges in
            # the opposite order only happens to work for square inputs.
            xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
            grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
            grids.append(grid)
            expanded_strides.append(np.full((1, grid.shape[1], 1), stride))
        return np.concatenate(grids, 1), np.concatenate(expanded_strides, 1)

    # set preferable backend
    def setPreferableBackend(self, backend):
        """Forward `backend` to the underlying network."""
        self.net.setPreferableBackend(backend)

    # set preferable target
    def setPreferableTarget(self, target):
        """Forward `target` to the underlying network."""
        self.net.setPreferableTarget(target)

    # detect objects
    def detect(self, image, score_threshold, iou_threshold):
        """Run detection on `image`.

        Returns:
            (class_ids, scores, boxes) of the detections kept by NMS.
            Boxes are integer (x, y, width, height) in the coordinates of
            the original image. All three are empty lists when nothing is
            kept.
        """
        self.image_shape = image.shape
        input_blob, resize_ratio = self.__preprocess(image)
        output_blob = self.__predict(input_blob)
        boxes, scores, class_ids = self.__postprocess(output_blob, resize_ratio)
        boxes, scores, class_ids = self.__nms(boxes, scores, class_ids, score_threshold, iou_threshold)
        return class_ids, scores, boxes

    # preprocess
    def __preprocess(self, image):
        """Resize `image` keeping its aspect ratio and pad it to input_size.

        The resized image is placed in the top-left corner of a canvas
        filled with 114. Returns (input_blob, resize_ratio).
        """
        input_height, input_width = self.input_size[0], self.input_size[1]
        image_height, image_width = image.shape[0], image.shape[1]
        resize_ratio = min(input_height / image_height, input_width / image_width)
        resized_image = cv2.resize(image, dsize=None, fx=resize_ratio, fy=resize_ratio)
        padded_image = np.full((input_height, input_width, 3), 114, dtype=np.uint8)
        # Clip in case rounding inside cv2.resize yields one pixel too many.
        rows = min(resized_image.shape[0], input_height)
        cols = min(resized_image.shape[1], input_width)
        padded_image[:rows, :cols] = resized_image[:rows, :cols]
        # blobFromImage expects its size as (width, height), the opposite
        # of this class's (height, width) convention.
        blob_size = (input_width, input_height)
        # NOTE(review): swapRB=True reorders the channels before inference;
        # confirm this matches the channel order the model expects.
        input_blob = cv2.dnn.blobFromImage(padded_image, 1.0, blob_size, (0.0, 0.0, 0.0), True, False)
        return input_blob, resize_ratio

    # predict
    def __predict(self, input_blob):
        """Run a forward pass and return the first unconnected output."""
        self.net.setInput(input_blob)
        output_layer = self.net.getUnconnectedOutLayersNames()[0]  # "output"
        output_blob = self.net.forward(output_layer)
        return output_blob

    # postprocess
    def __postprocess(self, output_blob, resize_ratio):
        """Decode the raw network output.

        `output_blob` is indexed as (1, N, 5 + num_classes): center x/y
        offsets, log width/height, objectness, then per-class scores.

        Returns:
            (boxes_xywh, scores, class_ids): top-left (x, y, width, height)
            boxes scaled back to the original image, the best
            objectness * class score per prediction, and its class index.
        """
        # Work on a copy so the caller's blob is not modified.
        predictions = output_blob[0].copy()
        predictions[:, :2] = (predictions[:, :2] + self.grids[0]) * self.expanded_strides[0]
        predictions[:, 2:4] = np.exp(predictions[:, 2:4]) * self.expanded_strides[0]

        # center/size -> top-left/size
        boxes = predictions[:, :4]
        boxes_xywh = np.empty_like(boxes)
        boxes_xywh[:, 0] = boxes[:, 0] - boxes[:, 2] * 0.5
        boxes_xywh[:, 1] = boxes[:, 1] - boxes[:, 3] * 0.5
        boxes_xywh[:, 2] = boxes[:, 2]
        boxes_xywh[:, 3] = boxes[:, 3]
        boxes_xywh /= resize_ratio

        scores = predictions[:, 4:5] * predictions[:, 5:]
        class_ids = scores.argmax(1)
        scores = scores[np.arange(len(class_ids)), class_ids]
        return boxes_xywh, scores, class_ids

    # non maximum suppression
    def __nms(self, boxes, scores, class_ids, score_threshold, iou_threshold):
        """Apply per-class NMS and return the surviving detections.

        Returns (boxes, scores, class_ids). Boxes are an integer array with
        one row per detection; scores and class ids are lists. All three
        are empty lists when nothing survives.
        """
        indices = cv2.dnn.NMSBoxesBatched(boxes, scores, class_ids, score_threshold, iou_threshold)  # OpenCV 4.7.0 or later
        # Flatten so both flat and column-shaped index results are handled.
        indices = np.asarray(indices, dtype=int).reshape(-1)
        if indices.size == 0:
            return [], [], []
        keep_boxes = boxes[indices].astype(int)  # truncates toward zero, like int()
        keep_scores = list(scores[indices])
        keep_class_ids = list(class_ids[indices])
        return keep_boxes, keep_scores, keep_class_ids
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment