UnaNancyOwen · December 17, 2023 04:01
diff --git a/README.md b/README.md
diff --git a/main.py b/main.py
 import cv2
 import numpy as np
 from yolox import DetectionModel

 # params: settings read by main()
 # model file handed to DetectionModel (and from there to cv2.dnn.readNet)
 WEIGHTS = "yolox_s.onnx"
 # network input size handed to DetectionModel
 INPUT_SIZE = (640, 640)
 # number of drawing colors generated; colors are looked up by class id
 NUM_CLASSES = 80
 # score threshold passed to DetectionModel.detect
 SCORE_THRESHOLD = 0.6
 # IoU threshold passed to DetectionModel.detect
 IOU_THRESHOLD = 0.4

 # get random colors
 def get_colors(num):
    """Return ``num`` reproducible pseudo-random colors.

    Args:
        num: Number of colors to generate.

    Returns:
        A list of ``num`` colors, each a list of three ints in ``[0, 255]``.
        The same ``num`` always yields the same colors.
    """
    # A private generator seeded with 0 produces the same stream as the
    # legacy ``np.random.seed(0)`` call, but leaves the global NumPy RNG
    # untouched so unrelated random code is not silently reseeded.
    rng = np.random.RandomState(0)
    return [rng.randint(0, 256, [3]).astype(np.uint8).tolist() for _ in range(num)]

 # main
 def main():
    """Detect objects in ``dog.jpg`` with YOLOX and show the annotated image.

    Raises:
        IOError: If ``dog.jpg`` could not be read.
    """
    # load the picture; a None result is treated as a read failure
    frame = cv2.imread("dog.jpg")
    if frame is None:
        raise IOError("can't read image!")

    # build the detector from the module-level settings
    detector = DetectionModel(WEIGHTS, INPUT_SIZE)

    # run detection with the configured thresholds
    class_ids, scores, boxes = detector.detect(frame, SCORE_THRESHOLD, IOU_THRESHOLD)

    # outline every detection with the color assigned to its class id
    palette = get_colors(NUM_CLASSES)
    for rect, _score, label in zip(boxes, scores, class_ids):
        cv2.rectangle(frame, rect, palette[label], 2, cv2.LINE_AA)

    # display the result and block on a key press
    cv2.imshow("image", frame)
    cv2.waitKey(0)

 # run the demo only when this file is executed as a script
 if __name__ == '__main__':
    main()
diff --git a/yolox.py b/yolox.py
 import cv2
 import numpy as np

 # detection model class for yolox
 class DetectionModel:
    """YOLOX object detector built on OpenCV's ``cv2.dnn`` module.

    ``input_size`` is treated as ``(height, width)``: index 0 is paired with
    the image rows and index 1 with the image columns.
    """

    # constructor
    def __init__(self, weight, input_size=(640, 640)):
        """Load the network and precompute the decoding grids.

        Args:
            weight: Model file passed to ``cv2.dnn.readNet``.
            input_size: Network input size as ``(height, width)``.
        """
        self.__initialize(weight, input_size)

    # initialize
    def __initialize(self, weight, input_size):
        """Read the network, select backend/target and build the grids."""
        self.net = cv2.dnn.readNet(weight)
        self.input_size = input_size

        # defaults; they can be changed later through the public setters
        self.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
        self.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

        strides = [8, 16, 32]
        self.grids, self.expanded_strides = self.__create_grids_and_expanded_strides(strides)

    # create grids and expanded strides
    def __create_grids_and_expanded_strides(self, strides):
        """Build the per-cell grid coordinates and strides used for decoding.

        Args:
            strides: Iterable of integer strides, one per output level.

        Returns:
            ``(grids, expanded_strides)`` where ``grids`` has shape
            ``(1, N, 2)`` holding ``(x, y)`` cell indices and
            ``expanded_strides`` has shape ``(1, N, 1)`` holding the stride
            of each cell; ``N`` is the total number of cells over all levels.
        """
        grids = []
        expanded_strides = []
        input_height, input_width = self.input_size[0], self.input_size[1]

        for stride in strides:
            hsize = input_height // stride
            wsize = input_width // stride
            # meshgrid(x, y): x must span the columns (width) and y the rows
            # (height); swapping them only goes unnoticed for square inputs
            xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
            grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
            grids.append(grid)
            expanded_strides.append(np.full((*grid.shape[:2], 1), stride))

        return np.concatenate(grids, 1), np.concatenate(expanded_strides, 1)

    # set preferable backend
    def setPreferableBackend(self, backend):
        """Forward ``backend`` to the underlying network."""
        self.net.setPreferableBackend(backend)

    # set preferable target
    def setPreferableTarget(self, target):
        """Forward ``target`` to the underlying network."""
        self.net.setPreferableTarget(target)

    # detect objects
    def detect(self, image, score_threshold, iou_threshold):
        """Detect objects in ``image``.

        Args:
            image: Input image; its ``shape`` is read as (rows, cols, ...).
            score_threshold: Score threshold handed to the NMS step.
            iou_threshold: IoU threshold handed to the NMS step.

        Returns:
            ``(class_ids, scores, boxes)`` of the kept detections. Boxes are
            integer ``(x, y, w, h)`` values scaled back to ``image``.
        """
        # remembered on the instance because __preprocess reads it
        self.image_shape = image.shape
        input_blob, resize_ratio = self.__preprocess(image)
        output_blob = self.__predict(input_blob)
        boxes, scores, class_ids = self.__postprocess(output_blob, resize_ratio)
        boxes, scores, class_ids = self.__nms(boxes, scores, class_ids, score_threshold, iou_threshold)

        return class_ids, scores, boxes

    # preprocess
    def __preprocess(self, image):
        """Resize ``image`` keeping its aspect ratio and pad it to the input size.

        Returns:
            ``(input_blob, resize_ratio)``; ``resize_ratio`` is the scale
            applied to the image and is needed to map boxes back.
        """
        input_height, input_width = self.input_size[0], self.input_size[1]
        resize_ratio = min(input_height / self.image_shape[0], input_width / self.image_shape[1])
        resized_image = cv2.resize(image, dsize=None, fx=resize_ratio, fy=resize_ratio)

        # canvas filled with the value 114; the resized image goes top-left
        padded_image = np.full((input_height, input_width, 3), 114, dtype=np.uint8)
        padded_image[: resized_image.shape[0], : resized_image.shape[1]] = resized_image

        # blobFromImage takes its size as (width, height), the reverse of
        # the (height, width) order used by input_size
        blob_size = (input_width, input_height)
        input_blob = cv2.dnn.blobFromImage(padded_image, 1.0, blob_size, (0.0, 0.0, 0.0), True, False)

        return input_blob, resize_ratio

    # predict
    def __predict(self, input_blob):
        """Run the network on ``input_blob`` and return its first output."""
        self.net.setInput(input_blob)

        output_layer = self.net.getUnconnectedOutLayersNames()[0] # "output"
        output_blob = self.net.forward(output_layer)

        return output_blob

    # postprocess
    def __postprocess(self, output_blob, resize_ratio):
        """Decode the raw network output into boxes, scores and class ids.

        ``output_blob`` is modified in place while decoding.

        Returns:
            ``(boxes_xywh, scores, class_ids)``: top-left/width/height boxes
            divided by ``resize_ratio``, the best score of each prediction
            and the column index of that score.
        """
        # centers: (raw offset + cell index) * stride; sizes: exp(raw) * stride
        output_blob[..., :2] = (output_blob[..., :2] + self.grids) * self.expanded_strides
        output_blob[..., 2:4] = np.exp(output_blob[..., 2:4]) * self.expanded_strides

        predictions = output_blob[0]

        # convert (center x, center y, w, h) to (left, top, w, h)
        boxes = predictions[:, :4]
        boxes_xywh = np.ones_like(boxes)
        boxes_xywh[:, 0] = boxes[:, 0] - boxes[:, 2] * 0.5
        boxes_xywh[:, 1] = boxes[:, 1] - boxes[:, 3] * 0.5
        boxes_xywh[:, 2] = (boxes[:, 0] + boxes[:, 2] * 0.5) - boxes_xywh[:, 0]
        boxes_xywh[:, 3] = (boxes[:, 1] + boxes[:, 3] * 0.5) - boxes_xywh[:, 1]
        # undo the preprocessing scale so boxes refer to the original image
        boxes_xywh /= resize_ratio

        # column 4 scales every remaining column (presumably objectness
        # times per-class confidence); keep the best column per prediction
        scores = predictions[:, 4:5] * predictions[:, 5:]
        class_ids = scores.argmax(1)
        scores = scores[np.arange(len(class_ids)), class_ids]

        return boxes_xywh, scores, class_ids

    # non maximum suppression
    def __nms(self, boxes, scores, class_ids, score_threshold, iou_threshold):
        """Filter detections with OpenCV's batched non-maximum suppression.

        Returns:
            ``(keep_boxes, keep_scores, keep_class_ids)``. ``keep_boxes`` is
            an integer array when something was kept and an empty list
            otherwise; the other two are lists.
        """
        indices = cv2.dnn.NMSBoxesBatched(boxes, scores, class_ids, score_threshold, iou_threshold) # OpenCV 4.7.0 or later

        keep_boxes = [boxes[index] for index in indices]
        keep_scores = [scores[index] for index in indices]
        keep_class_ids = [class_ids[index] for index in indices]

        # truncate coordinates to ints only when there is something to convert
        if len(keep_boxes) != 0:
            keep_boxes = np.vectorize(int)(keep_boxes)

        return keep_boxes, keep_scores, keep_class_ids
	import cv2
	import numpy as np
	from yolox import DetectionModel

	# params: settings read by main()
	# model file handed to DetectionModel (and from there to cv2.dnn.readNet)
	WEIGHTS = "yolox_s.onnx"
	# network input size handed to DetectionModel
	INPUT_SIZE = (640, 640)
	# number of drawing colors generated; colors are looked up by class id
	NUM_CLASSES = 80
	# score threshold passed to DetectionModel.detect
	SCORE_THRESHOLD = 0.6
	# IoU threshold passed to DetectionModel.detect
	IOU_THRESHOLD = 0.4

	# get random colors
	def get_colors(num):
	    """Return ``num`` reproducible pseudo-random colors.

	    Args:
	        num: Number of colors to generate.

	    Returns:
	        A list of ``num`` colors, each a list of three ints in ``[0, 255]``.
	        The same ``num`` always yields the same colors.
	    """
	    # A private generator seeded with 0 produces the same stream as the
	    # legacy ``np.random.seed(0)`` call, but leaves the global NumPy RNG
	    # untouched so unrelated random code is not silently reseeded.
	    rng = np.random.RandomState(0)
	    return [rng.randint(0, 256, [3]).astype(np.uint8).tolist() for _ in range(num)]

	# main
	def main():
	    """Detect objects in ``dog.jpg`` with YOLOX and show the annotated image.

	    Raises:
	        IOError: If ``dog.jpg`` could not be read.
	    """
	    # load the picture; a None result is treated as a read failure
	    frame = cv2.imread("dog.jpg")
	    if frame is None:
	        raise IOError("can't read image!")

	    # build the detector from the module-level settings
	    detector = DetectionModel(WEIGHTS, INPUT_SIZE)

	    # run detection with the configured thresholds
	    class_ids, scores, boxes = detector.detect(frame, SCORE_THRESHOLD, IOU_THRESHOLD)

	    # outline every detection with the color assigned to its class id
	    palette = get_colors(NUM_CLASSES)
	    for rect, _score, label in zip(boxes, scores, class_ids):
	        cv2.rectangle(frame, rect, palette[label], 2, cv2.LINE_AA)

	    # display the result and block on a key press
	    cv2.imshow("image", frame)
	    cv2.waitKey(0)

	# run the demo only when this file is executed as a script
	if __name__ == '__main__':
	    main()
	import cv2
	import numpy as np

	# detection model class for yolox
	class DetectionModel:
	    """YOLOX object detector built on OpenCV's ``cv2.dnn`` module.

	    ``input_size`` is treated as ``(height, width)``: index 0 is paired with
	    the image rows and index 1 with the image columns.
	    """

	    # constructor
	    def __init__(self, weight, input_size=(640, 640)):
	        """Load the network and precompute the decoding grids.

	        Args:
	            weight: Model file passed to ``cv2.dnn.readNet``.
	            input_size: Network input size as ``(height, width)``.
	        """
	        self.__initialize(weight, input_size)

	    # initialize
	    def __initialize(self, weight, input_size):
	        """Read the network, select backend/target and build the grids."""
	        self.net = cv2.dnn.readNet(weight)
	        self.input_size = input_size

	        # defaults; they can be changed later through the public setters
	        self.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
	        self.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

	        strides = [8, 16, 32]
	        self.grids, self.expanded_strides = self.__create_grids_and_expanded_strides(strides)

	    # create grids and expanded strides
	    def __create_grids_and_expanded_strides(self, strides):
	        """Build the per-cell grid coordinates and strides used for decoding.

	        Args:
	            strides: Iterable of integer strides, one per output level.

	        Returns:
	            ``(grids, expanded_strides)`` where ``grids`` has shape
	            ``(1, N, 2)`` holding ``(x, y)`` cell indices and
	            ``expanded_strides`` has shape ``(1, N, 1)`` holding the stride
	            of each cell; ``N`` is the total number of cells over all levels.
	        """
	        grids = []
	        expanded_strides = []
	        input_height, input_width = self.input_size[0], self.input_size[1]

	        for stride in strides:
	            hsize = input_height // stride
	            wsize = input_width // stride
	            # meshgrid(x, y): x must span the columns (width) and y the rows
	            # (height); swapping them only goes unnoticed for square inputs
	            xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
	            grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
	            grids.append(grid)
	            expanded_strides.append(np.full((*grid.shape[:2], 1), stride))

	        return np.concatenate(grids, 1), np.concatenate(expanded_strides, 1)

	    # set preferable backend
	    def setPreferableBackend(self, backend):
	        """Forward ``backend`` to the underlying network."""
	        self.net.setPreferableBackend(backend)

	    # set preferable target
	    def setPreferableTarget(self, target):
	        """Forward ``target`` to the underlying network."""
	        self.net.setPreferableTarget(target)

	    # detect objects
	    def detect(self, image, score_threshold, iou_threshold):
	        """Detect objects in ``image``.

	        Args:
	            image: Input image; its ``shape`` is read as (rows, cols, ...).
	            score_threshold: Score threshold handed to the NMS step.
	            iou_threshold: IoU threshold handed to the NMS step.

	        Returns:
	            ``(class_ids, scores, boxes)`` of the kept detections. Boxes are
	            integer ``(x, y, w, h)`` values scaled back to ``image``.
	        """
	        # remembered on the instance because __preprocess reads it
	        self.image_shape = image.shape
	        input_blob, resize_ratio = self.__preprocess(image)
	        output_blob = self.__predict(input_blob)
	        boxes, scores, class_ids = self.__postprocess(output_blob, resize_ratio)
	        boxes, scores, class_ids = self.__nms(boxes, scores, class_ids, score_threshold, iou_threshold)

	        return class_ids, scores, boxes

	    # preprocess
	    def __preprocess(self, image):
	        """Resize ``image`` keeping its aspect ratio and pad it to the input size.

	        Returns:
	            ``(input_blob, resize_ratio)``; ``resize_ratio`` is the scale
	            applied to the image and is needed to map boxes back.
	        """
	        input_height, input_width = self.input_size[0], self.input_size[1]
	        resize_ratio = min(input_height / self.image_shape[0], input_width / self.image_shape[1])
	        resized_image = cv2.resize(image, dsize=None, fx=resize_ratio, fy=resize_ratio)

	        # canvas filled with the value 114; the resized image goes top-left
	        padded_image = np.full((input_height, input_width, 3), 114, dtype=np.uint8)
	        padded_image[: resized_image.shape[0], : resized_image.shape[1]] = resized_image

	        # blobFromImage takes its size as (width, height), the reverse of
	        # the (height, width) order used by input_size
	        blob_size = (input_width, input_height)
	        input_blob = cv2.dnn.blobFromImage(padded_image, 1.0, blob_size, (0.0, 0.0, 0.0), True, False)

	        return input_blob, resize_ratio

	    # predict
	    def __predict(self, input_blob):
	        """Run the network on ``input_blob`` and return its first output."""
	        self.net.setInput(input_blob)

	        output_layer = self.net.getUnconnectedOutLayersNames()[0] # "output"
	        output_blob = self.net.forward(output_layer)

	        return output_blob

	    # postprocess
	    def __postprocess(self, output_blob, resize_ratio):
	        """Decode the raw network output into boxes, scores and class ids.

	        ``output_blob`` is modified in place while decoding.

	        Returns:
	            ``(boxes_xywh, scores, class_ids)``: top-left/width/height boxes
	            divided by ``resize_ratio``, the best score of each prediction
	            and the column index of that score.
	        """
	        # centers: (raw offset + cell index) * stride; sizes: exp(raw) * stride
	        output_blob[..., :2] = (output_blob[..., :2] + self.grids) * self.expanded_strides
	        output_blob[..., 2:4] = np.exp(output_blob[..., 2:4]) * self.expanded_strides

	        predictions = output_blob[0]

	        # convert (center x, center y, w, h) to (left, top, w, h)
	        boxes = predictions[:, :4]
	        boxes_xywh = np.ones_like(boxes)
	        boxes_xywh[:, 0] = boxes[:, 0] - boxes[:, 2] * 0.5
	        boxes_xywh[:, 1] = boxes[:, 1] - boxes[:, 3] * 0.5
	        boxes_xywh[:, 2] = (boxes[:, 0] + boxes[:, 2] * 0.5) - boxes_xywh[:, 0]
	        boxes_xywh[:, 3] = (boxes[:, 1] + boxes[:, 3] * 0.5) - boxes_xywh[:, 1]
	        # undo the preprocessing scale so boxes refer to the original image
	        boxes_xywh /= resize_ratio

	        # column 4 scales every remaining column (presumably objectness
	        # times per-class confidence); keep the best column per prediction
	        scores = predictions[:, 4:5] * predictions[:, 5:]
	        class_ids = scores.argmax(1)
	        scores = scores[np.arange(len(class_ids)), class_ids]

	        return boxes_xywh, scores, class_ids

	    # non maximum suppression
	    def __nms(self, boxes, scores, class_ids, score_threshold, iou_threshold):
	        """Filter detections with OpenCV's batched non-maximum suppression.

	        Returns:
	            ``(keep_boxes, keep_scores, keep_class_ids)``. ``keep_boxes`` is
	            an integer array when something was kept and an empty list
	            otherwise; the other two are lists.
	        """
	        indices = cv2.dnn.NMSBoxesBatched(boxes, scores, class_ids, score_threshold, iou_threshold) # OpenCV 4.7.0 or later

	        keep_boxes = [boxes[index] for index in indices]
	        keep_scores = [scores[index] for index in indices]
	        keep_class_ids = [class_ids[index] for index in indices]

	        # truncate coordinates to ints only when there is something to convert
	        if len(keep_boxes) != 0:
	            keep_boxes = np.vectorize(int)(keep_boxes)

	        return keep_boxes, keep_scores, keep_class_ids