debug request
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import tensorrt as trt
import cv2
from hyperpose import Config, Model
TRT_LOGGER = trt.Logger()
# Simple helper data class that's a little nicer to use than a 2-tuple.
class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()
# Allocates all buffers required for an engine, i.e. host/device inputs/outputs.
def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()
    out_shapes = []
    input_shapes = []
    out_names = []
    max_batch_size = engine.max_batch_size
    for binding in engine:
        binding_shape = engine.get_binding_shape(binding)
        # Fix -1 dimension for proper memory allocation for batch_size > 1
        if binding_shape[0] == -1:
            binding_shape = (1,) + tuple(binding_shape)[1:]
        size = trt.volume(binding_shape) * max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate host and device buffers
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer to device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
            input_shapes.append(engine.get_binding_shape(binding))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))
            # Collect original output shapes and names from the engine
            out_shapes.append(engine.get_binding_shape(binding))
            out_names.append(binding)
    return inputs, outputs, bindings, stream, input_shapes, out_shapes, out_names, max_batch_size
# This function is generalized for multiple inputs/outputs.
# inputs and outputs are expected to be lists of HostDeviceMem objects.
def do_inference(context, bindings, inputs, outputs, stream):
    # Transfer input data to the GPU.
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    # Run inference.
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
    # Transfer predictions back from the GPU.
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    # Synchronize the stream.
    stream.synchronize()
    # Return only the host outputs.
    return [out.host for out in outputs]
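# Note (not in the original gist): execute_async_v2 is the explicit-batch API,
# so the batch size comes from the shapes set via set_binding_shape rather than
# from a separate batch_size argument; max_batch_size above only sizes buffers.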
class TrtModel(object):
    def __init__(self, model):
        self.engine_file = model
        self.engine = None
        self.inputs = None
        self.outputs = None
        self.bindings = None
        self.stream = None
        self.context = None
        self.input_shapes = None
        self.out_shapes = None
        self.max_batch_size = 1
        self.cuda_ctx = cuda.Device(0).make_context()
        if self.cuda_ctx:
            self.cuda_ctx.push()

    def build(self):
        with open(self.engine_file, 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
            self.engine = runtime.deserialize_cuda_engine(f.read())
        self.inputs, self.outputs, self.bindings, self.stream, self.input_shapes, \
            self.out_shapes, self.out_names, self.max_batch_size = allocate_buffers(self.engine)
        self.context = self.engine.create_execution_context()
        self.context.active_optimization_profile = 0
        if self.cuda_ctx:
            self.cuda_ctx.pop()

    def run(self, input, deflatten: bool = True, as_dict=False):
        # Lazy load implementation: deserialize the engine on first call.
        if self.engine is None:
            self.build()
        if self.cuda_ctx:
            self.cuda_ctx.push()
        input = np.asarray(input)
        batch_size = input.shape[0]
        allocate_place = np.prod(input.shape)
        self.inputs[0].host[:allocate_place] = input.flatten(order='C').astype(np.float32)
        self.context.set_binding_shape(0, input.shape)
        trt_outputs = do_inference(
            self.context, bindings=self.bindings,
            inputs=self.inputs, outputs=self.outputs, stream=self.stream)
        if self.cuda_ctx:
            self.cuda_ctx.pop()
        # Reshape TRT outputs to their original shapes instead of flattened arrays.
        if deflatten:
            trt_outputs = [output.reshape(shape) for output, shape in zip(trt_outputs, self.out_shapes)]
        if as_dict:
            return {name: trt_outputs[i] for i, name in enumerate(self.out_names)}
        return trt_outputs
        # return [trt_outputs[0][:batch_size]]
def preprocess(img):
    # img = cv2.resize(img, (368, 656))
    img = cv2.resize(img, (656, 368))                    # cv2.resize takes (width, height)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.transpose((2, 0, 1)).astype(np.float32)    # HWC -> CHW
    img /= 255.0
    return img
engine = TrtModel("/data/data/openpose-coco-V2-HW=368x656.onnx_b1_gpu0_fp16.engine")
engine.build()
image = cv2.imread("/data/pose_test.jpg")
trt_input = preprocess(image)
ori_image = trt_input
# The engine binding is NCHW, so add a leading batch dimension -> (1, 3, 368, 656).
trt_input = np.expand_dims(trt_input, axis=0)
trt_outputs = engine.run(trt_input)
conf_map, paf_map = trt_outputs
print("trt_output: ", np.array(trt_outputs[0]))
Config.set_model_name("openpose-coco")
Config.set_model_type(Config.MODEL.Openpose)
# Get the visualize function, which produces visualized part and limb heatmap
# images from the inferred heatmaps.
visualize = Model.get_visualize(Config.MODEL.Openpose)
vis_parts_heatmap, vis_limbs_heatmap = visualize(ori_image, conf_map[0], paf_map[0], save_tofile=True)
CocoLimb = list(zip([1, 8, 9, 1, 11, 12, 1, 2, 3, 1, 5, 6, 1, 0, 0, 14, 15],
                    [8, 9, 10, 11, 12, 13, 2, 3, 4, 5, 6, 7, 0, 14, 15, 16, 17]))
from enum import Enum

class CocoPart(Enum):
    Nose = 0
    Instance = 1
    RShoulder = 2
    RElbow = 3
    RWrist = 4
    LShoulder = 5
    LElbow = 6
    LWrist = 7
    RHip = 8
    RKnee = 9
    RAnkle = 10
    LHip = 11
    LKnee = 12
    LAnkle = 13
    REye = 14
    LEye = 15
    REar = 16
    LEar = 17
# Get the postprocess function, which assembles the detected parts from the
# inferred heatmaps into humans.
PostProcessor = Model.get_postprocessor(Config.MODEL.Openpose)
postprocessor = PostProcessor(parts=CocoPart, limbs=CocoLimb, hin=368, win=656,
                              hout=38, wout=46, colors=None)
humans = postprocessor.process(conf_map[0], paf_map[0], 368, 656)
# Draw all detected skeletons.
output_img = ori_image.copy()
for human in humans:
    output_img = human.draw_human(output_img)
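# Not in the original gist: a minimal sketch for saving the drawn result so it
# can be inspected. It assumes draw_human keeps the CHW float32 RGB layout of
# the image that was passed in; the output path is arbitrary.
print("detected humans:", len(humans))
result = np.asarray(output_img)
if result.ndim == 3 and result.shape[0] == 3:        # CHW -> HWC if needed
    result = result.transpose((1, 2, 0))
result = (np.clip(result, 0.0, 1.0) * 255.0).astype(np.uint8)
cv2.imwrite("/data/pose_result.jpg", cv2.cvtColor(result, cv2.COLOR_RGB2BGR))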