YOLO26N 姿态估计 TensorRT 部署Jetson 实时推理1. TensorRT 转换fromultralyticsimportYOLO modelYOLO(yolo26n-pose.pt)model.export(formatengine,imgsz640,halfTrue,batch1)# 或 trtexec/usr/src/tensorrt/bin/trtexec\--onnxyolo26n-pose.onnx\--saveEngineyolo26n-pose.engine\--fp16\--workspace20482. TensorRT 推理封装#!/usr/bin/env python3trt_pose.py - TensorRT 姿态估计推理importtensorrtastrtimportpycuda.driverascudaimportpycuda.autoinitimportnumpyasnpimportcv2importtimeclassTRTPoseDetector:def__init__(self,engine_path,conf_thresh0.3):self.conf_threshconf_thresh loggertrt.Logger(trt.Logger.WARNING)runtimetrt.Runtime(logger)withopen(engine_path,rb)asf:self.engineruntime.deserialize_cuda_engine(f.read())self.contextself.engine.create_execution_context()self.inputs[]self.outputs[]self.bindings[]self.streamcuda.Stream()foriinrange(self.engine.num_io_tensors):nameself.engine.get_tensor_name(i)dtypetrt.nptype(self.engine.get_tensor_dtype(name))shapeself.engine.get_tensor_shape(name)shapetuple(max(1,s)ifs0else1forsinshape)sizetrt.volume(shape)host_memcuda.pagelocked_empty(size,dtype)device_memcuda.mem_alloc(host_mem.nbytes)self.bindings.append(int(device_mem))info{name:name,host:host_mem,device:device_mem,shape:shape}ifself.engine.get_tensor_mode(name)trt.TensorIOMode.INPUT:self.inputs.append(info)else:self.outputs.append(info)defpreprocess(self,image):h,wimage.shape[:2]scalemin(640/h,640/w)new_h,new_wint(h*scale),int(w*scale)resizedcv2.resize(image,(new_w,new_h))canvasnp.full((640,640,3),114,dtypenp.uint8)dy,dx(640-new_h)//2,(640-new_w)//2canvas[dy:dynew_h,dx:dxnew_w]resized blobcanvas[:,:,::-1].transpose(2,0,1).astype(np.float32)/255.0returnnp.expand_dims(blob,axis0),scale,(dy,dx)defdetect(self,image):blob,scale,padself.preprocess(image)np.copyto(self.inputs[0][host],blob.ravel())cuda.memcpy_htod_async(self.inputs[0][device],self.inputs[0][host],self.stream)self.context.set_input_shape(self.inputs[0][name],blob.shape)self.context.execute_async_v2(bindingsself.bindings,stream_handleself.stream.handle)cuda.memcpy_dtoh_async(self.outputs[0][host],self.outputs[0][device],self.stream)self.stream.synchronize()outputself.outputs[0][host].reshape(self.outputs[0][shape])returnself.postprocess(output,scale,pad,image.shape[:2])defpostprocess(self,output,scale,pad,orig_shape):predictionsoutput[0].T# [8400, 56]boxespredictions[:,:4]scorespredictions[:,4]kptspredictions[:,6:].reshape(-1,17,3)maskscoresself.conf_thresh boxesboxes[mask]scoresscores[mask]kptskpts[mask]# 坐标还原dy,dxpad boxes[:,[0,2]](boxes[:,[0,2]]-dx)/scale boxes[:,[1,3]](boxes[:,[1,3]]-dy)/scale kpts[:,:,0](kpts[:,:,0]-dx)/scale kpts[:,:,1](kpts[:,:,1]-dy)/scale results[]foriinrange(len(boxes)):results.append({bbox:boxes[i].tolist(),score:float(scores[i]),keypoints:kpts[i].tolist(),})returnresults# 骨架连接SKELETON[(0,1),(0,2),(1,3),(2,4),(5,6),(5,7),(6,8),(7,9),(8,10),(11,12),(11,13),(12,14),(13,15),(14,16),(5,11),(6,12),]defdraw_pose(image,detections):fordetindetections:kptsdet[keypoints]for(i,j)inSKELETON:ifkpts[i][2]0.3andkpts[j][2]0.3:pt1(int(kpts[i][0]),int(kpts[i][1]))pt2(int(kpts[j][0]),int(kpts[j][1]))cv2.line(image,pt1,pt2,(0,255,0),2)for(x,y,vis)inkpts:ifvis0.3:cv2.circle(image,(int(x),int(y)),3,(0,0,255),-1)returnimageif__name____main__:modelTRTPoseDetector(yolo26n-pose.engine)capcv2.VideoCapture(0)fps_count,fps_start0,time.time()whileTrue:ret,framecap.read()ifnotret:breakdetectionsmodel.detect(frame)framedraw_pose(frame,detections)fps_count1iftime.time()-fps_start1.0:fpsfps_count/(time.time()-fps_start)fps_count,fps_start0,time.time()cv2.putText(frame,fFPS:{fps:.0f},(10,30),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0),2)cv2.imshow(Pose,frame)ifcv2.waitKey(1)0xFFord(q):breakcap.release()cv2.destroyAllWindows()3. 性能基准YOLO26N-Pose TensorRT 性能640x640 ┌──────────────────┬──────────┬──────────┐ │ 设备 │ FP16 │ INT8 │ ├──────────────────┼──────────┼──────────┤ │ Jetson Orin NX │ 5.2ms │ 3.8ms │ │ Jetson Orin Nano │ 9.5ms │ 6.5ms │ │ RTX 4090 │ 1.5ms │ 1.1ms │ │ RTX 3060 │ 4.2ms │ 3.0ms │ └──────────────────┴──────────┴──────────┘总结步骤工具输出导出 ONNXUltralytics.onnx转 TensorRTtrtexec / Ultralytics.engine推理TRTPoseDetector关键点可视化draw_pose骨架图
YOLO26N 姿态估计 TensorRT 部署:Jetson 实时推理
发布时间:2026/6/30 22:25:01
YOLO26N 姿态估计 TensorRT 部署Jetson 实时推理1. TensorRT 转换fromultralyticsimportYOLO modelYOLO(yolo26n-pose.pt)model.export(formatengine,imgsz640,halfTrue,batch1)# 或 trtexec/usr/src/tensorrt/bin/trtexec\--onnxyolo26n-pose.onnx\--saveEngineyolo26n-pose.engine\--fp16\--workspace20482. TensorRT 推理封装#!/usr/bin/env python3trt_pose.py - TensorRT 姿态估计推理importtensorrtastrtimportpycuda.driverascudaimportpycuda.autoinitimportnumpyasnpimportcv2importtimeclassTRTPoseDetector:def__init__(self,engine_path,conf_thresh0.3):self.conf_threshconf_thresh loggertrt.Logger(trt.Logger.WARNING)runtimetrt.Runtime(logger)withopen(engine_path,rb)asf:self.engineruntime.deserialize_cuda_engine(f.read())self.contextself.engine.create_execution_context()self.inputs[]self.outputs[]self.bindings[]self.streamcuda.Stream()foriinrange(self.engine.num_io_tensors):nameself.engine.get_tensor_name(i)dtypetrt.nptype(self.engine.get_tensor_dtype(name))shapeself.engine.get_tensor_shape(name)shapetuple(max(1,s)ifs0else1forsinshape)sizetrt.volume(shape)host_memcuda.pagelocked_empty(size,dtype)device_memcuda.mem_alloc(host_mem.nbytes)self.bindings.append(int(device_mem))info{name:name,host:host_mem,device:device_mem,shape:shape}ifself.engine.get_tensor_mode(name)trt.TensorIOMode.INPUT:self.inputs.append(info)else:self.outputs.append(info)defpreprocess(self,image):h,wimage.shape[:2]scalemin(640/h,640/w)new_h,new_wint(h*scale),int(w*scale)resizedcv2.resize(image,(new_w,new_h))canvasnp.full((640,640,3),114,dtypenp.uint8)dy,dx(640-new_h)//2,(640-new_w)//2canvas[dy:dynew_h,dx:dxnew_w]resized blobcanvas[:,:,::-1].transpose(2,0,1).astype(np.float32)/255.0returnnp.expand_dims(blob,axis0),scale,(dy,dx)defdetect(self,image):blob,scale,padself.preprocess(image)np.copyto(self.inputs[0][host],blob.ravel())cuda.memcpy_htod_async(self.inputs[0][device],self.inputs[0][host],self.stream)self.context.set_input_shape(self.inputs[0][name],blob.shape)self.context.execute_async_v2(bindingsself.bindings,stream_handleself.stream.handle)cuda.memcpy_dtoh_async(self.outputs[0][host],self.outputs[0][device],self.stream)self.stream.synchronize()outputself.outputs[0][host].reshape(self.outputs[0][shape])returnself.postprocess(output,scale,pad,image.shape[:2])defpostprocess(self,output,scale,pad,orig_shape):predictionsoutput[0].T# [8400, 56]boxespredictions[:,:4]scorespredictions[:,4]kptspredictions[:,6:].reshape(-1,17,3)maskscoresself.conf_thresh boxesboxes[mask]scoresscores[mask]kptskpts[mask]# 坐标还原dy,dxpad boxes[:,[0,2]](boxes[:,[0,2]]-dx)/scale boxes[:,[1,3]](boxes[:,[1,3]]-dy)/scale kpts[:,:,0](kpts[:,:,0]-dx)/scale kpts[:,:,1](kpts[:,:,1]-dy)/scale results[]foriinrange(len(boxes)):results.append({bbox:boxes[i].tolist(),score:float(scores[i]),keypoints:kpts[i].tolist(),})returnresults# 骨架连接SKELETON[(0,1),(0,2),(1,3),(2,4),(5,6),(5,7),(6,8),(7,9),(8,10),(11,12),(11,13),(12,14),(13,15),(14,16),(5,11),(6,12),]defdraw_pose(image,detections):fordetindetections:kptsdet[keypoints]for(i,j)inSKELETON:ifkpts[i][2]0.3andkpts[j][2]0.3:pt1(int(kpts[i][0]),int(kpts[i][1]))pt2(int(kpts[j][0]),int(kpts[j][1]))cv2.line(image,pt1,pt2,(0,255,0),2)for(x,y,vis)inkpts:ifvis0.3:cv2.circle(image,(int(x),int(y)),3,(0,0,255),-1)returnimageif__name____main__:modelTRTPoseDetector(yolo26n-pose.engine)capcv2.VideoCapture(0)fps_count,fps_start0,time.time()whileTrue:ret,framecap.read()ifnotret:breakdetectionsmodel.detect(frame)framedraw_pose(frame,detections)fps_count1iftime.time()-fps_start1.0:fpsfps_count/(time.time()-fps_start)fps_count,fps_start0,time.time()cv2.putText(frame,fFPS:{fps:.0f},(10,30),cv2.FONT_HERSHEY_SIMPLEX,1,(0,255,0),2)cv2.imshow(Pose,frame)ifcv2.waitKey(1)0xFFord(q):breakcap.release()cv2.destroyAllWindows()3. 性能基准YOLO26N-Pose TensorRT 性能640x640 ┌──────────────────┬──────────┬──────────┐ │ 设备 │ FP16 │ INT8 │ ├──────────────────┼──────────┼──────────┤ │ Jetson Orin NX │ 5.2ms │ 3.8ms │ │ Jetson Orin Nano │ 9.5ms │ 6.5ms │ │ RTX 4090 │ 1.5ms │ 1.1ms │ │ RTX 3060 │ 4.2ms │ 3.0ms │ └──────────────────┴──────────┴──────────┘总结步骤工具输出导出 ONNXUltralytics.onnx转 TensorRTtrtexec / Ultralytics.engine推理TRTPoseDetector关键点可视化draw_pose骨架图