从零解析COCO数据集：Python实战指南与高效标注处理技巧

第一次打开COCO数据集的JSON文件时，那种面对层层嵌套数据的茫然感，我至今记忆犹新。作为一名计算机视觉开发者，能够熟练处理COCO标注格式是必备技能。本文将带你彻底掌握instances标注文件的解析方法，并提供可直接用于生产环境的Python代码示例。

1. 理解COCO数据集标注结构

COCO数据集是目前计算机视觉领域最常用的基准数据集之一，其标注文件采用JSON格式存储，结构清晰，但初次接触可能会感到复杂。instances标注文件主要包含五个关键部分：

    {
        "info": {},          # 数据集元信息
        "licenses": [],      # 许可证信息
        "images": [],        # 图像基本信息
        "annotations": [],   # 目标检测标注
        "categories": []     # 类别定义
    }

每个字段都有其特定用途：

- info：包含数据集版本、创建日期等元数据
- licenses：说明数据使用权限
- images：记录每张图片的ID、尺寸和存储路径
- annotations：保存所有检测目标的边界框和分割信息
- categories：定义数据集中包含的物体类别

提示：在实际项目中，我们最常使用的是images、annotations和categories这三个字段，它们包含了训练模型所需的核心信息。

2. 实战：Python解析COCO标注文件

让我们通过具体代码来学习如何提取和使用这些信息。首先准备环境：

    pip install pycocotools numpy

2.1 加载并解析JSON文件

    import json
    from pathlib import Path

    # 加载标注文件
    def load_coco_annotations(json_path):
        with open(json_path, 'r') as f:
            data = json.load(f)

        # 构建图像ID到文件名的映射
        image_id_to_info = {
            img['id']: {
                'file_name': img['file_name'],
                'width': img['width'],
                'height': img['height']
            }
            for img in data['images']
        }

        # 构建类别ID到名称的映射
        category_id_to_name = {
            cat['id']: cat['name'] for cat in data['categories']
        }

        return data['annotations'], image_id_to_info, category_id_to_name

    # 使用示例
    annotations, image_info, category_map = load_coco_annotations('instances_val2017.json')

2.2 提取边界框信息

COCO格式的边界框表示为[x_min, y_min, width, height]，我们常需要将其转换为其他格式：

    def convert_bbox(bbox, img_width, img_height):
        """将COCO格式的bbox转换为YOLO格式（中心坐标+宽高，归一化）"""
        x_min, y_min, width, height = bbox
        x_center = (x_min + width / 2) / img_width
        y_center = (y_min + height / 2) / img_height
        norm_width = width / img_width
        norm_height = height / img_height
        return [x_center, y_center, norm_width, norm_height]

    # 示例：处理所有标注
    yolo_annotations = []
    for ann in annotations:
        img_id = ann['image_id']
        img_info = image_info[img_id]
        yolo_bbox = convert_bbox(
            ann['bbox'], img_info['width'], img_info['height']
        )
        yolo_annotations.append({
            'image_id': img_id,
            'category_id': ann['category_id'],
            'category_name': category_map[ann['category_id']],
            'yolo_bbox': yolo_bbox
        })

3. 
处理分割标注与高级技巧

COCO数据集不仅提供边界框，还包含精确的多边形分割标注，这对语义分割任务尤为重要。

3.1 解析分割标注

    def process_segmentation(segmentation, img_width, img_height):
        """处理分割标注，返回归一化的多边形坐标"""
        # COCO分割标注可能是RLE或多边形格式
        if isinstance(segmentation, dict):
            # 处理RLE格式（这里简化处理）
            raise NotImplementedError("RLE解码需要pycocotools")
        else:
            # 多边形格式
            normalized_polygons = []
            for polygon in segmentation:
                # 将坐标点配对为(x,y)元组
                points = list(zip(polygon[::2], polygon[1::2]))
                # 归一化
                normalized = [
                    (x/img_width, y/img_height) for x, y in points
                ]
                normalized_polygons.append(normalized)
            return normalized_polygons

    # 示例使用
    for ann in annotations[:5]:  # 只处理前5个标注示例
        if 'segmentation' in ann and ann['segmentation']:
            seg = process_segmentation(
                ann['segmentation'],
                image_info[ann['image_id']]['width'],
                image_info[ann['image_id']]['height']
            )
            print(f"Image {ann['image_id']} has {len(seg)} segmentation polygons")

3.2 标注可视化技巧

理解数据结构后，我们可以将标注可视化，以验证解析是否正确：

    import cv2
    import numpy as np
    from matplotlib import pyplot as plt

    def visualize_annotation(image_path, annotations):
        """可视化图片及其标注"""
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        plt.figure(figsize=(12, 8))
        plt.imshow(img)

        for ann in annotations:
            bbox = ann['bbox']
            # 绘制边界框
            rect = plt.Rectangle(
                (bbox[0], bbox[1]), bbox[2], bbox[3],
                fill=False, edgecolor='red', linewidth=2
            )
            plt.gca().add_patch(rect)

            # 绘制类别标签
            plt.text(
                bbox[0], bbox[1] - 5,
                category_map[ann['category_id']],
                color='white', backgroundcolor='red'
            )

        plt.axis('off')
        plt.show()

    # 示例：可视化第一张图片的标注
    sample_img_id = annotations[0]['image_id']
    sample_anns = [a for a in annotations if a['image_id'] == sample_img_id]
    visualize_annotation(
        f"val2017/{image_info[sample_img_id]['file_name']}",
        sample_anns
    )

4. 
性能优化与批量处理

处理大规模数据集时，效率至关重要。以下是几个优化技巧：

4.1 使用多进程处理

    from multiprocessing import Pool
    import os

    def process_single_annotation(args):
        """处理单个标注的worker函数"""
        ann, image_info, category_map = args
        # 这里添加实际处理逻辑
        return processed_result

    def batch_process_annotations(annotations, image_info, category_map, workers=4):
        """批量处理标注"""
        with Pool(workers) as p:
            args = [(a, image_info, category_map) for a in annotations]
            results = p.map(process_single_annotation, args)
        return results

4.2 内存高效处理

对于特别大的标注文件，可以逐块处理：

    import ijson

    def stream_process_large_json(json_path):
        """流式处理大型JSON文件"""
        with open(json_path, 'rb') as f:
            # 使用ijson逐项解析
            annotations = ijson.items(f, 'annotations.item')
            for ann in annotations:
                # 处理每个标注项
                process_annotation(ann)

4.3 常用工具函数

以下是一些你会反复用到的实用函数：

    def get_annotations_for_image(image_id, all_annotations):
        """获取指定图片的所有标注"""
        return [a for a in all_annotations if a['image_id'] == image_id]

    def get_image_ids_with_category(category_id, all_annotations):
        """获取包含特定类别的所有图片ID"""
        return list({
            a['image_id'] for a in all_annotations
            if a['category_id'] == category_id
        })

    def calculate_class_distribution(annotations):
        """计算类别分布统计"""
        from collections import defaultdict
        dist = defaultdict(int)
        for a in annotations:
            dist[a['category_id']] += 1
        return dist

5. 
实际应用：转换为其他标注格式

不同框架需要不同的标注格式，以下是常见转换示例：

5.1 转换为YOLO格式

    def convert_to_yolo(annotations, image_info, category_map, output_dir):
        """将COCO标注转换为YOLO格式"""
        os.makedirs(output_dir, exist_ok=True)

        # 首先创建类别映射文件
        with open(f"{output_dir}/classes.txt", 'w') as f:
            for cat_id in sorted(category_map.keys()):
                f.write(f"{category_map[cat_id]}\n")

        # 为每张图片创建标注文件
        for img_id, img_data in image_info.items():
            img_anns = get_annotations_for_image(img_id, annotations)
            lines = []
            for ann in img_anns:
                yolo_bbox = convert_bbox(
                    ann['bbox'], img_data['width'], img_data['height']
                )
                # YOLO格式: class_id center_x center_y width height
                line = f"{ann['category_id']} {' '.join(map(str, yolo_bbox))}\n"
                lines.append(line)

            # 写入文件
            output_path = f"{output_dir}/{img_data['file_name'].replace('.jpg', '.txt')}"
            with open(output_path, 'w') as f:
                f.writelines(lines)

    # 使用示例
    convert_to_yolo(annotations, image_info, category_map, 'yolo_labels')

注意：COCO的category_id并不连续（80个类别的ID分布在1到90之间），而YOLO要求类别索引从0开始连续编号；上面的示例直接写入了category_id，实际使用时应先把它映射为该类别在classes.txt中的行号（从0开始）。

5.2 转换为Pascal VOC格式

    import xml.etree.ElementTree as ET

    def create_voc_annotation(image_info, annotations, category_map):
        """创建Pascal VOC格式的XML标注"""
        root = ET.Element('annotation')

        # 添加图片信息
        ET.SubElement(root, 'filename').text = image_info['file_name']
        size = ET.SubElement(root, 'size')
        ET.SubElement(size, 'width').text = str(image_info['width'])
        ET.SubElement(size, 'height').text = str(image_info['height'])
        ET.SubElement(size, 'depth').text = '3'  # 假设是RGB图像

        # 添加每个对象
        for ann in annotations:
            obj = ET.SubElement(root, 'object')
            ET.SubElement(obj, 'name').text = category_map[ann['category_id']]
            ET.SubElement(obj, 'difficult').text = '0'  # 可根据实际情况调整

            bbox = ET.SubElement(obj, 'bndbox')
            x_min, y_min, width, height = ann['bbox']
            x_max = x_min + width
            y_max = y_min + height

            ET.SubElement(bbox, 'xmin').text = str(int(x_min))
            ET.SubElement(bbox, 'ymin').text = str(int(y_min))
            ET.SubElement(bbox, 'xmax').text = str(int(x_max))
            ET.SubElement(bbox, 'ymax').text = str(int(y_max))

        return ET.tostring(root, encoding='unicode')

    # 示例：为第一张图片创建VOC标注
    sample_img_id = annotations[0]['image_id']
    sample_anns = get_annotations_for_image(sample_img_id, annotations)
    voc_xml = create_voc_annotation(image_info[sample_img_id], sample_anns, category_map)
    print(voc_xml)

6. 常见问题与解决方案

在实际使用COCO数据集时，你可能会遇到以下问题：

6.1 处理iscrowd标注

COCO标注中的iscrowd字段标识该标注对应的是单个目标（iscrowd=0，分割以多边形表示），还是一组密集、难以逐个区分的目标（iscrowd=1，分割以RLE表示）：

    # 分离crowd和非crowd标注
    normal_annotations = [a for a in annotations if a['iscrowd'] == 0]
    crowd_annotations = [a for a in annotations if a['iscrowd'] == 1]

    print(f"正常标注: {len(normal_annotations)}")
    print(f"Crowd标注: {len(crowd_annotations)}")

注意：对于目标检测任务，通常建议先忽略crowd标注，等模型基本工作后再考虑如何处理它们。

6.2 处理类别不平衡

COCO数据集中的类别分布极不均衡：

    # 计算类别分布
    from collections import Counter
    category_counts = Counter(a['category_id'] for a in annotations)

    # 打印最少和最多出现的类别
    min_cat = min(category_counts.items(), key=lambda x: x[1])
    max_cat = max(category_counts.items(), key=lambda x: x[1])

    print(f"最少出现的类别: {category_map[min_cat[0]]} ({min_cat[1]}次)")
    print(f"最多出现的类别: {category_map[max_cat[0]]} ({max_cat[1]}次)")

解决方案包括：

- 对稀少类别进行过采样
- 在损失函数中使用类别权重
- 数据增强时针对稀少类别增加样本

6.3 处理分割标注的复杂情况

COCO的分割标注可能有多种形式：

    def handle_complex_segmentation(segmentation):
        if isinstance(segmentation, list):
            if len(segmentation) == 1:
                # 简单多边形
                return segmentation[0]
            else:
                # 多个多边形（通常是物体被遮挡而分成了多个部分）
                return merge_polygons(segmentation)
        elif isinstance(segmentation, dict):
            # RLE格式
            return decode_rle(segmentation)
        else:
            raise ValueError("未知的分割标注格式")

    # 实际项目中你可能需要实现merge_polygons和decode_rle函数

7. 
高级应用：构建自定义数据集

掌握了COCO标注解析后，你可以轻松创建自己的标注工具或转换其他数据集：

7.1 创建COCO格式的标注

    def create_coco_annotation_template():
        """创建COCO格式的标注模板"""
        template = {
            "info": {
                "description": "My Custom Dataset",
                "url": "",
                "version": "1.0",
                "year": 2023,
                "contributor": "Your Name",
                "date_created": "2023-01-01"
            },
            "licenses": [{
                "url": "",
                "id": 1,
                "name": "Custom License"
            }],
            "images": [],
            "annotations": [],
            "categories": []
        }
        return template

    # 添加类别示例
    def add_category(coco_data, category_id, name, supercategory):
        coco_data['categories'].append({
            "id": category_id,
            "name": name,
            "supercategory": supercategory
        })

    # 添加图片示例
    def add_image(coco_data, image_id, file_name, width, height):
        coco_data['images'].append({
            "id": image_id,
            "file_name": file_name,
            "width": width,
            "height": height,
            "license": 1,
            "date_captured": "2023-01-01"
        })

    # 添加标注示例
    def add_annotation(coco_data, annotation_id, image_id, category_id, bbox, segmentation):
        x, y, w, h = bbox
        coco_data['annotations'].append({
            "id": annotation_id,
            "image_id": image_id,
            "category_id": category_id,
            "segmentation": segmentation,
            "area": w * h,
            "bbox": [x, y, w, h],
            "iscrowd": 0
        })

7.2 与其他工具集成

你可以将COCO解析代码集成到训练流程中（运行下面的示例前，需要先导入os、json、torch，以及from collections import defaultdict和from PIL import Image）：

    class CocoDataset(torch.utils.data.Dataset):
        """PyTorch数据集类，用于加载COCO格式数据"""

        def __init__(self, image_dir, json_path, transform=None):
            self.image_dir = image_dir
            self.transform = transform

            # 加载并解析标注
            with open(json_path, 'r') as f:
                data = json.load(f)

            self.image_info = {img['id']: img for img in data['images']}
            self.annotations = defaultdict(list)
            for ann in data['annotations']:
                self.annotations[ann['image_id']].append(ann)

            self.classes = {cat['id']: cat['name'] for cat in data['categories']}
            self.class_ids = sorted(self.classes.keys())

        def __len__(self):
            return len(self.image_info)

        def __getitem__(self, idx):
            img_id = list(self.image_info.keys())[idx]
            img_info = self.image_info[img_id]

            # 加载图像
            img_path = os.path.join(self.image_dir, img_info['file_name'])
            image = Image.open(img_path).convert('RGB')

            # 获取标注
            anns = self.annotations.get(img_id, [])
            boxes = []
            labels = []
            for ann in anns:
                boxes.append(ann['bbox'])  # [x,y,w,h]
                labels.append(ann['category_id'])

            # 转换为张量
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)

            target = {
                'boxes': boxes,
                'labels': labels,
                'image_id': torch.tensor([img_id])
            }

            if self.transform:
                image = self.transform(image)

            return image, target
别再对着COCO数据集JSON文件发愁了!手把手教你用Python解析instances标注(附完整代码)
发布时间:2026/5/22 2:32:45
从零解析COCO数据集：Python实战指南与高效标注处理技巧

第一次打开COCO数据集的JSON文件时，那种面对层层嵌套数据的茫然感，我至今记忆犹新。作为一名计算机视觉开发者，能够熟练处理COCO标注格式是必备技能。本文将带你彻底掌握instances标注文件的解析方法，并提供可直接用于生产环境的Python代码示例。

1. 理解COCO数据集标注结构

COCO数据集是目前计算机视觉领域最常用的基准数据集之一，其标注文件采用JSON格式存储，结构清晰，但初次接触可能会感到复杂。instances标注文件主要包含五个关键部分：

    {
        "info": {},          # 数据集元信息
        "licenses": [],      # 许可证信息
        "images": [],        # 图像基本信息
        "annotations": [],   # 目标检测标注
        "categories": []     # 类别定义
    }

每个字段都有其特定用途：

- info：包含数据集版本、创建日期等元数据
- licenses：说明数据使用权限
- images：记录每张图片的ID、尺寸和存储路径
- annotations：保存所有检测目标的边界框和分割信息
- categories：定义数据集中包含的物体类别

提示：在实际项目中，我们最常使用的是images、annotations和categories这三个字段，它们包含了训练模型所需的核心信息。

2. 实战：Python解析COCO标注文件

让我们通过具体代码来学习如何提取和使用这些信息。首先准备环境：

    pip install pycocotools numpy

2.1 加载并解析JSON文件

    import json
    from pathlib import Path

    # 加载标注文件
    def load_coco_annotations(json_path):
        with open(json_path, 'r') as f:
            data = json.load(f)

        # 构建图像ID到文件名的映射
        image_id_to_info = {
            img['id']: {
                'file_name': img['file_name'],
                'width': img['width'],
                'height': img['height']
            }
            for img in data['images']
        }

        # 构建类别ID到名称的映射
        category_id_to_name = {
            cat['id']: cat['name'] for cat in data['categories']
        }

        return data['annotations'], image_id_to_info, category_id_to_name

    # 使用示例
    annotations, image_info, category_map = load_coco_annotations('instances_val2017.json')

2.2 提取边界框信息

COCO格式的边界框表示为[x_min, y_min, width, height]，我们常需要将其转换为其他格式：

    def convert_bbox(bbox, img_width, img_height):
        """将COCO格式的bbox转换为YOLO格式（中心坐标+宽高，归一化）"""
        x_min, y_min, width, height = bbox
        x_center = (x_min + width / 2) / img_width
        y_center = (y_min + height / 2) / img_height
        norm_width = width / img_width
        norm_height = height / img_height
        return [x_center, y_center, norm_width, norm_height]

    # 示例：处理所有标注
    yolo_annotations = []
    for ann in annotations:
        img_id = ann['image_id']
        img_info = image_info[img_id]
        yolo_bbox = convert_bbox(
            ann['bbox'], img_info['width'], img_info['height']
        )
        yolo_annotations.append({
            'image_id': img_id,
            'category_id': ann['category_id'],
            'category_name': category_map[ann['category_id']],
            'yolo_bbox': yolo_bbox
        })

3. 
处理分割标注与高级技巧

COCO数据集不仅提供边界框，还包含精确的多边形分割标注，这对语义分割任务尤为重要。

3.1 解析分割标注

    def process_segmentation(segmentation, img_width, img_height):
        """处理分割标注，返回归一化的多边形坐标"""
        # COCO分割标注可能是RLE或多边形格式
        if isinstance(segmentation, dict):
            # 处理RLE格式（这里简化处理）
            raise NotImplementedError("RLE解码需要pycocotools")
        else:
            # 多边形格式
            normalized_polygons = []
            for polygon in segmentation:
                # 将坐标点配对为(x,y)元组
                points = list(zip(polygon[::2], polygon[1::2]))
                # 归一化
                normalized = [
                    (x/img_width, y/img_height) for x, y in points
                ]
                normalized_polygons.append(normalized)
            return normalized_polygons

    # 示例使用
    for ann in annotations[:5]:  # 只处理前5个标注示例
        if 'segmentation' in ann and ann['segmentation']:
            seg = process_segmentation(
                ann['segmentation'],
                image_info[ann['image_id']]['width'],
                image_info[ann['image_id']]['height']
            )
            print(f"Image {ann['image_id']} has {len(seg)} segmentation polygons")

3.2 标注可视化技巧

理解数据结构后，我们可以将标注可视化，以验证解析是否正确：

    import cv2
    import numpy as np
    from matplotlib import pyplot as plt

    def visualize_annotation(image_path, annotations):
        """可视化图片及其标注"""
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        plt.figure(figsize=(12, 8))
        plt.imshow(img)

        for ann in annotations:
            bbox = ann['bbox']
            # 绘制边界框
            rect = plt.Rectangle(
                (bbox[0], bbox[1]), bbox[2], bbox[3],
                fill=False, edgecolor='red', linewidth=2
            )
            plt.gca().add_patch(rect)

            # 绘制类别标签
            plt.text(
                bbox[0], bbox[1] - 5,
                category_map[ann['category_id']],
                color='white', backgroundcolor='red'
            )

        plt.axis('off')
        plt.show()

    # 示例：可视化第一张图片的标注
    sample_img_id = annotations[0]['image_id']
    sample_anns = [a for a in annotations if a['image_id'] == sample_img_id]
    visualize_annotation(
        f"val2017/{image_info[sample_img_id]['file_name']}",
        sample_anns
    )

4. 
性能优化与批量处理

处理大规模数据集时，效率至关重要。以下是几个优化技巧：

4.1 使用多进程处理

    from multiprocessing import Pool
    import os

    def process_single_annotation(args):
        """处理单个标注的worker函数"""
        ann, image_info, category_map = args
        # 这里添加实际处理逻辑
        return processed_result

    def batch_process_annotations(annotations, image_info, category_map, workers=4):
        """批量处理标注"""
        with Pool(workers) as p:
            args = [(a, image_info, category_map) for a in annotations]
            results = p.map(process_single_annotation, args)
        return results

4.2 内存高效处理

对于特别大的标注文件，可以逐块处理：

    import ijson

    def stream_process_large_json(json_path):
        """流式处理大型JSON文件"""
        with open(json_path, 'rb') as f:
            # 使用ijson逐项解析
            annotations = ijson.items(f, 'annotations.item')
            for ann in annotations:
                # 处理每个标注项
                process_annotation(ann)

4.3 常用工具函数

以下是一些你会反复用到的实用函数：

    def get_annotations_for_image(image_id, all_annotations):
        """获取指定图片的所有标注"""
        return [a for a in all_annotations if a['image_id'] == image_id]

    def get_image_ids_with_category(category_id, all_annotations):
        """获取包含特定类别的所有图片ID"""
        return list({
            a['image_id'] for a in all_annotations
            if a['category_id'] == category_id
        })

    def calculate_class_distribution(annotations):
        """计算类别分布统计"""
        from collections import defaultdict
        dist = defaultdict(int)
        for a in annotations:
            dist[a['category_id']] += 1
        return dist

5. 
实际应用：转换为其他标注格式

不同框架需要不同的标注格式，以下是常见转换示例：

5.1 转换为YOLO格式

    def convert_to_yolo(annotations, image_info, category_map, output_dir):
        """将COCO标注转换为YOLO格式"""
        os.makedirs(output_dir, exist_ok=True)

        # 首先创建类别映射文件
        with open(f"{output_dir}/classes.txt", 'w') as f:
            for cat_id in sorted(category_map.keys()):
                f.write(f"{category_map[cat_id]}\n")

        # 为每张图片创建标注文件
        for img_id, img_data in image_info.items():
            img_anns = get_annotations_for_image(img_id, annotations)
            lines = []
            for ann in img_anns:
                yolo_bbox = convert_bbox(
                    ann['bbox'], img_data['width'], img_data['height']
                )
                # YOLO格式: class_id center_x center_y width height
                line = f"{ann['category_id']} {' '.join(map(str, yolo_bbox))}\n"
                lines.append(line)

            # 写入文件
            output_path = f"{output_dir}/{img_data['file_name'].replace('.jpg', '.txt')}"
            with open(output_path, 'w') as f:
                f.writelines(lines)

    # 使用示例
    convert_to_yolo(annotations, image_info, category_map, 'yolo_labels')

注意：COCO的category_id并不连续（80个类别的ID分布在1到90之间），而YOLO要求类别索引从0开始连续编号；上面的示例直接写入了category_id，实际使用时应先把它映射为该类别在classes.txt中的行号（从0开始）。

5.2 转换为Pascal VOC格式

    import xml.etree.ElementTree as ET

    def create_voc_annotation(image_info, annotations, category_map):
        """创建Pascal VOC格式的XML标注"""
        root = ET.Element('annotation')

        # 添加图片信息
        ET.SubElement(root, 'filename').text = image_info['file_name']
        size = ET.SubElement(root, 'size')
        ET.SubElement(size, 'width').text = str(image_info['width'])
        ET.SubElement(size, 'height').text = str(image_info['height'])
        ET.SubElement(size, 'depth').text = '3'  # 假设是RGB图像

        # 添加每个对象
        for ann in annotations:
            obj = ET.SubElement(root, 'object')
            ET.SubElement(obj, 'name').text = category_map[ann['category_id']]
            ET.SubElement(obj, 'difficult').text = '0'  # 可根据实际情况调整

            bbox = ET.SubElement(obj, 'bndbox')
            x_min, y_min, width, height = ann['bbox']
            x_max = x_min + width
            y_max = y_min + height

            ET.SubElement(bbox, 'xmin').text = str(int(x_min))
            ET.SubElement(bbox, 'ymin').text = str(int(y_min))
            ET.SubElement(bbox, 'xmax').text = str(int(x_max))
            ET.SubElement(bbox, 'ymax').text = str(int(y_max))

        return ET.tostring(root, encoding='unicode')

    # 示例：为第一张图片创建VOC标注
    sample_img_id = annotations[0]['image_id']
    sample_anns = get_annotations_for_image(sample_img_id, annotations)
    voc_xml = create_voc_annotation(image_info[sample_img_id], sample_anns, category_map)
    print(voc_xml)

6. 常见问题与解决方案

在实际使用COCO数据集时，你可能会遇到以下问题：

6.1 处理iscrowd标注

COCO标注中的iscrowd字段标识该标注对应的是单个目标（iscrowd=0，分割以多边形表示），还是一组密集、难以逐个区分的目标（iscrowd=1，分割以RLE表示）：

    # 分离crowd和非crowd标注
    normal_annotations = [a for a in annotations if a['iscrowd'] == 0]
    crowd_annotations = [a for a in annotations if a['iscrowd'] == 1]

    print(f"正常标注: {len(normal_annotations)}")
    print(f"Crowd标注: {len(crowd_annotations)}")

注意：对于目标检测任务，通常建议先忽略crowd标注，等模型基本工作后再考虑如何处理它们。

6.2 处理类别不平衡

COCO数据集中的类别分布极不均衡：

    # 计算类别分布
    from collections import Counter
    category_counts = Counter(a['category_id'] for a in annotations)

    # 打印最少和最多出现的类别
    min_cat = min(category_counts.items(), key=lambda x: x[1])
    max_cat = max(category_counts.items(), key=lambda x: x[1])

    print(f"最少出现的类别: {category_map[min_cat[0]]} ({min_cat[1]}次)")
    print(f"最多出现的类别: {category_map[max_cat[0]]} ({max_cat[1]}次)")

解决方案包括：

- 对稀少类别进行过采样
- 在损失函数中使用类别权重
- 数据增强时针对稀少类别增加样本

6.3 处理分割标注的复杂情况

COCO的分割标注可能有多种形式：

    def handle_complex_segmentation(segmentation):
        if isinstance(segmentation, list):
            if len(segmentation) == 1:
                # 简单多边形
                return segmentation[0]
            else:
                # 多个多边形（通常是物体被遮挡而分成了多个部分）
                return merge_polygons(segmentation)
        elif isinstance(segmentation, dict):
            # RLE格式
            return decode_rle(segmentation)
        else:
            raise ValueError("未知的分割标注格式")

    # 实际项目中你可能需要实现merge_polygons和decode_rle函数

7. 
高级应用：构建自定义数据集

掌握了COCO标注解析后，你可以轻松创建自己的标注工具或转换其他数据集：

7.1 创建COCO格式的标注

    def create_coco_annotation_template():
        """创建COCO格式的标注模板"""
        template = {
            "info": {
                "description": "My Custom Dataset",
                "url": "",
                "version": "1.0",
                "year": 2023,
                "contributor": "Your Name",
                "date_created": "2023-01-01"
            },
            "licenses": [{
                "url": "",
                "id": 1,
                "name": "Custom License"
            }],
            "images": [],
            "annotations": [],
            "categories": []
        }
        return template

    # 添加类别示例
    def add_category(coco_data, category_id, name, supercategory):
        coco_data['categories'].append({
            "id": category_id,
            "name": name,
            "supercategory": supercategory
        })

    # 添加图片示例
    def add_image(coco_data, image_id, file_name, width, height):
        coco_data['images'].append({
            "id": image_id,
            "file_name": file_name,
            "width": width,
            "height": height,
            "license": 1,
            "date_captured": "2023-01-01"
        })

    # 添加标注示例
    def add_annotation(coco_data, annotation_id, image_id, category_id, bbox, segmentation):
        x, y, w, h = bbox
        coco_data['annotations'].append({
            "id": annotation_id,
            "image_id": image_id,
            "category_id": category_id,
            "segmentation": segmentation,
            "area": w * h,
            "bbox": [x, y, w, h],
            "iscrowd": 0
        })

7.2 与其他工具集成

你可以将COCO解析代码集成到训练流程中（运行下面的示例前，需要先导入os、json、torch，以及from collections import defaultdict和from PIL import Image）：

    class CocoDataset(torch.utils.data.Dataset):
        """PyTorch数据集类，用于加载COCO格式数据"""

        def __init__(self, image_dir, json_path, transform=None):
            self.image_dir = image_dir
            self.transform = transform

            # 加载并解析标注
            with open(json_path, 'r') as f:
                data = json.load(f)

            self.image_info = {img['id']: img for img in data['images']}
            self.annotations = defaultdict(list)
            for ann in data['annotations']:
                self.annotations[ann['image_id']].append(ann)

            self.classes = {cat['id']: cat['name'] for cat in data['categories']}
            self.class_ids = sorted(self.classes.keys())

        def __len__(self):
            return len(self.image_info)

        def __getitem__(self, idx):
            img_id = list(self.image_info.keys())[idx]
            img_info = self.image_info[img_id]

            # 加载图像
            img_path = os.path.join(self.image_dir, img_info['file_name'])
            image = Image.open(img_path).convert('RGB')

            # 获取标注
            anns = self.annotations.get(img_id, [])
            boxes = []
            labels = []
            for ann in anns:
                boxes.append(ann['bbox'])  # [x,y,w,h]
                labels.append(ann['category_id'])

            # 转换为张量
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)

            target = {
                'boxes': boxes,
                'labels': labels,
                'image_id': torch.tensor([img_id])
            }

            if self.transform:
                image = self.transform(image)

            return image, target