what.models.detection.datasets.voc
import os
import pathlib
import numpy as np
import xml.etree.ElementTree as ET
import cv2

# Class names of the 20 Pascal VOC categories, with the background class at index 0.
VOC_CLASS_NAMES = ["BACKGROUND",
                   "aeroplane",
                   "bicycle",
                   "bird",
                   "boat",
                   "bottle",
                   "bus",
                   "car",
                   "cat",
                   "chair",
                   "cow",
                   "diningtable",
                   "dog",
                   "horse",
                   "motorbike",
                   "person",
                   "pottedplant",
                   "sheep",
                   "sofa",
                   "train",
                   "tvmonitor"]


class VOCDataset:
    """Object-detection dataset read from a Pascal VOC style directory tree."""

    def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None):
        """Dataset for VOC data.

        Args:
            root: the root of the VOC2007 or VOC2012 dataset; the directory
                contains the following sub-directories: Annotations,
                ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
            transform: optional callable invoked as
                ``transform(image, boxes, labels)`` by ``__getitem__``.
            target_transform: optional callable invoked as
                ``target_transform(boxes, labels)`` by ``__getitem__``.
            is_test: read image ids from ``ImageSets/Main/test.txt`` instead
                of ``ImageSets/Main/trainval.txt``.
            keep_difficult: keep objects flagged ``difficult`` in the
                results of ``__getitem__``.
            label_file: optional path to a class-name file. When it is not
                given or does not exist, ``<root>/labels.txt`` is used if
                present; otherwise the standard VOC classes are used.
        """
        self.root = pathlib.Path(root)
        self.transform = transform
        self.target_transform = target_transform
        split_name = "test" if is_test else "trainval"
        image_sets_file = self.root / "ImageSets" / "Main" / f"{split_name}.txt"
        self.ids = VOCDataset._read_image_ids(image_sets_file)
        self.keep_difficult = keep_difficult

        # An explicit label file takes precedence over <root>/labels.txt.
        candidates = [self.root / "labels.txt"]
        if label_file is not None:
            candidates.insert(0, pathlib.Path(label_file))
        label_file_name = next((path for path in candidates if path.is_file()), None)

        if label_file_name is not None:
            # Class names may be separated by commas and/or newlines.
            with open(label_file_name, 'r') as infile:
                raw_names = infile.read().replace("\n", ",").split(",")
            # Spaces are removed from every name and blank entries are
            # skipped so a trailing separator does not create an empty class.
            cleaned = (name.replace(" ", "").strip() for name in raw_names)
            # BACKGROUND is always class index 0.
            self.class_names = ('BACKGROUND', *(name for name in cleaned if name))
        else:
            self.class_names = tuple(VOC_CLASS_NAMES)

        self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}

    def __len__(self):
        """Return the number of image ids in the selected image set."""
        return len(self.ids)

    def __getitem__(self, index):
        """Return ``(image, boxes, labels)`` for the image at ``index``.

        Objects flagged as difficult are dropped unless ``keep_difficult``
        was set. ``transform`` and ``target_transform`` are applied, in that
        order, when they are set.
        """
        image_id = self.ids[index]
        boxes, labels, is_difficult = self._get_annotation(image_id)
        if not self.keep_difficult:
            keep = is_difficult == 0
            boxes = boxes[keep]
            labels = labels[keep]
        image = self._read_image(image_id)
        if self.transform:
            image, boxes, labels = self.transform(image, boxes, labels)
        if self.target_transform:
            boxes, labels = self.target_transform(boxes, labels)
        return image, boxes, labels

    def get_image(self, index):
        """Return only the (optionally transformed) image at ``index``."""
        image_id = self.ids[index]
        image = self._read_image(image_id)
        if self.transform:
            transformed = self.transform(image)
            # __getitem__ expects the transform to return three values, so
            # accept a tuple of any length (or a bare image) and keep only
            # the image.
            if isinstance(transformed, (tuple, list)):
                image = transformed[0]
            else:
                image = transformed
        return image

    def get_annotation(self, index):
        """Return ``(image_id, (boxes, labels, is_difficult))`` for ``index``."""
        image_id = self.ids[index]
        return image_id, self._get_annotation(image_id)

    @staticmethod
    def _read_image_ids(image_sets_file):
        """Read one image id per line from ``image_sets_file``, skipping blank lines."""
        with open(image_sets_file) as f:
            return [line.strip() for line in f if line.strip()]

    def _get_annotation(self, image_id):
        """Parse ``Annotations/<image_id>.xml``.

        Returns:
            A tuple ``(boxes, labels, is_difficult)`` of numpy arrays:
            ``boxes`` is float32 with shape ``(N, 4)`` holding
            ``[x1, y1, x2, y2]``, ``labels`` is int64 with shape ``(N,)`` and
            ``is_difficult`` is uint8 with shape ``(N,)``. Objects whose class
            name is not in ``class_dict`` are skipped.
        """
        annotation_file = self.root / f"Annotations/{image_id}.xml"
        objects = ET.parse(annotation_file).findall("object")
        boxes = []
        labels = []
        is_difficult = []
        for obj in objects:
            class_name = obj.find('name').text.lower().strip()
            # we're only concerned with classes in our list
            if class_name not in self.class_dict:
                continue
            bbox = obj.find('bndbox')

            # VOC coordinates follow Matlab, in which indexes start from 1;
            # subtract 1 to obtain 0-based pixel coordinates.
            boxes.append([float(bbox.find(tag).text) - 1
                          for tag in ('xmin', 'ymin', 'xmax', 'ymax')])

            labels.append(self.class_dict[class_name])
            # A missing or empty <difficult> element counts as not difficult.
            is_difficult_str = obj.findtext('difficult')
            is_difficult.append(int(is_difficult_str) if is_difficult_str else 0)

        # reshape keeps the (N, 4) layout even when no object matched.
        return (np.array(boxes, dtype=np.float32).reshape(-1, 4),
                np.array(labels, dtype=np.int64),
                np.array(is_difficult, dtype=np.uint8))

    def _read_image(self, image_id):
        """Load ``JPEGImages/<image_id>.jpg`` and convert it from BGR to RGB.

        Raises:
            OSError: if the image cannot be read.
        """
        image_file = self.root / f"JPEGImages/{image_id}.jpg"
        image = cv2.imread(str(image_file))
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise OSError(f"failed to load image {image_file}")
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
class
VOCDataset:
class VOCDataset:
    """Object-detection dataset read from a Pascal VOC style directory tree."""

    def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None):
        """Dataset for VOC data.

        Args:
            root: the root of the VOC2007 or VOC2012 dataset; the directory
                contains the following sub-directories: Annotations,
                ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
            transform: optional callable invoked as
                ``transform(image, boxes, labels)`` by ``__getitem__``.
            target_transform: optional callable invoked as
                ``target_transform(boxes, labels)`` by ``__getitem__``.
            is_test: read image ids from ``ImageSets/Main/test.txt`` instead
                of ``ImageSets/Main/trainval.txt``.
            keep_difficult: keep objects flagged ``difficult`` in the
                results of ``__getitem__``.
            label_file: optional path to a class-name file. When it is not
                given or does not exist, ``<root>/labels.txt`` is used if
                present; otherwise the standard VOC classes are used.
        """
        self.root = pathlib.Path(root)
        self.transform = transform
        self.target_transform = target_transform
        split_name = "test" if is_test else "trainval"
        image_sets_file = self.root / "ImageSets" / "Main" / f"{split_name}.txt"
        self.ids = VOCDataset._read_image_ids(image_sets_file)
        self.keep_difficult = keep_difficult

        # An explicit label file takes precedence over <root>/labels.txt.
        candidates = [self.root / "labels.txt"]
        if label_file is not None:
            candidates.insert(0, pathlib.Path(label_file))
        label_file_name = next((path for path in candidates if path.is_file()), None)

        if label_file_name is not None:
            # Class names may be separated by commas and/or newlines.
            with open(label_file_name, 'r') as infile:
                raw_names = infile.read().replace("\n", ",").split(",")
            # Spaces are removed from every name and blank entries are
            # skipped so a trailing separator does not create an empty class.
            cleaned = (name.replace(" ", "").strip() for name in raw_names)
            # BACKGROUND is always class index 0.
            self.class_names = ('BACKGROUND', *(name for name in cleaned if name))
        else:
            self.class_names = ('BACKGROUND',
                                'aeroplane', 'bicycle', 'bird', 'boat',
                                'bottle', 'bus', 'car', 'cat', 'chair',
                                'cow', 'diningtable', 'dog', 'horse',
                                'motorbike', 'person', 'pottedplant',
                                'sheep', 'sofa', 'train', 'tvmonitor')

        self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}

    def __len__(self):
        """Return the number of image ids in the selected image set."""
        return len(self.ids)

    def __getitem__(self, index):
        """Return ``(image, boxes, labels)`` for the image at ``index``.

        Objects flagged as difficult are dropped unless ``keep_difficult``
        was set. ``transform`` and ``target_transform`` are applied, in that
        order, when they are set.
        """
        image_id = self.ids[index]
        boxes, labels, is_difficult = self._get_annotation(image_id)
        if not self.keep_difficult:
            keep = is_difficult == 0
            boxes = boxes[keep]
            labels = labels[keep]
        image = self._read_image(image_id)
        if self.transform:
            image, boxes, labels = self.transform(image, boxes, labels)
        if self.target_transform:
            boxes, labels = self.target_transform(boxes, labels)
        return image, boxes, labels

    def get_image(self, index):
        """Return only the (optionally transformed) image at ``index``."""
        image_id = self.ids[index]
        image = self._read_image(image_id)
        if self.transform:
            transformed = self.transform(image)
            # __getitem__ expects the transform to return three values, so
            # accept a tuple of any length (or a bare image) and keep only
            # the image.
            if isinstance(transformed, (tuple, list)):
                image = transformed[0]
            else:
                image = transformed
        return image

    def get_annotation(self, index):
        """Return ``(image_id, (boxes, labels, is_difficult))`` for ``index``."""
        image_id = self.ids[index]
        return image_id, self._get_annotation(image_id)

    @staticmethod
    def _read_image_ids(image_sets_file):
        """Read one image id per line from ``image_sets_file``, skipping blank lines."""
        with open(image_sets_file) as f:
            return [line.strip() for line in f if line.strip()]

    def _get_annotation(self, image_id):
        """Parse ``Annotations/<image_id>.xml``.

        Returns:
            A tuple ``(boxes, labels, is_difficult)`` of numpy arrays:
            ``boxes`` is float32 with shape ``(N, 4)`` holding
            ``[x1, y1, x2, y2]``, ``labels`` is int64 with shape ``(N,)`` and
            ``is_difficult`` is uint8 with shape ``(N,)``. Objects whose class
            name is not in ``class_dict`` are skipped.
        """
        annotation_file = self.root / f"Annotations/{image_id}.xml"
        objects = ET.parse(annotation_file).findall("object")
        boxes = []
        labels = []
        is_difficult = []
        for obj in objects:
            class_name = obj.find('name').text.lower().strip()
            # we're only concerned with classes in our list
            if class_name not in self.class_dict:
                continue
            bbox = obj.find('bndbox')

            # VOC coordinates follow Matlab, in which indexes start from 1;
            # subtract 1 to obtain 0-based pixel coordinates.
            boxes.append([float(bbox.find(tag).text) - 1
                          for tag in ('xmin', 'ymin', 'xmax', 'ymax')])

            labels.append(self.class_dict[class_name])
            # A missing or empty <difficult> element counts as not difficult.
            is_difficult_str = obj.findtext('difficult')
            is_difficult.append(int(is_difficult_str) if is_difficult_str else 0)

        # reshape keeps the (N, 4) layout even when no object matched.
        return (np.array(boxes, dtype=np.float32).reshape(-1, 4),
                np.array(labels, dtype=np.int64),
                np.array(is_difficult, dtype=np.uint8))

    def _read_image(self, image_id):
        """Load ``JPEGImages/<image_id>.jpg`` and convert it from BGR to RGB.

        Raises:
            OSError: if the image cannot be read.
        """
        image_file = self.root / f"JPEGImages/{image_id}.jpg"
        image = cv2.imread(str(image_file))
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise OSError(f"failed to load image {image_file}")
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
VOCDataset( root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None)
def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None):
    """Dataset for VOC data.

    Args:
        root: the root of the VOC2007 or VOC2012 dataset; the directory
            contains the following sub-directories: Annotations,
            ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
        transform: optional callable stored on the instance as ``transform``.
        target_transform: optional callable stored on the instance as
            ``target_transform``.
        is_test: read image ids from ``ImageSets/Main/test.txt`` instead
            of ``ImageSets/Main/trainval.txt``.
        keep_difficult: stored on the instance as ``keep_difficult``.
        label_file: optional path to a class-name file. When it is not
            given or does not exist, ``<root>/labels.txt`` is used if
            present; otherwise the standard VOC classes are used.
    """
    self.root = pathlib.Path(root)
    self.transform = transform
    self.target_transform = target_transform
    split_name = "test" if is_test else "trainval"
    image_sets_file = self.root / "ImageSets" / "Main" / f"{split_name}.txt"
    self.ids = VOCDataset._read_image_ids(image_sets_file)
    self.keep_difficult = keep_difficult

    # An explicit label file takes precedence over <root>/labels.txt.
    candidates = [self.root / "labels.txt"]
    if label_file is not None:
        candidates.insert(0, pathlib.Path(label_file))
    label_file_name = next((path for path in candidates if path.is_file()), None)

    if label_file_name is not None:
        # Class names may be separated by commas and/or newlines.
        with open(label_file_name, 'r') as infile:
            raw_names = infile.read().replace("\n", ",").split(",")
        # Spaces are removed from every name and blank entries are
        # skipped so a trailing separator does not create an empty class.
        cleaned = (name.replace(" ", "").strip() for name in raw_names)
        # BACKGROUND is always class index 0.
        self.class_names = ('BACKGROUND', *(name for name in cleaned if name))
    else:
        self.class_names = ('BACKGROUND',
                            'aeroplane', 'bicycle', 'bird', 'boat',
                            'bottle', 'bus', 'car', 'cat', 'chair',
                            'cow', 'diningtable', 'dog', 'horse',
                            'motorbike', 'person', 'pottedplant',
                            'sheep', 'sofa', 'train', 'tvmonitor')

    self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}
Dataset for VOC data. Args: root: the root directory of the VOC2007 or VOC2012 dataset, which contains the following sub-directories: Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.