what.models.detection.datasets.voc

  1import os
  2import pathlib
  3import numpy as np
  4import xml.etree.ElementTree as ET
  5import cv2
  6
  7VOC_CLASS_NAMES =  ["BACKGROUND",
  8                    "aeroplane",
  9                    "bicycle",
 10                    "bird",
 11                    "boat",
 12                    "bottle",
 13                    "bus",
 14                    "car",
 15                    "cat",
 16                    "chair",
 17                    "cow",
 18                    "diningtable",
 19                    "dog",
 20                    "horse",
 21                    "motorbike",
 22                    "person",
 23                    "pottedplant",
 24                    "sheep",
 25                    "sofa",
 26                    "train",
 27                    "tvmonitor"]
 28
 29class VOCDataset:
 30
 31    def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None):
 32        """Dataset for VOC data.
 33        Args:
 34            root: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories:
 35                Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
 36        """
 37        self.root = pathlib.Path(root)
 38        self.transform = transform
 39        self.target_transform = target_transform
 40        if is_test:
 41            image_sets_file = self.root / "ImageSets/Main/test.txt"
 42        else:
 43            image_sets_file = self.root / "ImageSets/Main/trainval.txt"
 44        self.ids = VOCDataset._read_image_ids(image_sets_file)
 45        self.keep_difficult = keep_difficult
 46
 47        # if the labels file exists, read in the class names
 48        label_file_name = self.root / "labels.txt"
 49
 50        if os.path.isfile(label_file_name):
 51            class_string = ""
 52            with open(label_file_name, 'r') as infile:
 53                for line in infile:
 54                    class_string += line.rstrip()
 55
 56            # classes should be a comma separated list
 57            
 58            classes = class_string.split(',')
 59            # prepend BACKGROUND as first class
 60            classes.insert(0, 'BACKGROUND')
 61            classes  = [ elem.replace(" ", "") for elem in classes]
 62            self.class_names = tuple(classes)
 63
 64        else:
 65            self.class_names = ('BACKGROUND',
 66            'aeroplane', 'bicycle', 'bird', 'boat',
 67            'bottle', 'bus', 'car', 'cat', 'chair',
 68            'cow', 'diningtable', 'dog', 'horse',
 69            'motorbike', 'person', 'pottedplant',
 70            'sheep', 'sofa', 'train', 'tvmonitor')
 71
 72
 73        self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}
 74
 75    def __len__(self):
 76        return len(self.ids)
 77
 78    def __getitem__(self, index):
 79        image_id = self.ids[index]
 80        boxes, labels, is_difficult = self._get_annotation(image_id)
 81        if not self.keep_difficult:
 82            boxes = boxes[is_difficult == 0]
 83            labels = labels[is_difficult == 0]
 84        image = self._read_image(image_id)
 85        if self.transform:
 86            image, boxes, labels = self.transform(image, boxes, labels)
 87        if self.target_transform:
 88            boxes, labels = self.target_transform(boxes, labels)
 89        return image, boxes, labels
 90
 91    def get_image(self, index):
 92        image_id = self.ids[index]
 93        image = self._read_image(image_id)
 94        if self.transform:
 95            image, _ = self.transform(image)
 96        return image
 97
 98    def get_annotation(self, index):
 99        image_id = self.ids[index]
100        return image_id, self._get_annotation(image_id)
101
102    @staticmethod
103    def _read_image_ids(image_sets_file):
104        ids = []
105        with open(image_sets_file) as f:
106            for line in f:
107                ids.append(line.rstrip())
108        return ids
109
110    def _get_annotation(self, image_id):
111        annotation_file = self.root / f"Annotations/{image_id}.xml"
112        objects = ET.parse(annotation_file).findall("object")
113        boxes = []
114        labels = []
115        is_difficult = []
116        for object in objects:
117            class_name = object.find('name').text.lower().strip()
118            # we're only concerned with clases in our list
119            if class_name in self.class_dict:
120                bbox = object.find('bndbox')
121
122                # VOC dataset format follows Matlab, in which indexes start from 0
123                x1 = float(bbox.find('xmin').text) - 1
124                y1 = float(bbox.find('ymin').text) - 1
125                x2 = float(bbox.find('xmax').text) - 1
126                y2 = float(bbox.find('ymax').text) - 1
127                boxes.append([x1, y1, x2, y2])
128
129                labels.append(self.class_dict[class_name])
130                is_difficult_str = object.find('difficult').text
131                is_difficult.append(int(is_difficult_str) if is_difficult_str else 0)
132
133        return (np.array(boxes, dtype=np.float32),
134                np.array(labels, dtype=np.int64),
135                np.array(is_difficult, dtype=np.uint8))
136
137    def _read_image(self, image_id):
138        image_file = self.root / f"JPEGImages/{image_id}.jpg"
139        image = cv2.imread(str(image_file))
140        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
141        return image
class VOCDataset:
 30class VOCDataset:
 31
 32    def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None):
 33        """Dataset for VOC data.
 34        Args:
 35            root: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories:
 36                Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
 37        """
 38        self.root = pathlib.Path(root)
 39        self.transform = transform
 40        self.target_transform = target_transform
 41        if is_test:
 42            image_sets_file = self.root / "ImageSets/Main/test.txt"
 43        else:
 44            image_sets_file = self.root / "ImageSets/Main/trainval.txt"
 45        self.ids = VOCDataset._read_image_ids(image_sets_file)
 46        self.keep_difficult = keep_difficult
 47
 48        # if the labels file exists, read in the class names
 49        label_file_name = self.root / "labels.txt"
 50
 51        if os.path.isfile(label_file_name):
 52            class_string = ""
 53            with open(label_file_name, 'r') as infile:
 54                for line in infile:
 55                    class_string += line.rstrip()
 56
 57            # classes should be a comma separated list
 58            
 59            classes = class_string.split(',')
 60            # prepend BACKGROUND as first class
 61            classes.insert(0, 'BACKGROUND')
 62            classes  = [ elem.replace(" ", "") for elem in classes]
 63            self.class_names = tuple(classes)
 64
 65        else:
 66            self.class_names = ('BACKGROUND',
 67            'aeroplane', 'bicycle', 'bird', 'boat',
 68            'bottle', 'bus', 'car', 'cat', 'chair',
 69            'cow', 'diningtable', 'dog', 'horse',
 70            'motorbike', 'person', 'pottedplant',
 71            'sheep', 'sofa', 'train', 'tvmonitor')
 72
 73
 74        self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}
 75
 76    def __len__(self):
 77        return len(self.ids)
 78
 79    def __getitem__(self, index):
 80        image_id = self.ids[index]
 81        boxes, labels, is_difficult = self._get_annotation(image_id)
 82        if not self.keep_difficult:
 83            boxes = boxes[is_difficult == 0]
 84            labels = labels[is_difficult == 0]
 85        image = self._read_image(image_id)
 86        if self.transform:
 87            image, boxes, labels = self.transform(image, boxes, labels)
 88        if self.target_transform:
 89            boxes, labels = self.target_transform(boxes, labels)
 90        return image, boxes, labels
 91
 92    def get_image(self, index):
 93        image_id = self.ids[index]
 94        image = self._read_image(image_id)
 95        if self.transform:
 96            image, _ = self.transform(image)
 97        return image
 98
 99    def get_annotation(self, index):
100        image_id = self.ids[index]
101        return image_id, self._get_annotation(image_id)
102
103    @staticmethod
104    def _read_image_ids(image_sets_file):
105        ids = []
106        with open(image_sets_file) as f:
107            for line in f:
108                ids.append(line.rstrip())
109        return ids
110
111    def _get_annotation(self, image_id):
112        annotation_file = self.root / f"Annotations/{image_id}.xml"
113        objects = ET.parse(annotation_file).findall("object")
114        boxes = []
115        labels = []
116        is_difficult = []
117        for object in objects:
118            class_name = object.find('name').text.lower().strip()
119            # we're only concerned with clases in our list
120            if class_name in self.class_dict:
121                bbox = object.find('bndbox')
122
123                # VOC dataset format follows Matlab, in which indexes start from 0
124                x1 = float(bbox.find('xmin').text) - 1
125                y1 = float(bbox.find('ymin').text) - 1
126                x2 = float(bbox.find('xmax').text) - 1
127                y2 = float(bbox.find('ymax').text) - 1
128                boxes.append([x1, y1, x2, y2])
129
130                labels.append(self.class_dict[class_name])
131                is_difficult_str = object.find('difficult').text
132                is_difficult.append(int(is_difficult_str) if is_difficult_str else 0)
133
134        return (np.array(boxes, dtype=np.float32),
135                np.array(labels, dtype=np.int64),
136                np.array(is_difficult, dtype=np.uint8))
137
138    def _read_image(self, image_id):
139        image_file = self.root / f"JPEGImages/{image_id}.jpg"
140        image = cv2.imread(str(image_file))
141        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
142        return image
VOCDataset( root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None)
32    def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None):
33        """Dataset for VOC data.
34        Args:
35            root: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories:
36                Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
37        """
38        self.root = pathlib.Path(root)
39        self.transform = transform
40        self.target_transform = target_transform
41        if is_test:
42            image_sets_file = self.root / "ImageSets/Main/test.txt"
43        else:
44            image_sets_file = self.root / "ImageSets/Main/trainval.txt"
45        self.ids = VOCDataset._read_image_ids(image_sets_file)
46        self.keep_difficult = keep_difficult
47
48        # if the labels file exists, read in the class names
49        label_file_name = self.root / "labels.txt"
50
51        if os.path.isfile(label_file_name):
52            class_string = ""
53            with open(label_file_name, 'r') as infile:
54                for line in infile:
55                    class_string += line.rstrip()
56
57            # classes should be a comma separated list
58            
59            classes = class_string.split(',')
60            # prepend BACKGROUND as first class
61            classes.insert(0, 'BACKGROUND')
62            classes  = [ elem.replace(" ", "") for elem in classes]
63            self.class_names = tuple(classes)
64
65        else:
66            self.class_names = ('BACKGROUND',
67            'aeroplane', 'bicycle', 'bird', 'boat',
68            'bottle', 'bus', 'car', 'cat', 'chair',
69            'cow', 'diningtable', 'dog', 'horse',
70            'motorbike', 'person', 'pottedplant',
71            'sheep', 'sofa', 'train', 'tvmonitor')
72
73
74        self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}

Dataset for VOC data. Args: root: the root of the VOC2007 or VOC2012 dataset; this directory contains the following sub-directories: Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.

def get_image(self, index):
92    def get_image(self, index):
93        image_id = self.ids[index]
94        image = self._read_image(image_id)
95        if self.transform:
96            image, _ = self.transform(image)
97        return image
def get_annotation(self, index):
 99    def get_annotation(self, index):
100        image_id = self.ids[index]
101        return image_id, self._get_annotation(image_id)