what.models.detection.frcnn.faster_rcnn

  1import os
  2import time
  3from collections import namedtuple
  4
  5import torch
  6from torch import nn
  7from torch.nn import functional as F
  8
  9from what.models.detection.utils.array_utils import to_numpy, to_scalar, to_tensor
 10
 11from .model.utils.creator_tool import AnchorTargetCreator, ProposalTargetCreator
 12from .model.faster_rcnn_vgg16 import FasterRCNNVGG16
 13
 14from .meter.confusion_meter import ConfusionMeter
 15from .meter.averagevalue_meter import AverageValueMeter
 16from .utils.eval_tool import eval_detection_voc
 17from .utils.config import opt
 18
 19LossTuple = namedtuple('LossTuple',
 20                       ['rpn_loc_loss',
 21                        'rpn_cls_loss',
 22                        'roi_loc_loss',
 23                        'roi_cls_loss',
 24                        'total_loss'
 25                        ])
 26
 27def _smooth_l1_loss(x, t, in_weight, sigma):
 28    sigma2 = sigma ** 2
 29    diff = in_weight * (x - t)
 30    abs_diff = diff.abs()
 31    flag = (abs_diff.data < (1. / sigma2)).float()
 32    y = (flag * (sigma2 / 2.) * (diff ** 2) +
 33         (1 - flag) * (abs_diff - 0.5 / sigma2))
 34    return y.sum()
 35
 36def _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma):
 37    in_weight = torch.zeros(gt_loc.shape)
 38    # Localization loss is calculated only for positive rois.
 39    # NOTE:  unlike origin implementation, 
 40    # we don't need inside_weight and outside_weight, they can calculate by gt_label
 41    in_weight[(gt_label > 0).view(-1, 1).expand_as(in_weight)] = 1
 42    loc_loss = _smooth_l1_loss(pred_loc, gt_loc, in_weight.detach(), sigma)
 43    # Normalize by total number of negtive and positive rois.
 44    loc_loss /= ((gt_label >= 0).sum().float()) # ignore gt_label==-1 for rpn_loss
 45    return loc_loss
 46
 47class FasterRCNN(nn.Module):
 48    """wrapper for conveniently training. return losses
 49
 50    The losses include:
 51
 52    * :obj:`rpn_loc_loss`: The localization loss for \
 53        Region Proposal Network (RPN).
 54    * :obj:`rpn_cls_loss`: The classification loss for RPN.
 55    * :obj:`roi_loc_loss`: The localization loss for the head module.
 56    * :obj:`roi_cls_loss`: The classification loss for the head module.
 57    * :obj:`total_loss`: The sum of 4 loss above.
 58
 59    Args:
 60        faster_rcnn (model.FasterRCNN):
 61            A Faster R-CNN model that is going to be trained.
 62    """
 63
 64    def __init__(self, device=torch.device('cpu')):
 65        super(FasterRCNN, self).__init__()
 66
 67        self.device = device
 68
 69        self.faster_rcnn = FasterRCNNVGG16(device=device)
 70        self.rpn_sigma = opt.rpn_sigma
 71        self.roi_sigma = opt.roi_sigma
 72
 73        # target creator create gt_bbox gt_label etc as training targets. 
 74        self.anchor_target_creator = AnchorTargetCreator()
 75        self.proposal_target_creator = ProposalTargetCreator()
 76
 77        self.loc_normalize_mean = self.faster_rcnn.loc_normalize_mean
 78        self.loc_normalize_std = self.faster_rcnn.loc_normalize_std
 79
 80        self.optimizer = self.faster_rcnn.get_optimizer()
 81
 82        # indicators for training status
 83        self.rpn_cm = ConfusionMeter(2)
 84        self.roi_cm = ConfusionMeter(21)
 85        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss
 86
 87    def predict(self, img):
 88        return self.faster_rcnn.predict(img)
 89
 90    def forward(self, imgs, bboxes, labels, scale):
 91        """Forward Faster R-CNN and calculate losses.
 92
 93        Here are notations used.
 94
 95        * :math:`N` is the batch size.
 96        * :math:`R` is the number of bounding boxes per image.
 97
 98        Currently, only :math:`N=1` is supported.
 99
100        Args:
101            imgs (~torch.autograd.Variable): A variable with a batch of images.
102            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
103                Its shape is :math:`(N, R, 4)`.
104            labels (~torch.autograd..Variable): A batch of labels.
105                Its shape is :math:`(N, R)`. The background is excluded from
106                the definition, which means that the range of the value
107                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
108                classes.
109            scale (float): Amount of scaling applied to
110                the raw image during preprocessing.
111
112        Returns:
113            namedtuple of 5 losses
114        """
115        n = bboxes.shape[0]
116        if n != 1:
117            raise ValueError('Currently only batch size 1 is supported.')
118
119        _, _, H, W = imgs.shape
120        img_size = (H, W)
121
122        features = self.faster_rcnn.extractor(imgs)
123
124        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
125            self.faster_rcnn.rpn(features, img_size, scale)
126
127        # Since batch size is one, convert variables to singular form
128        bbox = bboxes[0]
129        label = labels[0]
130        rpn_score = rpn_scores[0]
131        rpn_loc = rpn_locs[0]
132        roi = rois
133
134        # Sample RoIs and forward
135        # it's fine to break the computation graph of rois, 
136        # consider them as constant input
137        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
138            roi,
139            to_numpy(bbox),
140            to_numpy(label),
141            self.loc_normalize_mean,
142            self.loc_normalize_std)
143        # NOTE it's all zero because now it only support for batch=1 now
144        sample_roi_index = torch.zeros(len(sample_roi))
145        roi_cls_loc, roi_score = self.faster_rcnn.head(
146            features,
147            sample_roi,
148            sample_roi_index)
149
150        # ------------------ RPN losses -------------------#
151        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
152            to_numpy(bbox),
153            anchor,
154            img_size)
155        gt_rpn_label = to_tensor(gt_rpn_label, self.device).long()
156        gt_rpn_loc = to_tensor(gt_rpn_loc, self.device)
157        rpn_loc_loss = _fast_rcnn_loc_loss(
158            rpn_loc,
159            gt_rpn_loc,
160            gt_rpn_label.data,
161            self.rpn_sigma)
162
163        # NOTE: default value of ignore_index is -100 ...
164        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.to(self.device), ignore_index=-1)
165        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
166        _rpn_score = to_numpy(rpn_score)[to_numpy(gt_rpn_label) > -1]
167        self.rpn_cm.add(to_tensor(_rpn_score, torch.device("cpu")), _gt_rpn_label.data.long())
168
169        # ------------------ ROI losses (fast rcnn loss) -------------------#
170        n_sample = roi_cls_loc.shape[0]
171        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
172        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().to(self.device), \
173                              to_tensor(gt_roi_label, self.device).long()]
174        gt_roi_label = to_tensor(gt_roi_label, self.device).long()
175        gt_roi_loc = to_tensor(gt_roi_loc, self.device)
176
177        roi_loc_loss = _fast_rcnn_loc_loss(
178            roi_loc.contiguous(),
179            gt_roi_loc,
180            gt_roi_label.data,
181            self.roi_sigma)
182
183        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label)
184
185        self.roi_cm.add(to_tensor(roi_score, torch.device("cpu")), gt_roi_label.data.long())
186
187        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
188        losses = losses + [sum(losses)]
189
190        return LossTuple(*losses)
191
192    def step(self, imgs, bboxes, labels, scale):
193        self.optimizer.zero_grad()
194        losses = self.forward(imgs, bboxes, labels, scale)
195        losses.total_loss.backward()
196        self.optimizer.step()
197        self.update_meters(losses)
198        return losses
199
200    def eval(self, val_loader, test_num=10000):
201
202        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
203        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
204
205        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) in enumerate(val_loader):
206            sizes = [sizes[0][0], sizes[1][0]]
207            pred_bboxes_, pred_labels_, pred_scores_ = self.faster_rcnn.predict(imgs, [sizes])
208            gt_bboxes += list(gt_bboxes_.numpy())
209            gt_labels += list(gt_labels_.numpy())
210            gt_difficults += list(gt_difficults_.numpy())
211            pred_bboxes += pred_bboxes_
212            pred_labels += pred_labels_
213            pred_scores += pred_scores_
214            if ii == test_num: break
215
216        result = eval_detection_voc(
217            pred_bboxes, pred_labels, pred_scores,
218            gt_bboxes, gt_labels, gt_difficults,
219            use_07_metric=True)
220        return result
221
222
223    def load(self, path, load_optimizer=True, parse_opt=False, map_location=torch.device('cpu')):
224        state_dict = torch.load(path, map_location=map_location)
225        if 'model' in state_dict:
226            self.faster_rcnn.load_state_dict(state_dict['model'])
227        else:  # legacy way, for backward compatibility
228            self.faster_rcnn.load_state_dict(state_dict)
229            return self
230        if parse_opt:
231            opt._parse(state_dict['config'])
232        if 'optimizer' in state_dict and load_optimizer:
233            self.optimizer.load_state_dict(state_dict['optimizer'])
234        return self
235
236    def save(self, save_optimizer=False, save_path=None, **kwargs):
237        """serialize models include optimizer and other info
238        return path where the model-file is stored.
239
240        Args:
241            save_optimizer (bool): whether save optimizer.state_dict().
242            save_path (string): where to save model, if it's None, save_path
243                is generate using time str and info from kwargs.
244        
245        Returns:
246            save_path(str): the path to save models.
247        """
248        save_dict = dict()
249
250        save_dict['model'] = self.faster_rcnn.state_dict()
251        save_dict['config'] = opt._state_dict()
252        save_dict['other_info'] = kwargs
253        # save_dict['vis_info'] = self.vis.state_dict()
254
255        if save_optimizer:
256            save_dict['optimizer'] = self.optimizer.state_dict()
257
258        if save_path is None:
259            timestr = time.strftime('%m%d%H%M')
260            save_path = 'checkpoints/fasterrcnn_%s' % timestr
261            for k_, v_ in kwargs.items():
262                save_path += '_%s' % v_
263
264        save_dir = os.path.dirname(save_path)
265        if not os.path.exists(save_dir):
266            os.makedirs(save_dir)
267
268        torch.save(save_dict, save_path)
269        # self.vis.save([self.vis.env])
270        return save_path
271
272    def update_meters(self, losses):
273        loss_d = {k: to_scalar(v) for k, v in losses._asdict().items()}
274        for key, meter in self.meters.items():
275            meter.add(loss_d[key])
276
277    def reset_meters(self):
278        for key, meter in self.meters.items():
279            meter.reset()
280        self.roi_cm.reset()
281        self.rpn_cm.reset()
282
283    def get_meter_data(self):
284        return {k: v.value()[0] for k, v in self.meters.items()}
class LossTuple(builtins.tuple):

LossTuple(rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss, total_loss)

LossTuple(rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss, total_loss)

Create new instance of LossTuple(rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss, total_loss)

rpn_loc_loss

Alias for field number 0

rpn_cls_loss

Alias for field number 1

roi_loc_loss

Alias for field number 2

roi_cls_loss

Alias for field number 3

total_loss

Alias for field number 4

Inherited Members
builtins.tuple
index
count
class FasterRCNN(torch.nn.modules.module.Module):
 48class FasterRCNN(nn.Module):
 49    """wrapper for conveniently training. return losses
 50
 51    The losses include:
 52
 53    * :obj:`rpn_loc_loss`: The localization loss for \
 54        Region Proposal Network (RPN).
 55    * :obj:`rpn_cls_loss`: The classification loss for RPN.
 56    * :obj:`roi_loc_loss`: The localization loss for the head module.
 57    * :obj:`roi_cls_loss`: The classification loss for the head module.
 58    * :obj:`total_loss`: The sum of 4 loss above.
 59
 60    Args:
 61        faster_rcnn (model.FasterRCNN):
 62            A Faster R-CNN model that is going to be trained.
 63    """
 64
 65    def __init__(self, device=torch.device('cpu')):
 66        super(FasterRCNN, self).__init__()
 67
 68        self.device = device
 69
 70        self.faster_rcnn = FasterRCNNVGG16(device=device)
 71        self.rpn_sigma = opt.rpn_sigma
 72        self.roi_sigma = opt.roi_sigma
 73
 74        # target creator create gt_bbox gt_label etc as training targets. 
 75        self.anchor_target_creator = AnchorTargetCreator()
 76        self.proposal_target_creator = ProposalTargetCreator()
 77
 78        self.loc_normalize_mean = self.faster_rcnn.loc_normalize_mean
 79        self.loc_normalize_std = self.faster_rcnn.loc_normalize_std
 80
 81        self.optimizer = self.faster_rcnn.get_optimizer()
 82
 83        # indicators for training status
 84        self.rpn_cm = ConfusionMeter(2)
 85        self.roi_cm = ConfusionMeter(21)
 86        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss
 87
 88    def predict(self, img):
 89        return self.faster_rcnn.predict(img)
 90
 91    def forward(self, imgs, bboxes, labels, scale):
 92        """Forward Faster R-CNN and calculate losses.
 93
 94        Here are notations used.
 95
 96        * :math:`N` is the batch size.
 97        * :math:`R` is the number of bounding boxes per image.
 98
 99        Currently, only :math:`N=1` is supported.
100
101        Args:
102            imgs (~torch.autograd.Variable): A variable with a batch of images.
103            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
104                Its shape is :math:`(N, R, 4)`.
105            labels (~torch.autograd..Variable): A batch of labels.
106                Its shape is :math:`(N, R)`. The background is excluded from
107                the definition, which means that the range of the value
108                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
109                classes.
110            scale (float): Amount of scaling applied to
111                the raw image during preprocessing.
112
113        Returns:
114            namedtuple of 5 losses
115        """
116        n = bboxes.shape[0]
117        if n != 1:
118            raise ValueError('Currently only batch size 1 is supported.')
119
120        _, _, H, W = imgs.shape
121        img_size = (H, W)
122
123        features = self.faster_rcnn.extractor(imgs)
124
125        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
126            self.faster_rcnn.rpn(features, img_size, scale)
127
128        # Since batch size is one, convert variables to singular form
129        bbox = bboxes[0]
130        label = labels[0]
131        rpn_score = rpn_scores[0]
132        rpn_loc = rpn_locs[0]
133        roi = rois
134
135        # Sample RoIs and forward
136        # it's fine to break the computation graph of rois, 
137        # consider them as constant input
138        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
139            roi,
140            to_numpy(bbox),
141            to_numpy(label),
142            self.loc_normalize_mean,
143            self.loc_normalize_std)
144        # NOTE it's all zero because now it only support for batch=1 now
145        sample_roi_index = torch.zeros(len(sample_roi))
146        roi_cls_loc, roi_score = self.faster_rcnn.head(
147            features,
148            sample_roi,
149            sample_roi_index)
150
151        # ------------------ RPN losses -------------------#
152        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
153            to_numpy(bbox),
154            anchor,
155            img_size)
156        gt_rpn_label = to_tensor(gt_rpn_label, self.device).long()
157        gt_rpn_loc = to_tensor(gt_rpn_loc, self.device)
158        rpn_loc_loss = _fast_rcnn_loc_loss(
159            rpn_loc,
160            gt_rpn_loc,
161            gt_rpn_label.data,
162            self.rpn_sigma)
163
164        # NOTE: default value of ignore_index is -100 ...
165        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.to(self.device), ignore_index=-1)
166        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
167        _rpn_score = to_numpy(rpn_score)[to_numpy(gt_rpn_label) > -1]
168        self.rpn_cm.add(to_tensor(_rpn_score, torch.device("cpu")), _gt_rpn_label.data.long())
169
170        # ------------------ ROI losses (fast rcnn loss) -------------------#
171        n_sample = roi_cls_loc.shape[0]
172        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
173        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().to(self.device), \
174                              to_tensor(gt_roi_label, self.device).long()]
175        gt_roi_label = to_tensor(gt_roi_label, self.device).long()
176        gt_roi_loc = to_tensor(gt_roi_loc, self.device)
177
178        roi_loc_loss = _fast_rcnn_loc_loss(
179            roi_loc.contiguous(),
180            gt_roi_loc,
181            gt_roi_label.data,
182            self.roi_sigma)
183
184        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label)
185
186        self.roi_cm.add(to_tensor(roi_score, torch.device("cpu")), gt_roi_label.data.long())
187
188        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
189        losses = losses + [sum(losses)]
190
191        return LossTuple(*losses)
192
193    def step(self, imgs, bboxes, labels, scale):
194        self.optimizer.zero_grad()
195        losses = self.forward(imgs, bboxes, labels, scale)
196        losses.total_loss.backward()
197        self.optimizer.step()
198        self.update_meters(losses)
199        return losses
200
201    def eval(self, val_loader, test_num=10000):
202
203        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
204        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
205
206        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) in enumerate(val_loader):
207            sizes = [sizes[0][0], sizes[1][0]]
208            pred_bboxes_, pred_labels_, pred_scores_ = self.faster_rcnn.predict(imgs, [sizes])
209            gt_bboxes += list(gt_bboxes_.numpy())
210            gt_labels += list(gt_labels_.numpy())
211            gt_difficults += list(gt_difficults_.numpy())
212            pred_bboxes += pred_bboxes_
213            pred_labels += pred_labels_
214            pred_scores += pred_scores_
215            if ii == test_num: break
216
217        result = eval_detection_voc(
218            pred_bboxes, pred_labels, pred_scores,
219            gt_bboxes, gt_labels, gt_difficults,
220            use_07_metric=True)
221        return result
222
223
224    def load(self, path, load_optimizer=True, parse_opt=False, map_location=torch.device('cpu')):
225        state_dict = torch.load(path, map_location=map_location)
226        if 'model' in state_dict:
227            self.faster_rcnn.load_state_dict(state_dict['model'])
228        else:  # legacy way, for backward compatibility
229            self.faster_rcnn.load_state_dict(state_dict)
230            return self
231        if parse_opt:
232            opt._parse(state_dict['config'])
233        if 'optimizer' in state_dict and load_optimizer:
234            self.optimizer.load_state_dict(state_dict['optimizer'])
235        return self
236
237    def save(self, save_optimizer=False, save_path=None, **kwargs):
238        """serialize models include optimizer and other info
239        return path where the model-file is stored.
240
241        Args:
242            save_optimizer (bool): whether save optimizer.state_dict().
243            save_path (string): where to save model, if it's None, save_path
244                is generate using time str and info from kwargs.
245        
246        Returns:
247            save_path(str): the path to save models.
248        """
249        save_dict = dict()
250
251        save_dict['model'] = self.faster_rcnn.state_dict()
252        save_dict['config'] = opt._state_dict()
253        save_dict['other_info'] = kwargs
254        # save_dict['vis_info'] = self.vis.state_dict()
255
256        if save_optimizer:
257            save_dict['optimizer'] = self.optimizer.state_dict()
258
259        if save_path is None:
260            timestr = time.strftime('%m%d%H%M')
261            save_path = 'checkpoints/fasterrcnn_%s' % timestr
262            for k_, v_ in kwargs.items():
263                save_path += '_%s' % v_
264
265        save_dir = os.path.dirname(save_path)
266        if not os.path.exists(save_dir):
267            os.makedirs(save_dir)
268
269        torch.save(save_dict, save_path)
270        # self.vis.save([self.vis.env])
271        return save_path
272
273    def update_meters(self, losses):
274        loss_d = {k: to_scalar(v) for k, v in losses._asdict().items()}
275        for key, meter in self.meters.items():
276            meter.add(loss_d[key])
277
278    def reset_meters(self):
279        for key, meter in self.meters.items():
280            meter.reset()
281        self.roi_cm.reset()
282        self.rpn_cm.reset()
283
284    def get_meter_data(self):
285        return {k: v.value()[0] for k, v in self.meters.items()}

wrapper for conveniently training. return losses

The losses include:

  • rpn_loc_loss: The localization loss for Region Proposal Network (RPN).
  • rpn_cls_loss: The classification loss for RPN.
  • roi_loc_loss: The localization loss for the head module.
  • roi_cls_loss: The classification loss for the head module.
  • total_loss: The sum of 4 loss above.

Args: faster_rcnn (model.FasterRCNN): A Faster R-CNN model that is going to be trained.

FasterRCNN(device=device(type='cpu'))
65    def __init__(self, device=torch.device('cpu')):
66        super(FasterRCNN, self).__init__()
67
68        self.device = device
69
70        self.faster_rcnn = FasterRCNNVGG16(device=device)
71        self.rpn_sigma = opt.rpn_sigma
72        self.roi_sigma = opt.roi_sigma
73
74        # target creator create gt_bbox gt_label etc as training targets. 
75        self.anchor_target_creator = AnchorTargetCreator()
76        self.proposal_target_creator = ProposalTargetCreator()
77
78        self.loc_normalize_mean = self.faster_rcnn.loc_normalize_mean
79        self.loc_normalize_std = self.faster_rcnn.loc_normalize_std
80
81        self.optimizer = self.faster_rcnn.get_optimizer()
82
83        # indicators for training status
84        self.rpn_cm = ConfusionMeter(2)
85        self.roi_cm = ConfusionMeter(21)
86        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss

Initializes internal Module state, shared by both nn.Module and ScriptModule.

def predict(self, img):
88    def predict(self, img):
89        return self.faster_rcnn.predict(img)
def forward(self, imgs, bboxes, labels, scale):
 91    def forward(self, imgs, bboxes, labels, scale):
 92        """Forward Faster R-CNN and calculate losses.
 93
 94        Here are notations used.
 95
 96        * :math:`N` is the batch size.
 97        * :math:`R` is the number of bounding boxes per image.
 98
 99        Currently, only :math:`N=1` is supported.
100
101        Args:
102            imgs (~torch.autograd.Variable): A variable with a batch of images.
103            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
104                Its shape is :math:`(N, R, 4)`.
105            labels (~torch.autograd..Variable): A batch of labels.
106                Its shape is :math:`(N, R)`. The background is excluded from
107                the definition, which means that the range of the value
108                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
109                classes.
110            scale (float): Amount of scaling applied to
111                the raw image during preprocessing.
112
113        Returns:
114            namedtuple of 5 losses
115        """
116        n = bboxes.shape[0]
117        if n != 1:
118            raise ValueError('Currently only batch size 1 is supported.')
119
120        _, _, H, W = imgs.shape
121        img_size = (H, W)
122
123        features = self.faster_rcnn.extractor(imgs)
124
125        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
126            self.faster_rcnn.rpn(features, img_size, scale)
127
128        # Since batch size is one, convert variables to singular form
129        bbox = bboxes[0]
130        label = labels[0]
131        rpn_score = rpn_scores[0]
132        rpn_loc = rpn_locs[0]
133        roi = rois
134
135        # Sample RoIs and forward
136        # it's fine to break the computation graph of rois, 
137        # consider them as constant input
138        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
139            roi,
140            to_numpy(bbox),
141            to_numpy(label),
142            self.loc_normalize_mean,
143            self.loc_normalize_std)
144        # NOTE it's all zero because now it only support for batch=1 now
145        sample_roi_index = torch.zeros(len(sample_roi))
146        roi_cls_loc, roi_score = self.faster_rcnn.head(
147            features,
148            sample_roi,
149            sample_roi_index)
150
151        # ------------------ RPN losses -------------------#
152        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
153            to_numpy(bbox),
154            anchor,
155            img_size)
156        gt_rpn_label = to_tensor(gt_rpn_label, self.device).long()
157        gt_rpn_loc = to_tensor(gt_rpn_loc, self.device)
158        rpn_loc_loss = _fast_rcnn_loc_loss(
159            rpn_loc,
160            gt_rpn_loc,
161            gt_rpn_label.data,
162            self.rpn_sigma)
163
164        # NOTE: default value of ignore_index is -100 ...
165        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.to(self.device), ignore_index=-1)
166        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
167        _rpn_score = to_numpy(rpn_score)[to_numpy(gt_rpn_label) > -1]
168        self.rpn_cm.add(to_tensor(_rpn_score, torch.device("cpu")), _gt_rpn_label.data.long())
169
170        # ------------------ ROI losses (fast rcnn loss) -------------------#
171        n_sample = roi_cls_loc.shape[0]
172        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
173        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().to(self.device), \
174                              to_tensor(gt_roi_label, self.device).long()]
175        gt_roi_label = to_tensor(gt_roi_label, self.device).long()
176        gt_roi_loc = to_tensor(gt_roi_loc, self.device)
177
178        roi_loc_loss = _fast_rcnn_loc_loss(
179            roi_loc.contiguous(),
180            gt_roi_loc,
181            gt_roi_label.data,
182            self.roi_sigma)
183
184        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label)
185
186        self.roi_cm.add(to_tensor(roi_score, torch.device("cpu")), gt_roi_label.data.long())
187
188        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
189        losses = losses + [sum(losses)]
190
191        return LossTuple(*losses)

Forward Faster R-CNN and calculate losses.

Here are notations used.

  • \( N \) is the batch size.
  • \( R \) is the number of bounding boxes per image.

Currently, only \( N=1 \) is supported.

Args: imgs (~torch.autograd.Variable): A variable with a batch of images. bboxes (~torch.autograd.Variable): A batch of bounding boxes. Its shape is \( (N, R, 4) \). labels (~torch.autograd.Variable): A batch of labels. Its shape is \( (N, R) \). The background is excluded from the definition, which means that the range of the value is \( [0, L - 1] \). \( L \) is the number of foreground classes. scale (float): Amount of scaling applied to the raw image during preprocessing.

Returns: namedtuple of 5 losses

def step(self, imgs, bboxes, labels, scale):
193    def step(self, imgs, bboxes, labels, scale):
194        self.optimizer.zero_grad()
195        losses = self.forward(imgs, bboxes, labels, scale)
196        losses.total_loss.backward()
197        self.optimizer.step()
198        self.update_meters(losses)
199        return losses
def eval(self, val_loader, test_num=10000):
201    def eval(self, val_loader, test_num=10000):
202
203        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
204        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
205
206        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) in enumerate(val_loader):
207            sizes = [sizes[0][0], sizes[1][0]]
208            pred_bboxes_, pred_labels_, pred_scores_ = self.faster_rcnn.predict(imgs, [sizes])
209            gt_bboxes += list(gt_bboxes_.numpy())
210            gt_labels += list(gt_labels_.numpy())
211            gt_difficults += list(gt_difficults_.numpy())
212            pred_bboxes += pred_bboxes_
213            pred_labels += pred_labels_
214            pred_scores += pred_scores_
215            if ii == test_num: break
216
217        result = eval_detection_voc(
218            pred_bboxes, pred_labels, pred_scores,
219            gt_bboxes, gt_labels, gt_difficults,
220            use_07_metric=True)
221        return result

Sets the module in evaluation mode.

This has any effect only on certain modules. See documentations of particular modules for details of their behaviors in training/evaluation mode, if they are affected, e.g. Dropout, BatchNorm, etc.

This is equivalent with self.train(False) <torch.nn.Module.train>().

See :ref:locally-disable-grad-doc for a comparison between .eval() and several similar mechanisms that may be confused with it.

Returns: Module: self

def load( self, path, load_optimizer=True, parse_opt=False, map_location=device(type='cpu')):
224    def load(self, path, load_optimizer=True, parse_opt=False, map_location=torch.device('cpu')):
225        state_dict = torch.load(path, map_location=map_location)
226        if 'model' in state_dict:
227            self.faster_rcnn.load_state_dict(state_dict['model'])
228        else:  # legacy way, for backward compatibility
229            self.faster_rcnn.load_state_dict(state_dict)
230            return self
231        if parse_opt:
232            opt._parse(state_dict['config'])
233        if 'optimizer' in state_dict and load_optimizer:
234            self.optimizer.load_state_dict(state_dict['optimizer'])
235        return self
def save(self, save_optimizer=False, save_path=None, **kwargs):
237    def save(self, save_optimizer=False, save_path=None, **kwargs):
238        """serialize models include optimizer and other info
239        return path where the model-file is stored.
240
241        Args:
242            save_optimizer (bool): whether save optimizer.state_dict().
243            save_path (string): where to save model, if it's None, save_path
244                is generate using time str and info from kwargs.
245        
246        Returns:
247            save_path(str): the path to save models.
248        """
249        save_dict = dict()
250
251        save_dict['model'] = self.faster_rcnn.state_dict()
252        save_dict['config'] = opt._state_dict()
253        save_dict['other_info'] = kwargs
254        # save_dict['vis_info'] = self.vis.state_dict()
255
256        if save_optimizer:
257            save_dict['optimizer'] = self.optimizer.state_dict()
258
259        if save_path is None:
260            timestr = time.strftime('%m%d%H%M')
261            save_path = 'checkpoints/fasterrcnn_%s' % timestr
262            for k_, v_ in kwargs.items():
263                save_path += '_%s' % v_
264
265        save_dir = os.path.dirname(save_path)
266        if not os.path.exists(save_dir):
267            os.makedirs(save_dir)
268
269        torch.save(save_dict, save_path)
270        # self.vis.save([self.vis.env])
271        return save_path

Serialize the model (and optionally the optimizer and other info) and return the path where the model file is stored.

Args: save_optimizer (bool): whether to save optimizer.state_dict(). save_path (string): where to save the model; if it is None, save_path is generated using a time string and info from kwargs.

Returns: save_path(str): the path to save models.

def update_meters(self, losses):
273    def update_meters(self, losses):
274        loss_d = {k: to_scalar(v) for k, v in losses._asdict().items()}
275        for key, meter in self.meters.items():
276            meter.add(loss_d[key])
def reset_meters(self):
278    def reset_meters(self):
279        for key, meter in self.meters.items():
280            meter.reset()
281        self.roi_cm.reset()
282        self.rpn_cm.reset()
def get_meter_data(self):
284    def get_meter_data(self):
285        return {k: v.value()[0] for k, v in self.meters.items()}
Inherited Members
torch.nn.modules.module.Module
dump_patches
register_buffer
register_parameter
add_module
register_module
get_submodule
get_parameter
get_buffer
get_extra_state
set_extra_state
apply
cuda
xpu
cpu
type
float
double
half
bfloat16
to_empty
to
register_backward_hook
register_full_backward_hook
register_forward_pre_hook
register_forward_hook
state_dict
load_state_dict
parameters
named_parameters
buffers
named_buffers
children
named_children
modules
named_modules
train
requires_grad_
zero_grad
share_memory
extra_repr