what.models.detection.frcnn.faster_rcnn
```python
import os
import time
from collections import namedtuple

import torch
from torch import nn
from torch.nn import functional as F

from what.models.detection.utils.array_utils import to_numpy, to_scalar, to_tensor

from .model.utils.creator_tool import AnchorTargetCreator, ProposalTargetCreator
from .model.faster_rcnn_vgg16 import FasterRCNNVGG16

from .meter.confusion_meter import ConfusionMeter
from .meter.averagevalue_meter import AverageValueMeter
from .utils.eval_tool import eval_detection_voc
from .utils.config import opt

LossTuple = namedtuple('LossTuple',
                       ['rpn_loc_loss',
                        'rpn_cls_loss',
                        'roi_loc_loss',
                        'roi_cls_loss',
                        'total_loss'
                        ])


def _smooth_l1_loss(x, t, in_weight, sigma):
    sigma2 = sigma ** 2
    diff = in_weight * (x - t)
    abs_diff = diff.abs()
    flag = (abs_diff.data < (1. / sigma2)).float()
    y = (flag * (sigma2 / 2.) * (diff ** 2) +
         (1 - flag) * (abs_diff - 0.5 / sigma2))
    return y.sum()


def _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma):
    # Create the weight tensor on the same device as gt_loc to avoid a
    # CPU/GPU mismatch when training on CUDA.
    in_weight = torch.zeros(gt_loc.shape, device=gt_loc.device)
    # The localization loss is calculated only for positive RoIs.
    # NOTE: unlike the original implementation, we don't need
    # inside_weight and outside_weight; they can be derived from gt_label.
    in_weight[(gt_label > 0).view(-1, 1).expand_as(in_weight)] = 1
    loc_loss = _smooth_l1_loss(pred_loc, gt_loc, in_weight.detach(), sigma)
    # Normalize by the total number of negative and positive RoIs
    # (gt_label == -1 entries are ignored for the RPN loss).
    loc_loss /= ((gt_label >= 0).sum().float())
    return loc_loss


class FasterRCNN(nn.Module):
    """A wrapper for conveniently training Faster R-CNN; its forward pass
    returns the losses.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for the \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for the RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`total_loss`: The sum of the 4 losses above.

    Args:
        device (torch.device): The device on which the wrapped
            FasterRCNNVGG16 model is created. Defaults to CPU.
    """

    def __init__(self, device=torch.device('cpu')):
        super(FasterRCNN, self).__init__()

        self.device = device

        self.faster_rcnn = FasterRCNNVGG16(device=device)
        self.rpn_sigma = opt.rpn_sigma
        self.roi_sigma = opt.roi_sigma

        # Target creators produce gt_bbox, gt_label, etc. as training targets.
        self.anchor_target_creator = AnchorTargetCreator()
        self.proposal_target_creator = ProposalTargetCreator()

        self.loc_normalize_mean = self.faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = self.faster_rcnn.loc_normalize_std

        self.optimizer = self.faster_rcnn.get_optimizer()

        # Indicators for training status
        self.rpn_cm = ConfusionMeter(2)
        self.roi_cm = ConfusionMeter(21)
        self.meters = {k: AverageValueMeter() for k in LossTuple._fields}  # average loss

    def predict(self, img):
        return self.faster_rcnn.predict(img)

    def forward(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are the notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~torch.autograd.Variable): A variable with a batch of images.
            bboxes (~torch.autograd.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~torch.autograd.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            namedtuple of 5 losses
        """
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)

        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since the batch size is one, convert variables to singular form.
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward.
        # It's fine to break the computation graph of rois;
        # consider them as constant input.
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            to_numpy(bbox),
            to_numpy(label),
            self.loc_normalize_mean,
            self.loc_normalize_std)
        # NOTE: all-zero indices because only batch size 1 is supported.
        sample_roi_index = torch.zeros(len(sample_roi))
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features,
            sample_roi,
            sample_roi_index)

        # ------------------ RPN losses ------------------- #
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            to_numpy(bbox),
            anchor,
            img_size)
        gt_rpn_label = to_tensor(gt_rpn_label, self.device).long()
        gt_rpn_loc = to_tensor(gt_rpn_loc, self.device)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc,
            gt_rpn_loc,
            gt_rpn_label.data,
            self.rpn_sigma)

        # NOTE: the default value of ignore_index is -100, so -1 must be passed explicitly.
        rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.to(self.device), ignore_index=-1)
        _gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
        _rpn_score = to_numpy(rpn_score)[to_numpy(gt_rpn_label) > -1]
        self.rpn_cm.add(to_tensor(_rpn_score, torch.device("cpu")), _gt_rpn_label.data.long())

        # ------------------ RoI losses (Fast R-CNN loss) ------------------- #
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().to(self.device),
                              to_tensor(gt_roi_label, self.device).long()]
        gt_roi_label = to_tensor(gt_roi_label, self.device).long()
        gt_roi_loc = to_tensor(gt_roi_loc, self.device)

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc.contiguous(),
            gt_roi_loc,
            gt_roi_label.data,
            self.roi_sigma)

        roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label)

        self.roi_cm.add(to_tensor(roi_score, torch.device("cpu")), gt_roi_label.data.long())

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        return LossTuple(*losses)

    def step(self, imgs, bboxes, labels, scale):
        self.optimizer.zero_grad()
        losses = self.forward(imgs, bboxes, labels, scale)
        losses.total_loss.backward()
        self.optimizer.step()
        self.update_meters(losses)
        return losses

    def eval(self, val_loader, test_num=10000):
        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()

        for ii, (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) in enumerate(val_loader):
            sizes = [sizes[0][0], sizes[1][0]]
            pred_bboxes_, pred_labels_, pred_scores_ = self.faster_rcnn.predict(imgs, [sizes])
            gt_bboxes += list(gt_bboxes_.numpy())
            gt_labels += list(gt_labels_.numpy())
            gt_difficults += list(gt_difficults_.numpy())
            pred_bboxes += pred_bboxes_
            pred_labels += pred_labels_
            pred_scores += pred_scores_
            if ii == test_num:
                break

        result = eval_detection_voc(
            pred_bboxes, pred_labels, pred_scores,
            gt_bboxes, gt_labels, gt_difficults,
            use_07_metric=True)
        return result

    def load(self, path, load_optimizer=True, parse_opt=False, map_location=torch.device('cpu')):
        state_dict = torch.load(path, map_location=map_location)
        if 'model' in state_dict:
            self.faster_rcnn.load_state_dict(state_dict['model'])
        else:  # legacy way, for backward compatibility
            self.faster_rcnn.load_state_dict(state_dict)
            return self
        if parse_opt:
            opt._parse(state_dict['config'])
        if 'optimizer' in state_dict and load_optimizer:
            self.optimizer.load_state_dict(state_dict['optimizer'])
        return self

    def save(self, save_optimizer=False, save_path=None, **kwargs):
        """Serialize the model, optionally with the optimizer and other info,
        and return the path where the model file is stored.

        Args:
            save_optimizer (bool): whether to save optimizer.state_dict().
            save_path (string): where to save the model; if it's None, save_path
                is generated from a time string and info from kwargs.

        Returns:
            save_path (str): the path the model was saved to.
        """
        save_dict = dict()

        save_dict['model'] = self.faster_rcnn.state_dict()
        save_dict['config'] = opt._state_dict()
        save_dict['other_info'] = kwargs
        # save_dict['vis_info'] = self.vis.state_dict()

        if save_optimizer:
            save_dict['optimizer'] = self.optimizer.state_dict()

        if save_path is None:
            timestr = time.strftime('%m%d%H%M')
            save_path = 'checkpoints/fasterrcnn_%s' % timestr
            for k_, v_ in kwargs.items():
                save_path += '_%s' % v_

        save_dir = os.path.dirname(save_path)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        torch.save(save_dict, save_path)
        # self.vis.save([self.vis.env])
        return save_path

    def update_meters(self, losses):
        loss_d = {k: to_scalar(v) for k, v in losses._asdict().items()}
        for key, meter in self.meters.items():
            meter.add(loss_d[key])

    def reset_meters(self):
        for key, meter in self.meters.items():
            meter.reset()
        self.roi_cm.reset()
        self.rpn_cm.reset()

    def get_meter_data(self):
        return {k: v.value()[0] for k, v in self.meters.items()}
```
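For reference, _smooth_l1_loss implements the smooth L1 loss from the Fast R-CNN paper. With \( d = w_{\text{in}}\,(x - t) \) elementwise (where \( w_{\text{in}} \) is in_weight) and \( \sigma^2 \) equal to sigma ** 2 in the code:

\[
\mathrm{smooth}_{L_1}(d) =
\begin{cases}
\dfrac{\sigma^2}{2}\, d^2, & |d| < \dfrac{1}{\sigma^2} \\
|d| - \dfrac{1}{2\sigma^2}, & \text{otherwise,}
\end{cases}
\]

and _fast_rcnn_loc_loss sums this over all box coordinates (only positive samples receive nonzero weight) and divides by the number of samples with gt_label >= 0.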
LossTuple(rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss, total_loss)

A namedtuple collecting the four individual training losses and their sum (total_loss).
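Since LossTuple is a plain namedtuple, the losses can be read by field name, unpacked positionally, or converted to a dict (as update_meters does). The values below are stand-in floats for illustration only:

```python
from collections import namedtuple

LossTuple = namedtuple('LossTuple',
                       ['rpn_loc_loss', 'rpn_cls_loss',
                        'roi_loc_loss', 'roi_cls_loss', 'total_loss'])

losses = LossTuple(0.21, 0.08, 0.35, 0.12, 0.76)  # stand-in values
print(losses.total_loss)   # access by field name -> 0.76
print(losses._asdict())    # field -> value mapping, as used by update_meters
```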
class FasterRCNN(nn.Module)

A wrapper for conveniently training Faster R-CNN; its forward pass returns the losses.

The losses include:

- rpn_loc_loss: the localization loss for the Region Proposal Network (RPN).
- rpn_cls_loss: the classification loss for the RPN.
- roi_loc_loss: the localization loss for the head module.
- roi_cls_loss: the classification loss for the head module.
- total_loss: the sum of the 4 losses above.

Args:
    device (torch.device): the device on which the wrapped FasterRCNNVGG16 model is created. Defaults to CPU.
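A minimal training sketch. The tensor shapes follow the forward docstring; the synthetic sample, the (ymin, xmin, ymax, xmax) box convention, and the class index are illustrative assumptions rather than values prescribed by this module:

```python
import torch
from what.models.detection.frcnn.faster_rcnn import FasterRCNN

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
trainer = FasterRCNN(device=device)

# A synthetic N=1 sample, just to show the expected shapes:
# imgs (1, 3, H, W), bboxes (1, R, 4), labels (1, R), scale (float).
imgs = torch.randn(1, 3, 600, 800, device=device)
bboxes = torch.tensor([[[100., 100., 300., 400.]]], device=device)  # assumed (ymin, xmin, ymax, xmax)
labels = torch.tensor([[7]], device=device)                         # a foreground class index in [0, L-1]

losses = trainer.step(imgs, bboxes, labels, scale=1.0)  # forward + backward + optimizer update
print(losses.total_loss.item())
print(trainer.get_meter_data())  # running averages of each loss component
```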
def __init__(self, device=torch.device('cpu'))

Builds the wrapped FasterRCNNVGG16 model on the given device, along with the anchor/proposal target creators, the optimizer, and the confusion and loss meters used to track training status.
def forward(self, imgs, bboxes, labels, scale)

Forward Faster R-CNN and calculate losses.

Here are the notations used.

- \( N \) is the batch size.
- \( R \) is the number of bounding boxes per image.

Currently, only \( N=1 \) is supported.

Args:
    imgs (~torch.autograd.Variable): A variable with a batch of images.
    bboxes (~torch.autograd.Variable): A batch of bounding boxes. Its shape is \( (N, R, 4) \).
    labels (~torch.autograd.Variable): A batch of labels. Its shape is \( (N, R) \). The background is excluded from the definition, which means the values range over \( [0, L - 1] \), where \( L \) is the number of foreground classes.
    scale (float): Amount of scaling applied to the raw image during preprocessing.

Returns:
    A LossTuple of 5 losses.
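step() wraps forward() with the backward pass and optimizer update; calling forward() directly is useful when the individual losses need to be inspected or reweighted first. A sketch, reusing trainer, imgs, bboxes, and labels from the previous example:

```python
trainer.optimizer.zero_grad()
losses = trainer.forward(imgs, bboxes, labels, scale=1.0)
# LossTuple is a namedtuple, so each component is addressable by name.
print({k: v.item() for k, v in losses._asdict().items()})
losses.total_loss.backward()  # total_loss is the sum of the other four
trainer.optimizer.step()
```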
def eval(self, val_loader, test_num=10000)

Runs inference over a validation loader and scores the detections with the PASCAL VOC metric (eval_detection_voc with use_07_metric=True), stopping after test_num batches. Note that this overrides nn.Module.eval(): it performs VOC evaluation rather than switching the module into evaluation mode.
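A sketch of running evaluation, assuming a DataLoader that yields (imgs, sizes, gt_bboxes, gt_labels, gt_difficults) batches as the loop above expects. The 'map' key mirrors the chainercv convention for eval_detection_voc and is an assumption about this repo's eval_tool:

```python
# val_loader: yields (imgs, sizes, gt_bboxes_, gt_labels_, gt_difficults_) batches
result = trainer.eval(val_loader, test_num=100)  # stop after 100 batches
print(result['map'])  # assumed: dict with per-class 'ap' and mean 'map'
```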
def load(self, path, load_optimizer=True, parse_opt=False, map_location=torch.device('cpu'))

Loads a checkpoint produced by save(). New-style checkpoints store the weights under the 'model' key, with optional 'config' and 'optimizer' entries; a bare state dict is loaded directly for backward compatibility. Returns self.
def save(self, save_optimizer=False, save_path=None, **kwargs)

Serializes the model, optionally together with the optimizer state and other info, and returns the path where the model file is stored.

Args:
    save_optimizer (bool): whether to save optimizer.state_dict().
    save_path (string): where to save the model; if None, save_path is generated from a time string and the values in kwargs.

Returns:
    save_path (str): the path the model was saved to.
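A save/load round trip as a sketch; the note='epoch_3' kwarg is illustrative (save() appends each kwargs value to the generated filename when save_path is None):

```python
# Save model weights plus optimizer state under checkpoints/.
path = trainer.save(save_optimizer=True, note='epoch_3')
print(path)  # e.g. checkpoints/fasterrcnn_<MMDDHHMM>_epoch_3

# Restore into a fresh wrapper, including the optimizer state.
restored = FasterRCNN(device=device).load(path, load_optimizer=True)
```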