what.examples.faster_rcnn_demo
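
Real-time Faster R-CNN inference demo: captures frames from an OpenCV camera device, runs PASCAL VOC object detection on each frame, and draws the predicted bounding boxes until 'q' is pressed.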

import os  # used by the model download / load paths below

import cv2
import torch
import numpy as np

from what.models.detection.frcnn.faster_rcnn import FasterRCNN
# from what.models.detection.frcnn.datasets.util import read_image

from what.cli.model import *
from what.utils.file import get_file

from what.models.detection.utils.box_utils import draw_bounding_boxes
from what.models.detection.datasets.voc import VOC_CLASS_NAMES

def frcnn_inference_demo():

    video = input("Please input the OpenCV capture device (e.g. 0, 1, 2): ")

    while not video.isdigit():
        video = input("Please input the OpenCV capture device (e.g. 0, 1, 2): ")

    # Capture from camera
    cap = cv2.VideoCapture(int(video))
    # cap.set(3, 1920)   # CAP_PROP_FRAME_WIDTH
    # cap.set(4, 1080)   # CAP_PROP_FRAME_HEIGHT

    # Download the model first if it is not already cached locally
    # Check what_model_list for all available models
    index = 8
    if not os.path.isfile(os.path.join(WHAT_MODEL_PATH, what_model_list[index][WHAT_MODEL_FILE_INDEX])):
        get_file(what_model_list[index][WHAT_MODEL_FILE_INDEX],
                 WHAT_MODEL_PATH,
                 what_model_list[index][WHAT_MODEL_URL_INDEX],
                 what_model_list[index][WHAT_MODEL_HASH_INDEX])

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = FasterRCNN(device=device)

    model.load(os.path.join(WHAT_MODEL_PATH, what_model_list[index][WHAT_MODEL_FILE_INDEX]), map_location=device)

    while True:
        _, orig_image = cap.read()
        if orig_image is None:
            continue

        # Image preprocessing: OpenCV captures BGR, the model expects RGB
        input_img = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
        height, width, _ = input_img.shape

        # HWC -> CHW, then prepend a batch dimension (NCHW)
        input_img = np.array(input_img).transpose((2, 0, 1))
        input_img = torch.from_numpy(input_img)[None]

        # img = read_image('notebooks/demo.jpg', format='NCHW')
        # input_img = torch.from_numpy(img)[None]
        # RGB --> BGR
        # img = img.transpose((1, 2, 0)).astype(np.uint8)

        inputs, boxes, labels, scores = model.predict(input_img)

        # (x1, y1, x2, y2) --> (cx, cy, w, h), normalized to [0, 1]
        boxes = np.array(boxes)[0]
        box_w = boxes[:, 2] - boxes[:, 0]
        box_h = boxes[:, 3] - boxes[:, 1]
        boxes[:, 0] += box_w / 2   # x1 -> center x
        boxes[:, 0] /= width
        boxes[:, 1] += box_h / 2   # y1 -> center y
        boxes[:, 1] /= height
        boxes[:, 2] = box_w / width
        boxes[:, 3] = box_h / height

        # Fall back to the raw frame so `output` is defined
        # even when nothing is detected
        output = orig_image
        if len(boxes) > 0:
            output = draw_bounding_boxes(orig_image,
                                         boxes,
                                         labels[0],
                                         VOC_CLASS_NAMES[1:],
                                         scores[0])

        cv2.imshow('Faster RCNN Demo', output)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
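
To run the demo directly, a minimal entry point can be appended to the module (a sketch; the package may also expose this demo through its own CLI):

if __name__ == "__main__":
    frcnn_inference_demo()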