thai thong committed on
Commit
839f10e
·
1 Parent(s): 5faf727

add deepsort algorithm and modify frontend app

Files changed (45)
  1. app.py +38 -43
  2. deep_sort_pytorch/.gitignore +13 -0
  3. deep_sort_pytorch/LICENSE +21 -0
  4. deep_sort_pytorch/README.md +137 -0
  5. deep_sort_pytorch/configs/deep_sort.yaml +10 -0
  6. deep_sort_pytorch/deep_sort/README.md +3 -0
  7. deep_sort_pytorch/deep_sort/__init__.py +21 -0
  8. deep_sort_pytorch/deep_sort/deep/__init__.py +0 -0
  9. deep_sort_pytorch/deep_sort/deep/checkpoint/.gitkeep +0 -0
  10. deep_sort_pytorch/deep_sort/deep/evaluate.py +13 -0
  11. deep_sort_pytorch/deep_sort/deep/feature_extractor.py +54 -0
  12. deep_sort_pytorch/deep_sort/deep/model.py +109 -0
  13. deep_sort_pytorch/deep_sort/deep/original_model.py +111 -0
  14. deep_sort_pytorch/deep_sort/deep/test.py +80 -0
  15. deep_sort_pytorch/deep_sort/deep/train.jpg +0 -0
  16. deep_sort_pytorch/deep_sort/deep/train.py +206 -0
  17. deep_sort_pytorch/deep_sort/deep_sort.py +113 -0
  18. deep_sort_pytorch/deep_sort/sort - Copy/__init__.py +0 -0
  19. deep_sort_pytorch/deep_sort/sort - Copy/iou_matching.py +82 -0
  20. deep_sort_pytorch/deep_sort/sort - Copy/kalman_filter.py +229 -0
  21. deep_sort_pytorch/deep_sort/sort - Copy/linear_assignment.py +192 -0
  22. deep_sort_pytorch/deep_sort/sort - Copy/nn_matching.py +176 -0
  23. deep_sort_pytorch/deep_sort/sort - Copy/preprocessing.py +73 -0
  24. deep_sort_pytorch/deep_sort/sort/__init__.py +0 -0
  25. deep_sort_pytorch/deep_sort/sort/detection.py +50 -0
  26. deep_sort_pytorch/deep_sort/sort/iou_matching.py +82 -0
  27. deep_sort_pytorch/deep_sort/sort/kalman_filter.py +229 -0
  28. deep_sort_pytorch/deep_sort/sort/linear_assignment.py +192 -0
  29. deep_sort_pytorch/deep_sort/sort/nn_matching.py +176 -0
  30. deep_sort_pytorch/deep_sort/sort/preprocessing.py +73 -0
  31. deep_sort_pytorch/deep_sort/sort/track.py +170 -0
  32. deep_sort_pytorch/deep_sort/sort/tracker.py +143 -0
  33. deep_sort_pytorch/utils/__init__.py +0 -0
  34. deep_sort_pytorch/utils/asserts.py +13 -0
  35. deep_sort_pytorch/utils/draw.py +36 -0
  36. deep_sort_pytorch/utils/evaluation.py +103 -0
  37. deep_sort_pytorch/utils/io.py +133 -0
  38. deep_sort_pytorch/utils/json_logger.py +383 -0
  39. deep_sort_pytorch/utils/log.py +17 -0
  40. deep_sort_pytorch/utils/parser.py +41 -0
  41. deep_sort_pytorch/utils/tools.py +39 -0
  42. detect.py +1 -0
  43. detect_deepsort.py +310 -0
  44. detect_strongsort.py +5 -4
  45. requirements.txt +1 -0
app.py CHANGED
@@ -1,29 +1,43 @@
1
  import spaces
2
  import gradio as gr
3
- from detect_strongsort import run
4
  import os
5
  import threading
6
 
7
  should_continue = True
8
 
 
9
  @spaces.GPU(duration=120)
10
- def yolov9_inference(model_id, image_size, conf_threshold, iou_threshold, img_path=None, vid_path=None):
11
  global should_continue
12
  img_extensions = ['.jpg', '.jpeg', '.png', '.gif'] # Add more image extensions if needed
13
  vid_extensions = ['.mp4', '.avi', '.mov', '.mkv'] # Add more video extensions if needed
14
-
15
  input_path = None
 
16
  if img_path is not None:
17
- _, img_extension = os.path.splitext(img_path)
18
- if img_extension.lower() in img_extensions:
19
- input_path = img_path
20
  elif vid_path is not None:
21
- _, vid_extension = os.path.splitext(vid_path)
22
- if vid_extension.lower() in vid_extensions:
23
- input_path = vid_path
24
-
25
- output_path = run(yolo_weights=model_id, imgsz=(image_size,image_size), conf_thres=conf_threshold, iou_thres=iou_threshold, source=input_path, device='0', strong_sort_weights = "osnet_x0_25_msmt17.pt", hide_conf= True)
26
- # Assuming output_path is the path to the output file
27
  _, output_extension = os.path.splitext(output_path)
28
  if output_extension.lower() in img_extensions:
29
  output_image = output_path # Load the image file here
@@ -34,11 +48,6 @@ def yolov9_inference(model_id, image_size, conf_threshold, iou_threshold, img_pa
34
 
35
  return output_image, output_video, output_path
36
 
37
- def inference(model_id, image_size, conf_threshold, iou_threshold, img_path=None, vid_path=None):
38
- global should_continue
39
- should_continue = True
40
- output_image, output_video, output_path = yolov9_inference(model_id, image_size, conf_threshold, iou_threshold, img_path, vid_path)
41
- return output_image, output_video, output_path
42
 
43
 
44
  def stop_processing():
@@ -60,29 +69,17 @@ def app():
60
  "best_model-converted.pt",
61
  "yolov9_e_trained.pt",
62
  ],
63
- value="./last_best_model.pt"
64
 
65
  )
66
- image_size = gr.Slider(
67
- label="Image Size",
68
- minimum=320,
69
- maximum=1280,
70
- step=32,
71
- value=640,
72
- )
73
- conf_threshold = gr.Slider(
74
- label="Confidence Threshold",
75
- minimum=0.1,
76
- maximum=1.0,
77
- step=0.1,
78
- value=0.4,
79
- )
80
- iou_threshold = gr.Slider(
81
- label="IoU Threshold",
82
- minimum=0.1,
83
- maximum=1.0,
84
- step=0.1,
85
- value=0.5,
86
  )
87
  yolov9_infer = gr.Button(value="Inference")
88
  stop_button = gr.Button(value="Stop")
@@ -93,14 +90,12 @@ def app():
93
  output_path = gr.Textbox(label="Output path")
94
 
95
  yolov9_infer.click(
96
- fn=inference,
97
  inputs=[
98
  model_id,
99
- image_size,
100
- conf_threshold,
101
- iou_threshold,
102
  img_path,
103
- vid_path
 
104
  ],
105
  outputs=[output_image, output_video, output_path],
106
  )
 
1
  import spaces
2
  import gradio as gr
3
+ from detect_deepsort import run_deepsort
4
+ from detect_strongsort import run_strongsort
5
+ from detect import run
6
  import os
7
  import threading
8
 
9
  should_continue = True
10
 
11
+
12
  @spaces.GPU(duration=120)
13
+ def yolov9_inference(model_id, img_path=None, vid_path=None, tracking_algorithm = None):
14
  global should_continue
15
  img_extensions = ['.jpg', '.jpeg', '.png', '.gif'] # Add more image extensions if needed
16
  vid_extensions = ['.mp4', '.avi', '.mov', '.mkv'] # Add more video extensions if needed
17
+ #assert img_path is not None or vid_path is not None, "Either img_path or vid_path must be provided."
18
+ image_size = 640
19
+ conf_threshold = 0.5
20
+ iou_threshold = 0.5
21
  input_path = None
22
+ output_path = None
23
  if img_path is not None:
24
+ #_, img_extension = os.path.splitext(img_path)
25
+ #if img_extension.lower() in img_extensions:
26
+ input_path = img_path
27
+ print(input_path)
28
+ output_path = run(weights=model_id, imgsz=(image_size,image_size), conf_thres=conf_threshold, iou_thres=iou_threshold, source=input_path, device='cpu', hide_conf= True)
29
  elif vid_path is not None:
30
+ #_, vid_extension = os.path.splitext(vid_path)
31
+ #if vid_extension.lower() in vid_extensions:
32
+ input_path = vid_path
33
+ print(input_path)
34
+ if tracking_algorithm == 'deep_sort':
35
+ output_path = run_deepsort(weights=model_id, imgsz=(image_size,image_size), conf_thres=conf_threshold, iou_thres=iou_threshold, source=input_path, device='cpu', draw_trails=True)
36
+ elif tracking_algorithm == 'strong_sort':
37
+ output_path = run_strongsort(yolo_weights=model_id, imgsz=(image_size,image_size), conf_thres=conf_threshold, iou_thres=iou_threshold, source=input_path, device='0', strong_sort_weights = "osnet_x0_25_msmt17.pt", hide_conf= True)
38
+ else:
39
+ output_path = run(weights=model_id, imgsz=(image_size,image_size), conf_thres=conf_threshold, iou_thres=iou_threshold, source=input_path, device='cpu', hide_conf= True)
40
+ # Assuming output_path is the path to the output file
41
  _, output_extension = os.path.splitext(output_path)
42
  if output_extension.lower() in img_extensions:
43
  output_image = output_path # Load the image file here
 
48
 
49
  return output_image, output_video, output_path
50
 
51
 
52
 
53
  def stop_processing():
 
69
  "best_model-converted.pt",
70
  "yolov9_e_trained.pt",
71
  ],
72
+ value="last_best_model.pt"
73
 
74
  )
75
+ tracking_algorithm = gr.Dropdown(
76
+ label= "Tracking Algorithm",
77
+ choices=[
78
+ "None",
79
+ "deep_sort",
80
+ "strong_sort"
81
+ ],
82
+ value="None"
83
  )
84
  yolov9_infer = gr.Button(value="Inference")
85
  stop_button = gr.Button(value="Stop")
 
90
  output_path = gr.Textbox(label="Output path")
91
 
92
  yolov9_infer.click(
93
+ fn=yolov9_inference,
94
  inputs=[
95
  model_id,
96
  img_path,
97
+ vid_path,
98
+ tracking_algorithm
99
  ],
100
  outputs=[output_image, output_video, output_path],
101
  )
deep_sort_pytorch/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ # Folders
2
+ __pycache__/
3
+ build/
4
+ *.egg-info
5
+
6
+
7
+ # Files
8
+ *.weights
9
+ *.t7
10
+ *.mp4
11
+ *.avi
12
+ *.so
13
+ *.txt
deep_sort_pytorch/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2020 Ziqiang
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
deep_sort_pytorch/README.md ADDED
@@ -0,0 +1,137 @@
1
+ # Deep Sort with PyTorch
2
+
3
+ ![](demo/demo.gif)
4
+
5
+ ## Update(1-1-2020)
6
+ Changes
7
+ - fix bugs
8
+ - refactor code
9
+ - accelerate detection by adding NMS on GPU
10
+
11
+ ## Latest Update(07-22)
12
+ Changes
13
+ - bug fix (Thanks @JieChen91 and @yingsen1 for bug reporting).
14
+ - use batched feature extraction for each frame, which leads to a small speed-up.
15
+ - code improvement.
16
+
17
+ Further improvement directions
18
+ - Train detector on specific dataset rather than the official one.
19
+ - Retrain the RE-ID model on a pedestrian dataset for better performance.
20
+ - Replace YOLOv3 detector with advanced ones.
21
+
22
+ **Any contributions to this repository are welcome!**
23
+
24
+
25
+ ## Introduction
26
+ This is an implementation of the MOT tracking algorithm Deep SORT. Deep SORT is basically the same as SORT, but adds a CNN model that extracts appearance features from the image patch of each person found by a detector. This CNN model is in fact a RE-ID model; the detector used in the [PAPER](https://arxiv.org/abs/1703.07402) is Faster R-CNN, and the original source code is [HERE](https://github.com/nwojke/deep_sort).
27
+ However, in the original code the CNN model is implemented with TensorFlow, which I'm not familiar with, so I re-implemented the CNN feature extraction model with PyTorch and changed the CNN model a little bit. Also, I use **YOLOv3** to generate bboxes instead of Faster R-CNN.
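+ 
+ As a rough usage sketch (assuming you run from this repository's root and already have a detector that returns center-format `(x, y, w, h)` boxes, confidences and class ids per frame — `frames` and `detect` below are hypothetical placeholders), the `DeepSort` class can be driven like this:
+ 
+ ```python
+ from deep_sort import DeepSort
+ 
+ # ckpt.t7 is the RE-ID checkpoint downloaded in the Quick Start section below
+ tracker = DeepSort("deep_sort/deep/checkpoint/ckpt.t7", use_cuda=True)
+ 
+ for frame in frames:                                   # hypothetical BGR frames
+     bbox_xywh, confidences, class_ids = detect(frame)  # hypothetical detector
+     # rows of [x1, y1, x2, y2, track_id, class_id] for confirmed tracks
+     outputs = tracker.update(bbox_xywh, confidences, class_ids, frame)
+ ```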
28
+
29
+ ## Dependencies
30
+ - python 3 (python2 not sure)
31
+ - numpy
32
+ - scipy
33
+ - opencv-python
34
+ - sklearn
35
+ - torch >= 0.4
36
+ - torchvision >= 0.1
37
+ - pillow
38
+ - vizer
39
+ - edict
40
+
41
+ ## Quick Start
42
+ 0. Check all dependencies installed
43
+ ```bash
44
+ pip install -r requirements.txt
45
+ ```
46
+ For users in China, you can specify a PyPI mirror to speed up installation:
47
+ ```bash
48
+ pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
49
+ ```
50
+
51
+ 1. Clone this repository
52
+ ```
53
+ git clone git@github.com:ZQPei/deep_sort_pytorch.git
54
+ ```
55
+
56
+ 2. Download YOLOv3 parameters
57
+ ```
58
+ cd detector/YOLOv3/weight/
59
+ wget https://pjreddie.com/media/files/yolov3.weights
60
+ wget https://pjreddie.com/media/files/yolov3-tiny.weights
61
+ cd ../../../
62
+ ```
63
+
64
+ 3. Download deepsort parameters ckpt.t7
65
+ ```
66
+ cd deep_sort/deep/checkpoint
67
+ # download ckpt.t7 from
68
+ https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6 to this folder
69
+ cd ../../../
70
+ ```
71
+
72
+ 4. Compile nms module
73
+ ```bash
74
+ cd detector/YOLOv3/nms
75
+ sh build.sh
76
+ cd ../../..
77
+ ```
78
+
79
+ Notice:
80
+ If compilation fails, the simplest fix is to **upgrade your PyTorch to >= 1.1 and torchvision to >= 0.3**, which lets you avoid the troublesome compilation problems that are most likely caused by either `gcc version too low` or `libraries missing`.
81
+
82
+ 5. Run demo
83
+ ```
84
+ usage: python yolov3_deepsort.py VIDEO_PATH
85
+ [--help]
86
+ [--frame_interval FRAME_INTERVAL]
87
+ [--config_detection CONFIG_DETECTION]
88
+ [--config_deepsort CONFIG_DEEPSORT]
89
+ [--display]
90
+ [--display_width DISPLAY_WIDTH]
91
+ [--display_height DISPLAY_HEIGHT]
92
+ [--save_path SAVE_PATH]
93
+ [--cpu]
94
+
95
+ # yolov3 + deepsort
96
+ python yolov3_deepsort.py [VIDEO_PATH]
97
+
98
+ # yolov3_tiny + deepsort
99
+ python yolov3_deepsort.py [VIDEO_PATH] --config_detection ./configs/yolov3_tiny.yaml
100
+
101
+ # yolov3 + deepsort on webcam
102
+ python3 yolov3_deepsort.py /dev/video0 --camera 0
103
+
104
+ # yolov3_tiny + deepsort on webcam
105
+ python3 yolov3_deepsort.py /dev/video0 --config_detection ./configs/yolov3_tiny.yaml --camera 0
106
+ ```
107
+ Use `--display` to enable display.
108
+ Results will be saved to `./output/results.avi` and `./output/results.txt`.
109
+
110
+ All files above can also be accessed from BaiduDisk!
111
+ linker:[BaiduDisk](https://pan.baidu.com/s/1YJ1iPpdFTlUyLFoonYvozg)
112
+ passwd:fbuw
113
+
114
+ ## Training the RE-ID model
115
+ The original model used in the paper is in original_model.py, and its parameters are here: [original_ckpt.t7](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6).
116
+
117
+ To train the model, you first need to download the [Market1501](http://www.liangzheng.com.cn/Project/project_reid.html) dataset or the [Mars](http://www.liangzheng.com.cn/Project/project_mars.html) dataset.
118
+
119
+ Then you can use [train.py](deep_sort/deep/train.py) to train your own parameters and evaluate them using [test.py](deep_sort/deep/test.py) and [evaluate.py](deep_sort/deep/evalute.py).
120
+ ![train.jpg](deep_sort/deep/train.jpg)
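+ 
+ To sanity-check a trained checkpoint outside the tracking pipeline, a minimal sketch (the crop path below is a hypothetical placeholder) is:
+ 
+ ```python
+ import cv2
+ 
+ from deep_sort.deep.feature_extractor import Extractor
+ 
+ # train.py writes its best weights to deep_sort/deep/checkpoint/ckpt.t7
+ extractor = Extractor("deep_sort/deep/checkpoint/ckpt.t7", use_cuda=False)
+ crops = [cv2.imread("some_pedestrian_crop.jpg")]  # hypothetical BGR crop(s)
+ features = extractor(crops)                       # L2-normalised (N, 512) embeddings
+ print(features.shape)
+ ```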
121
+
122
+ ## Demo videos and images
123
+ [demo.avi](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6)
124
+ [demo2.avi](https://drive.google.com/drive/folders/1xhG0kRH1EX5B9_Iz8gQJb7UNnn_riXi6)
125
+
126
+ ![1.jpg](demo/1.jpg)
127
+ ![2.jpg](demo/2.jpg)
128
+
129
+
130
+ ## References
131
+ - paper: [Simple Online and Realtime Tracking with a Deep Association Metric](https://arxiv.org/abs/1703.07402)
132
+
133
+ - code: [nwojke/deep_sort](https://github.com/nwojke/deep_sort)
134
+
135
+ - paper: [YOLOv3](https://pjreddie.com/media/files/papers/YOLOv3.pdf)
136
+
137
+ - code: [Joseph Redmon/yolov3](https://pjreddie.com/darknet/yolo/)
deep_sort_pytorch/configs/deep_sort.yaml ADDED
@@ -0,0 +1,10 @@
1
+ DEEPSORT:
2
+ REID_CKPT: "deep_sort_pytorch/deep_sort/deep/checkpoint/ckpt.t7"
3
+ MAX_DIST: 0.2
4
+ MIN_CONFIDENCE: 0.3
5
+ NMS_MAX_OVERLAP: 0.5
6
+ MAX_IOU_DISTANCE: 0.7
7
+ MAX_AGE: 70
8
+ N_INIT: 3
9
+ NN_BUDGET: 100
10
+
deep_sort_pytorch/deep_sort/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # Deep Sort
2
+
3
+ This is the implementation of Deep SORT with PyTorch.
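+ 
+ A minimal construction sketch (paths are relative to this repository's root; it assumes PyYAML and `easydict` — the `edict` dependency from the top-level README — are installed):
+ 
+ ```python
+ import yaml
+ from easydict import EasyDict as edict
+ 
+ from deep_sort import build_tracker
+ 
+ with open("configs/deep_sort.yaml") as f:
+     # exposes cfg.DEEPSORT.MAX_DIST, cfg.DEEPSORT.MAX_AGE, ...
+     cfg = edict(yaml.safe_load(f))
+ 
+ tracker = build_tracker(cfg, use_cuda=True)  # returns a configured DeepSort instance
+ ```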
deep_sort_pytorch/deep_sort/__init__.py ADDED
@@ -0,0 +1,21 @@
1
+ from .deep_sort import DeepSort
2
+
3
+
4
+ __all__ = ['DeepSort', 'build_tracker']
5
+
6
+
7
+ def build_tracker(cfg, use_cuda):
8
+ return DeepSort(cfg.DEEPSORT.REID_CKPT,
9
+ max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
10
+ nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
11
+ max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=use_cuda)
12
+
13
+
14
+
15
+
16
+
17
+
18
+
19
+
20
+
21
+
deep_sort_pytorch/deep_sort/deep/__init__.py ADDED
File without changes
deep_sort_pytorch/deep_sort/deep/checkpoint/.gitkeep ADDED
File without changes
deep_sort_pytorch/deep_sort/deep/evaluate.py ADDED
@@ -0,0 +1,13 @@
1
+ import torch
2
+
3
+ features = torch.load("features.pth")
4
+ qf = features["qf"]
5
+ ql = features["ql"]
6
+ gf = features["gf"]
7
+ gl = features["gl"]
8
+
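+ # the RE-ID network L2-normalises its embeddings, so this matrix product is a cosine-similarity matrix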
9
+ scores = qf.mm(gf.t())
10
+ res = scores.topk(5, dim=1)[1][:, 0]
11
+ top1correct = gl[res].eq(ql).sum().item()
12
+
13
+ print("Acc top1:{:.3f}".format(top1correct / ql.size(0)))
deep_sort_pytorch/deep_sort/deep/feature_extractor.py ADDED
@@ -0,0 +1,54 @@
1
+ import torch
2
+ import torchvision.transforms as transforms
3
+ import numpy as np
4
+ import cv2
5
+ import logging
6
+
7
+ from .model import Net
8
+
9
+
10
+ class Extractor(object):
11
+ def __init__(self, model_path, use_cuda=True):
12
+ self.net = Net(reid=True)
13
+ self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
14
+ state_dict = torch.load(model_path, map_location=torch.device(self.device))[
15
+ 'net_dict']
16
+ self.net.load_state_dict(state_dict)
17
+ logger = logging.getLogger("root.tracker")
18
+ logger.info("Loading weights from {}... Done!".format(model_path))
19
+ self.net.to(self.device)
20
+ self.size = (64, 128)
21
+ self.norm = transforms.Compose([
22
+ transforms.ToTensor(),
23
+ transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
24
+ ])
25
+
26
+ def _preprocess(self, im_crops):
27
+ """
28
+ TODO:
29
+ 1. to float with scale from 0 to 1
30
+ 2. resize to (64, 128) as Market1501 dataset did
31
+ 3. concatenate to a numpy array
32
+ 4. to torch Tensor
33
+ 5. normalize
34
+ """
35
+ def _resize(im, size):
36
+ return cv2.resize(im.astype(np.float32)/255., size)
37
+
38
+ im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze(
39
+ 0) for im in im_crops], dim=0).float()
40
+ return im_batch
41
+
42
+ def __call__(self, im_crops):
43
+ im_batch = self._preprocess(im_crops)
44
+ with torch.no_grad():
45
+ im_batch = im_batch.to(self.device)
46
+ features = self.net(im_batch)
47
+ return features.cpu().numpy()
48
+
49
+
50
+ if __name__ == '__main__':
51
+ img = cv2.imread("demo.jpg")[:, :, (2, 1, 0)]
52
+ extr = Extractor("checkpoint/ckpt.t7")
53
+ feature = extr(img)
54
+ print(feature.shape)
deep_sort_pytorch/deep_sort/deep/model.py ADDED
@@ -0,0 +1,109 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
+ class BasicBlock(nn.Module):
7
+ def __init__(self, c_in, c_out, is_downsample=False):
8
+ super(BasicBlock, self).__init__()
9
+ self.is_downsample = is_downsample
10
+ if is_downsample:
11
+ self.conv1 = nn.Conv2d(
12
+ c_in, c_out, 3, stride=2, padding=1, bias=False)
13
+ else:
14
+ self.conv1 = nn.Conv2d(
15
+ c_in, c_out, 3, stride=1, padding=1, bias=False)
16
+ self.bn1 = nn.BatchNorm2d(c_out)
17
+ self.relu = nn.ReLU(True)
18
+ self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1,
19
+ padding=1, bias=False)
20
+ self.bn2 = nn.BatchNorm2d(c_out)
21
+ if is_downsample:
22
+ self.downsample = nn.Sequential(
23
+ nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
24
+ nn.BatchNorm2d(c_out)
25
+ )
26
+ elif c_in != c_out:
27
+ self.downsample = nn.Sequential(
28
+ nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
29
+ nn.BatchNorm2d(c_out)
30
+ )
31
+ self.is_downsample = True
32
+
33
+ def forward(self, x):
34
+ y = self.conv1(x)
35
+ y = self.bn1(y)
36
+ y = self.relu(y)
37
+ y = self.conv2(y)
38
+ y = self.bn2(y)
39
+ if self.is_downsample:
40
+ x = self.downsample(x)
41
+ return F.relu(x.add(y), True)
42
+
43
+
44
+ def make_layers(c_in, c_out, repeat_times, is_downsample=False):
45
+ blocks = []
46
+ for i in range(repeat_times):
47
+ if i == 0:
48
+ blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ]
49
+ else:
50
+ blocks += [BasicBlock(c_out, c_out), ]
51
+ return nn.Sequential(*blocks)
52
+
53
+
54
+ class Net(nn.Module):
55
+ def __init__(self, num_classes=751, reid=False):
56
+ super(Net, self).__init__()
57
+ # 3 128 64
58
+ self.conv = nn.Sequential(
59
+ nn.Conv2d(3, 64, 3, stride=1, padding=1),
60
+ nn.BatchNorm2d(64),
61
+ nn.ReLU(inplace=True),
62
+ # nn.Conv2d(32,32,3,stride=1,padding=1),
63
+ # nn.BatchNorm2d(32),
64
+ # nn.ReLU(inplace=True),
65
+ nn.MaxPool2d(3, 2, padding=1),
66
+ )
67
+ # 64 64 32
68
+ self.layer1 = make_layers(64, 64, 2, False)
69
+ # 64 64 32
70
+ self.layer2 = make_layers(64, 128, 2, True)
71
+ # 128 32 16
72
+ self.layer3 = make_layers(128, 256, 2, True)
73
+ # 256 16 8
74
+ self.layer4 = make_layers(256, 512, 2, True)
75
+ # 512 8 4
76
+ self.avgpool = nn.AvgPool2d((8, 4), 1)
77
+ # 512 1 1
78
+ self.reid = reid
79
+ self.classifier = nn.Sequential(
80
+ nn.Linear(512, 256),
81
+ nn.BatchNorm1d(256),
82
+ nn.ReLU(inplace=True),
83
+ nn.Dropout(),
84
+ nn.Linear(256, num_classes),
85
+ )
86
+
87
+ def forward(self, x):
88
+ x = self.conv(x)
89
+ x = self.layer1(x)
90
+ x = self.layer2(x)
91
+ x = self.layer3(x)
92
+ x = self.layer4(x)
93
+ x = self.avgpool(x)
94
+ x = x.view(x.size(0), -1)
95
+ # B x 512
96
+ if self.reid:
97
+ x = x.div(x.norm(p=2, dim=1, keepdim=True))
98
+ return x
99
+ # classifier
100
+ x = self.classifier(x)
101
+ return x
102
+
103
+
104
+ if __name__ == '__main__':
105
+ net = Net()
106
+ x = torch.randn(4, 3, 128, 64)
107
+ y = net(x)
108
+ import ipdb
109
+ ipdb.set_trace()
deep_sort_pytorch/deep_sort/deep/original_model.py ADDED
@@ -0,0 +1,111 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
+ class BasicBlock(nn.Module):
7
+ def __init__(self, c_in, c_out, is_downsample=False):
8
+ super(BasicBlock, self).__init__()
9
+ self.is_downsample = is_downsample
10
+ if is_downsample:
11
+ self.conv1 = nn.Conv2d(
12
+ c_in, c_out, 3, stride=2, padding=1, bias=False)
13
+ else:
14
+ self.conv1 = nn.Conv2d(
15
+ c_in, c_out, 3, stride=1, padding=1, bias=False)
16
+ self.bn1 = nn.BatchNorm2d(c_out)
17
+ self.relu = nn.ReLU(True)
18
+ self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1,
19
+ padding=1, bias=False)
20
+ self.bn2 = nn.BatchNorm2d(c_out)
21
+ if is_downsample:
22
+ self.downsample = nn.Sequential(
23
+ nn.Conv2d(c_in, c_out, 1, stride=2, bias=False),
24
+ nn.BatchNorm2d(c_out)
25
+ )
26
+ elif c_in != c_out:
27
+ self.downsample = nn.Sequential(
28
+ nn.Conv2d(c_in, c_out, 1, stride=1, bias=False),
29
+ nn.BatchNorm2d(c_out)
30
+ )
31
+ self.is_downsample = True
32
+
33
+ def forward(self, x):
34
+ y = self.conv1(x)
35
+ y = self.bn1(y)
36
+ y = self.relu(y)
37
+ y = self.conv2(y)
38
+ y = self.bn2(y)
39
+ if self.is_downsample:
40
+ x = self.downsample(x)
41
+ return F.relu(x.add(y), True)
42
+
43
+
44
+ def make_layers(c_in, c_out, repeat_times, is_downsample=False):
45
+ blocks = []
46
+ for i in range(repeat_times):
47
+ if i == 0:
48
+ blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ]
49
+ else:
50
+ blocks += [BasicBlock(c_out, c_out), ]
51
+ return nn.Sequential(*blocks)
52
+
53
+
54
+ class Net(nn.Module):
55
+ def __init__(self, num_classes=625, reid=False):
56
+ super(Net, self).__init__()
57
+ # 3 128 64
58
+ self.conv = nn.Sequential(
59
+ nn.Conv2d(3, 32, 3, stride=1, padding=1),
60
+ nn.BatchNorm2d(32),
61
+ nn.ELU(inplace=True),
62
+ nn.Conv2d(32, 32, 3, stride=1, padding=1),
63
+ nn.BatchNorm2d(32),
64
+ nn.ELU(inplace=True),
65
+ nn.MaxPool2d(3, 2, padding=1),
66
+ )
67
+ # 32 64 32
68
+ self.layer1 = make_layers(32, 32, 2, False)
69
+ # 32 64 32
70
+ self.layer2 = make_layers(32, 64, 2, True)
71
+ # 64 32 16
72
+ self.layer3 = make_layers(64, 128, 2, True)
73
+ # 128 16 8
74
+ self.dense = nn.Sequential(
75
+ nn.Dropout(p=0.6),
76
+ nn.Linear(128*16*8, 128),
77
+ nn.BatchNorm1d(128),
78
+ nn.ELU(inplace=True)
79
+ )
80
+ # 256 1 1
81
+ self.reid = reid
82
+ self.batch_norm = nn.BatchNorm1d(128)
83
+ self.classifier = nn.Sequential(
84
+ nn.Linear(128, num_classes),
85
+ )
86
+
87
+ def forward(self, x):
88
+ x = self.conv(x)
89
+ x = self.layer1(x)
90
+ x = self.layer2(x)
91
+ x = self.layer3(x)
92
+
93
+ x = x.view(x.size(0), -1)
94
+ if self.reid:
95
+ x = self.dense[0](x)
96
+ x = self.dense[1](x)
97
+ x = x.div(x.norm(p=2, dim=1, keepdim=True))
98
+ return x
99
+ x = self.dense(x)
100
+ # B x 128
101
+ # classifier
102
+ x = self.classifier(x)
103
+ return x
104
+
105
+
106
+ if __name__ == '__main__':
107
+ net = Net(reid=True)
108
+ x = torch.randn(4, 3, 128, 64)
109
+ y = net(x)
110
+ import ipdb
111
+ ipdb.set_trace()
deep_sort_pytorch/deep_sort/deep/test.py ADDED
@@ -0,0 +1,80 @@
1
+ import torch
2
+ import torch.backends.cudnn as cudnn
3
+ import torchvision
4
+
5
+ import argparse
6
+ import os
7
+
8
+ from model import Net
9
+
10
+ parser = argparse.ArgumentParser(description="Train on market1501")
11
+ parser.add_argument("--data-dir", default='data', type=str)
12
+ parser.add_argument("--no-cuda", action="store_true")
13
+ parser.add_argument("--gpu-id", default=0, type=int)
14
+ args = parser.parse_args()
15
+
16
+ # device
17
+ device = "cuda:{}".format(
18
+ args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu"
19
+ if torch.cuda.is_available() and not args.no_cuda:
20
+ cudnn.benchmark = True
21
+
22
+ # data loader
23
+ root = args.data_dir
24
+ query_dir = os.path.join(root, "query")
25
+ gallery_dir = os.path.join(root, "gallery")
26
+ transform = torchvision.transforms.Compose([
27
+ torchvision.transforms.Resize((128, 64)),
28
+ torchvision.transforms.ToTensor(),
29
+ torchvision.transforms.Normalize(
30
+ [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
31
+ ])
32
+ queryloader = torch.utils.data.DataLoader(
33
+ torchvision.datasets.ImageFolder(query_dir, transform=transform),
34
+ batch_size=64, shuffle=False
35
+ )
36
+ galleryloader = torch.utils.data.DataLoader(
37
+ torchvision.datasets.ImageFolder(gallery_dir, transform=transform),
38
+ batch_size=64, shuffle=False
39
+ )
40
+
41
+ # net definition
42
+ net = Net(reid=True)
43
+ assert os.path.isfile(
44
+ "./checkpoint/ckpt.t7"), "Error: no checkpoint file found!"
45
+ print('Loading from checkpoint/ckpt.t7')
46
+ checkpoint = torch.load("./checkpoint/ckpt.t7")
47
+ net_dict = checkpoint['net_dict']
48
+ net.load_state_dict(net_dict, strict=False)
49
+ net.eval()
50
+ net.to(device)
51
+
52
+ # compute features
53
+ query_features = torch.tensor([]).float()
54
+ query_labels = torch.tensor([]).long()
55
+ gallery_features = torch.tensor([]).float()
56
+ gallery_labels = torch.tensor([]).long()
57
+
58
+ with torch.no_grad():
59
+ for idx, (inputs, labels) in enumerate(queryloader):
60
+ inputs = inputs.to(device)
61
+ features = net(inputs).cpu()
62
+ query_features = torch.cat((query_features, features), dim=0)
63
+ query_labels = torch.cat((query_labels, labels))
64
+
65
+ for idx, (inputs, labels) in enumerate(galleryloader):
66
+ inputs = inputs.to(device)
67
+ features = net(inputs).cpu()
68
+ gallery_features = torch.cat((gallery_features, features), dim=0)
69
+ gallery_labels = torch.cat((gallery_labels, labels))
70
+
71
+ gallery_labels -= 2
72
+
73
+ # save features
74
+ features = {
75
+ "qf": query_features,
76
+ "ql": query_labels,
77
+ "gf": gallery_features,
78
+ "gl": gallery_labels
79
+ }
80
+ torch.save(features, "features.pth")
deep_sort_pytorch/deep_sort/deep/train.jpg ADDED
deep_sort_pytorch/deep_sort/deep/train.py ADDED
@@ -0,0 +1,206 @@
1
+ import argparse
2
+ import os
3
+ import time
4
+
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+ import torch
8
+ import torch.backends.cudnn as cudnn
9
+ import torchvision
10
+
11
+ from model import Net
12
+
13
+ parser = argparse.ArgumentParser(description="Train on market1501")
14
+ parser.add_argument("--data-dir", default='data', type=str)
15
+ parser.add_argument("--no-cuda", action="store_true")
16
+ parser.add_argument("--gpu-id", default=0, type=int)
17
+ parser.add_argument("--lr", default=0.1, type=float)
18
+ parser.add_argument("--interval", '-i', default=20, type=int)
19
+ parser.add_argument('--resume', '-r', action='store_true')
20
+ args = parser.parse_args()
21
+
22
+ # device
23
+ device = "cuda:{}".format(
24
+ args.gpu_id) if torch.cuda.is_available() and not args.no_cuda else "cpu"
25
+ if torch.cuda.is_available() and not args.no_cuda:
26
+ cudnn.benchmark = True
27
+
28
+ # data loading
29
+ root = args.data_dir
30
+ train_dir = os.path.join(root, "train")
31
+ test_dir = os.path.join(root, "test")
32
+ transform_train = torchvision.transforms.Compose([
33
+ torchvision.transforms.RandomCrop((128, 64), padding=4),
34
+ torchvision.transforms.RandomHorizontalFlip(),
35
+ torchvision.transforms.ToTensor(),
36
+ torchvision.transforms.Normalize(
37
+ [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
38
+ ])
39
+ transform_test = torchvision.transforms.Compose([
40
+ torchvision.transforms.Resize((128, 64)),
41
+ torchvision.transforms.ToTensor(),
42
+ torchvision.transforms.Normalize(
43
+ [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
44
+ ])
45
+ trainloader = torch.utils.data.DataLoader(
46
+ torchvision.datasets.ImageFolder(train_dir, transform=transform_train),
47
+ batch_size=64, shuffle=True
48
+ )
49
+ testloader = torch.utils.data.DataLoader(
50
+ torchvision.datasets.ImageFolder(test_dir, transform=transform_test),
51
+ batch_size=64, shuffle=True
52
+ )
53
+ num_classes = max(len(trainloader.dataset.classes),
54
+ len(testloader.dataset.classes))
55
+
56
+ # net definition
57
+ start_epoch = 0
58
+ net = Net(num_classes=num_classes)
59
+ if args.resume:
60
+ assert os.path.isfile(
61
+ "./checkpoint/ckpt.t7"), "Error: no checkpoint file found!"
62
+ print('Loading from checkpoint/ckpt.t7')
63
+ checkpoint = torch.load("./checkpoint/ckpt.t7")
64
+ # import ipdb; ipdb.set_trace()
65
+ net_dict = checkpoint['net_dict']
66
+ net.load_state_dict(net_dict)
67
+ best_acc = checkpoint['acc']
68
+ start_epoch = checkpoint['epoch']
69
+ net.to(device)
70
+
71
+ # loss and optimizer
72
+ criterion = torch.nn.CrossEntropyLoss()
73
+ optimizer = torch.optim.SGD(
74
+ net.parameters(), args.lr, momentum=0.9, weight_decay=5e-4)
75
+ best_acc = 0.
76
+
77
+ # train function for each epoch
78
+
79
+
80
+ def train(epoch):
81
+ print("\nEpoch : %d" % (epoch+1))
82
+ net.train()
83
+ training_loss = 0.
84
+ train_loss = 0.
85
+ correct = 0
86
+ total = 0
87
+ interval = args.interval
88
+ start = time.time()
89
+ for idx, (inputs, labels) in enumerate(trainloader):
90
+ # forward
91
+ inputs, labels = inputs.to(device), labels.to(device)
92
+ outputs = net(inputs)
93
+ loss = criterion(outputs, labels)
94
+
95
+ # backward
96
+ optimizer.zero_grad()
97
+ loss.backward()
98
+ optimizer.step()
99
+
100
+ # accumulating
101
+ training_loss += loss.item()
102
+ train_loss += loss.item()
103
+ correct += outputs.max(dim=1)[1].eq(labels).sum().item()
104
+ total += labels.size(0)
105
+
106
+ # print
107
+ if (idx+1) % interval == 0:
108
+ end = time.time()
109
+ print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
110
+ 100.*(idx+1)/len(trainloader), end-start, training_loss /
111
+ interval, correct, total, 100.*correct/total
112
+ ))
113
+ training_loss = 0.
114
+ start = time.time()
115
+
116
+ return train_loss/len(trainloader), 1. - correct/total
117
+
118
+
119
+ def test(epoch):
120
+ global best_acc
121
+ net.eval()
122
+ test_loss = 0.
123
+ correct = 0
124
+ total = 0
125
+ start = time.time()
126
+ with torch.no_grad():
127
+ for idx, (inputs, labels) in enumerate(testloader):
128
+ inputs, labels = inputs.to(device), labels.to(device)
129
+ outputs = net(inputs)
130
+ loss = criterion(outputs, labels)
131
+
132
+ test_loss += loss.item()
133
+ correct += outputs.max(dim=1)[1].eq(labels).sum().item()
134
+ total += labels.size(0)
135
+
136
+ print("Testing ...")
137
+ end = time.time()
138
+ print("[progress:{:.1f}%]time:{:.2f}s Loss:{:.5f} Correct:{}/{} Acc:{:.3f}%".format(
139
+ 100.*(idx+1)/len(testloader), end-start, test_loss /
140
+ len(testloader), correct, total, 100.*correct/total
141
+ ))
142
+
143
+ # saving checkpoint
144
+ acc = 100.*correct/total
145
+ if acc > best_acc:
146
+ best_acc = acc
147
+ print("Saving parameters to checkpoint/ckpt.t7")
148
+ checkpoint = {
149
+ 'net_dict': net.state_dict(),
150
+ 'acc': acc,
151
+ 'epoch': epoch,
152
+ }
153
+ if not os.path.isdir('checkpoint'):
154
+ os.mkdir('checkpoint')
155
+ torch.save(checkpoint, './checkpoint/ckpt.t7')
156
+
157
+ return test_loss/len(testloader), 1. - correct/total
158
+
159
+
160
+ # plot figure
161
+ x_epoch = []
162
+ record = {'train_loss': [], 'train_err': [], 'test_loss': [], 'test_err': []}
163
+ fig = plt.figure()
164
+ ax0 = fig.add_subplot(121, title="loss")
165
+ ax1 = fig.add_subplot(122, title="top1err")
166
+
167
+
168
+ def draw_curve(epoch, train_loss, train_err, test_loss, test_err):
169
+ global record
170
+ record['train_loss'].append(train_loss)
171
+ record['train_err'].append(train_err)
172
+ record['test_loss'].append(test_loss)
173
+ record['test_err'].append(test_err)
174
+
175
+ x_epoch.append(epoch)
176
+ ax0.plot(x_epoch, record['train_loss'], 'bo-', label='train')
177
+ ax0.plot(x_epoch, record['test_loss'], 'ro-', label='val')
178
+ ax1.plot(x_epoch, record['train_err'], 'bo-', label='train')
179
+ ax1.plot(x_epoch, record['test_err'], 'ro-', label='val')
180
+ if epoch == 0:
181
+ ax0.legend()
182
+ ax1.legend()
183
+ fig.savefig("train.jpg")
184
+
185
+ # lr decay
186
+
187
+
188
+ def lr_decay():
189
+ global optimizer
190
+ for params in optimizer.param_groups:
191
+ params['lr'] *= 0.1
192
+ lr = params['lr']
193
+ print("Learning rate adjusted to {}".format(lr))
194
+
195
+
196
+ def main():
197
+ for epoch in range(start_epoch, start_epoch+40):
198
+ train_loss, train_err = train(epoch)
199
+ test_loss, test_err = test(epoch)
200
+ draw_curve(epoch, train_loss, train_err, test_loss, test_err)
201
+ if (epoch+1) % 20 == 0:
202
+ lr_decay()
203
+
204
+
205
+ if __name__ == '__main__':
206
+ main()
deep_sort_pytorch/deep_sort/deep_sort.py ADDED
@@ -0,0 +1,113 @@
1
+ import numpy as np
2
+ import torch
3
+
4
+ from .deep.feature_extractor import Extractor
5
+ from .sort.nn_matching import NearestNeighborDistanceMetric
6
+ from .sort.detection import Detection
7
+ from .sort.tracker import Tracker
8
+
9
+
10
+ __all__ = ['DeepSort']
11
+
12
+
13
+ class DeepSort(object):
14
+ def __init__(self, model_path, max_dist=0.2, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=70, n_init=3, nn_budget=100, use_cuda=True):
15
+ self.min_confidence = min_confidence
16
+ self.nms_max_overlap = nms_max_overlap
17
+
18
+ self.extractor = Extractor(model_path, use_cuda=use_cuda)
19
+
20
+ max_cosine_distance = max_dist
21
+ metric = NearestNeighborDistanceMetric(
22
+ "cosine", max_cosine_distance, nn_budget)
23
+ self.tracker = Tracker(
24
+ metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init)
25
+
26
+ def update(self, bbox_xywh, confidences, oids, ori_img):
27
+ self.height, self.width = ori_img.shape[:2]
28
+ # generate detections
29
+ features = self._get_features(bbox_xywh, ori_img)
30
+ bbox_tlwh = self._xywh_to_tlwh(bbox_xywh)
31
+ detections = [Detection(bbox_tlwh[i], conf, features[i],oid) for i, (conf,oid) in enumerate(zip(confidences,oids)) if conf > self.min_confidence]
32
+
33
+ # run non-maximum suppression
34
+ boxes = np.array([d.tlwh for d in detections])
35
+ scores = np.array([d.confidence for d in detections])
36
+
37
+ # update tracker
38
+ self.tracker.predict()
39
+ self.tracker.update(detections)
40
+
41
+ # output bbox identities
42
+ outputs = []
43
+ for track in self.tracker.tracks:
44
+ if not track.is_confirmed() or track.time_since_update > 1:
45
+ continue
46
+ box = track.to_tlwh()
47
+ x1, y1, x2, y2 = self._tlwh_to_xyxy(box)
48
+ track_id = track.track_id
49
+ track_oid = track.oid
50
+ outputs.append(np.array([x1, y1, x2, y2, track_id, track_oid], dtype=np.int64))
51
+ if len(outputs) > 0:
52
+ outputs = np.stack(outputs, axis=0)
53
+ return outputs
54
+
55
+ """
56
+ TODO:
57
+ Convert bbox from xc_yc_w_h to xtl_ytl_w_h
58
+ Thanks JieChen91@github.com for reporting this bug!
59
+ """
60
+ @staticmethod
61
+ def _xywh_to_tlwh(bbox_xywh):
62
+ if isinstance(bbox_xywh, np.ndarray):
63
+ bbox_tlwh = bbox_xywh.copy()
64
+ elif isinstance(bbox_xywh, torch.Tensor):
65
+ bbox_tlwh = bbox_xywh.clone()
66
+ bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2.
67
+ bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2.
68
+ return bbox_tlwh
69
+
70
+ def _xywh_to_xyxy(self, bbox_xywh):
71
+ x, y, w, h = bbox_xywh
72
+ x1 = max(int(x - w / 2), 0)
73
+ x2 = min(int(x + w / 2), self.width - 1)
74
+ y1 = max(int(y - h / 2), 0)
75
+ y2 = min(int(y + h / 2), self.height - 1)
76
+ return x1, y1, x2, y2
77
+
78
+ def _tlwh_to_xyxy(self, bbox_tlwh):
79
+ """
80
+ TODO:
81
+ Convert bbox from xtl_ytl_w_h to x1_y1_x2_y2
82
+ Thanks JieChen91@github.com for reporting this bug!
83
+ """
84
+ x, y, w, h = bbox_tlwh
85
+ x1 = max(int(x), 0)
86
+ x2 = min(int(x+w), self.width - 1)
87
+ y1 = max(int(y), 0)
88
+ y2 = min(int(y+h), self.height - 1)
89
+ return x1, y1, x2, y2
90
+
91
+ def increment_ages(self):
92
+ self.tracker.increment_ages()
93
+
94
+ def _xyxy_to_tlwh(self, bbox_xyxy):
95
+ x1, y1, x2, y2 = bbox_xyxy
96
+
97
+ t = x1
98
+ l = y1
99
+ w = int(x2 - x1)
100
+ h = int(y2 - y1)
101
+ return t, l, w, h
102
+
103
+ def _get_features(self, bbox_xywh, ori_img):
104
+ im_crops = []
105
+ for box in bbox_xywh:
106
+ x1, y1, x2, y2 = self._xywh_to_xyxy(box)
107
+ im = ori_img[y1:y2, x1:x2]
108
+ im_crops.append(im)
109
+ if im_crops:
110
+ features = self.extractor(im_crops)
111
+ else:
112
+ features = np.array([])
113
+ return features
deep_sort_pytorch/deep_sort/sort - Copy/__init__.py ADDED
File without changes
deep_sort_pytorch/deep_sort/sort - Copy/iou_matching.py ADDED
@@ -0,0 +1,82 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ from __future__ import absolute_import
3
+ import numpy as np
4
+ from . import linear_assignment
5
+
6
+
7
+ def iou(bbox, candidates):
8
+ """Computer intersection over union.
9
+
10
+ Parameters
11
+ ----------
12
+ bbox : ndarray
13
+ A bounding box in format `(top left x, top left y, width, height)`.
14
+ candidates : ndarray
15
+ A matrix of candidate bounding boxes (one per row) in the same format
16
+ as `bbox`.
17
+
18
+ Returns
19
+ -------
20
+ ndarray
21
+ The intersection over union in [0, 1] between the `bbox` and each
22
+ candidate. A higher score means a larger fraction of the `bbox` is
23
+ occluded by the candidate.
24
+
25
+ """
26
+ bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
27
+ candidates_tl = candidates[:, :2]
28
+ candidates_br = candidates[:, :2] + candidates[:, 2:]
29
+
30
+ tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
31
+ np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
32
+ br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
33
+ np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
34
+ wh = np.maximum(0., br - tl)
35
+
36
+ area_intersection = wh.prod(axis=1)
37
+ area_bbox = bbox[2:].prod()
38
+ area_candidates = candidates[:, 2:].prod(axis=1)
39
+ return area_intersection / (area_bbox + area_candidates - area_intersection)
40
+
41
+
42
+ def iou_cost(tracks, detections, track_indices=None,
43
+ detection_indices=None):
44
+ """An intersection over union distance metric.
45
+
46
+ Parameters
47
+ ----------
48
+ tracks : List[deep_sort.track.Track]
49
+ A list of tracks.
50
+ detections : List[deep_sort.detection.Detection]
51
+ A list of detections.
52
+ track_indices : Optional[List[int]]
53
+ A list of indices to tracks that should be matched. Defaults to
54
+ all `tracks`.
55
+ detection_indices : Optional[List[int]]
56
+ A list of indices to detections that should be matched. Defaults
57
+ to all `detections`.
58
+
59
+ Returns
60
+ -------
61
+ ndarray
62
+ Returns a cost matrix of shape
63
+ len(track_indices), len(detection_indices) where entry (i, j) is
64
+ `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
65
+
66
+ """
67
+ if track_indices is None:
68
+ track_indices = np.arange(len(tracks))
69
+ if detection_indices is None:
70
+ detection_indices = np.arange(len(detections))
71
+
72
+ cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
73
+ for row, track_idx in enumerate(track_indices):
74
+ if tracks[track_idx].time_since_update > 1:
75
+ cost_matrix[row, :] = linear_assignment.INFTY_COST
76
+ continue
77
+
78
+ bbox = tracks[track_idx].to_tlwh()
79
+ candidates = np.asarray(
80
+ [detections[i].tlwh for i in detection_indices])
81
+ cost_matrix[row, :] = 1. - iou(bbox, candidates)
82
+ return cost_matrix
deep_sort_pytorch/deep_sort/sort - Copy/kalman_filter.py ADDED
@@ -0,0 +1,229 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ import numpy as np
3
+ import scipy.linalg
4
+
5
+
6
+ """
7
+ Table for the 0.95 quantile of the chi-square distribution with N degrees of
8
+ freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
9
+ function and used as Mahalanobis gating threshold.
10
+ """
11
+ chi2inv95 = {
12
+ 1: 3.8415,
13
+ 2: 5.9915,
14
+ 3: 7.8147,
15
+ 4: 9.4877,
16
+ 5: 11.070,
17
+ 6: 12.592,
18
+ 7: 14.067,
19
+ 8: 15.507,
20
+ 9: 16.919}
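+ # e.g. gate_cost_matrix() gates with chi2inv95[4] for full (x, y, a, h) measurements and chi2inv95[2] when only_position is set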
21
+
22
+
23
+ class KalmanFilter(object):
24
+ """
25
+ A simple Kalman filter for tracking bounding boxes in image space.
26
+
27
+ The 8-dimensional state space
28
+
29
+ x, y, a, h, vx, vy, va, vh
30
+
31
+ contains the bounding box center position (x, y), aspect ratio a, height h,
32
+ and their respective velocities.
33
+
34
+ Object motion follows a constant velocity model. The bounding box location
35
+ (x, y, a, h) is taken as direct observation of the state space (linear
36
+ observation model).
37
+
38
+ """
39
+
40
+ def __init__(self):
41
+ ndim, dt = 4, 1.
42
+
43
+ # Create Kalman filter model matrices.
44
+ self._motion_mat = np.eye(2 * ndim, 2 * ndim)
45
+ for i in range(ndim):
46
+ self._motion_mat[i, ndim + i] = dt
47
+ self._update_mat = np.eye(ndim, 2 * ndim)
48
+
49
+ # Motion and observation uncertainty are chosen relative to the current
50
+ # state estimate. These weights control the amount of uncertainty in
51
+ # the model. This is a bit hacky.
52
+ self._std_weight_position = 1. / 20
53
+ self._std_weight_velocity = 1. / 160
54
+
55
+ def initiate(self, measurement):
56
+ """Create track from unassociated measurement.
57
+
58
+ Parameters
59
+ ----------
60
+ measurement : ndarray
61
+ Bounding box coordinates (x, y, a, h) with center position (x, y),
62
+ aspect ratio a, and height h.
63
+
64
+ Returns
65
+ -------
66
+ (ndarray, ndarray)
67
+ Returns the mean vector (8 dimensional) and covariance matrix (8x8
68
+ dimensional) of the new track. Unobserved velocities are initialized
69
+ to 0 mean.
70
+
71
+ """
72
+ mean_pos = measurement
73
+ mean_vel = np.zeros_like(mean_pos)
74
+ mean = np.r_[mean_pos, mean_vel]
75
+
76
+ std = [
77
+ 2 * self._std_weight_position * measurement[3],
78
+ 2 * self._std_weight_position * measurement[3],
79
+ 1e-2,
80
+ 2 * self._std_weight_position * measurement[3],
81
+ 10 * self._std_weight_velocity * measurement[3],
82
+ 10 * self._std_weight_velocity * measurement[3],
83
+ 1e-5,
84
+ 10 * self._std_weight_velocity * measurement[3]]
85
+ covariance = np.diag(np.square(std))
86
+ return mean, covariance
87
+
88
+ def predict(self, mean, covariance):
89
+ """Run Kalman filter prediction step.
90
+
91
+ Parameters
92
+ ----------
93
+ mean : ndarray
94
+ The 8 dimensional mean vector of the object state at the previous
95
+ time step.
96
+ covariance : ndarray
97
+ The 8x8 dimensional covariance matrix of the object state at the
98
+ previous time step.
99
+
100
+ Returns
101
+ -------
102
+ (ndarray, ndarray)
103
+ Returns the mean vector and covariance matrix of the predicted
104
+ state. Unobserved velocities are initialized to 0 mean.
105
+
106
+ """
107
+ std_pos = [
108
+ self._std_weight_position * mean[3],
109
+ self._std_weight_position * mean[3],
110
+ 1e-2,
111
+ self._std_weight_position * mean[3]]
112
+ std_vel = [
113
+ self._std_weight_velocity * mean[3],
114
+ self._std_weight_velocity * mean[3],
115
+ 1e-5,
116
+ self._std_weight_velocity * mean[3]]
117
+ motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
118
+
119
+ mean = np.dot(self._motion_mat, mean)
120
+ covariance = np.linalg.multi_dot((
121
+ self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
122
+
123
+ return mean, covariance
124
+
125
+ def project(self, mean, covariance):
126
+ """Project state distribution to measurement space.
127
+
128
+ Parameters
129
+ ----------
130
+ mean : ndarray
131
+ The state's mean vector (8 dimensional array).
132
+ covariance : ndarray
133
+ The state's covariance matrix (8x8 dimensional).
134
+
135
+ Returns
136
+ -------
137
+ (ndarray, ndarray)
138
+ Returns the projected mean and covariance matrix of the given state
139
+ estimate.
140
+
141
+ """
142
+ std = [
143
+ self._std_weight_position * mean[3],
144
+ self._std_weight_position * mean[3],
145
+ 1e-1,
146
+ self._std_weight_position * mean[3]]
147
+ innovation_cov = np.diag(np.square(std))
148
+
149
+ mean = np.dot(self._update_mat, mean)
150
+ covariance = np.linalg.multi_dot((
151
+ self._update_mat, covariance, self._update_mat.T))
152
+ return mean, covariance + innovation_cov
153
+
154
+ def update(self, mean, covariance, measurement):
155
+ """Run Kalman filter correction step.
156
+
157
+ Parameters
158
+ ----------
159
+ mean : ndarray
160
+ The predicted state's mean vector (8 dimensional).
161
+ covariance : ndarray
162
+ The state's covariance matrix (8x8 dimensional).
163
+ measurement : ndarray
164
+ The 4 dimensional measurement vector (x, y, a, h), where (x, y)
165
+ is the center position, a the aspect ratio, and h the height of the
166
+ bounding box.
167
+
168
+ Returns
169
+ -------
170
+ (ndarray, ndarray)
171
+ Returns the measurement-corrected state distribution.
172
+
173
+ """
174
+ projected_mean, projected_cov = self.project(mean, covariance)
175
+
176
+ chol_factor, lower = scipy.linalg.cho_factor(
177
+ projected_cov, lower=True, check_finite=False)
178
+ kalman_gain = scipy.linalg.cho_solve(
179
+ (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
180
+ check_finite=False).T
181
+ innovation = measurement - projected_mean
182
+
183
+ new_mean = mean + np.dot(innovation, kalman_gain.T)
184
+ new_covariance = covariance - np.linalg.multi_dot((
185
+ kalman_gain, projected_cov, kalman_gain.T))
186
+ return new_mean, new_covariance
187
+
188
+ def gating_distance(self, mean, covariance, measurements,
189
+ only_position=False):
190
+ """Compute gating distance between state distribution and measurements.
191
+
192
+ A suitable distance threshold can be obtained from `chi2inv95`. If
193
+ `only_position` is False, the chi-square distribution has 4 degrees of
194
+ freedom, otherwise 2.
195
+
196
+ Parameters
197
+ ----------
198
+ mean : ndarray
199
+ Mean vector over the state distribution (8 dimensional).
200
+ covariance : ndarray
201
+ Covariance of the state distribution (8x8 dimensional).
202
+ measurements : ndarray
203
+ An Nx4 dimensional matrix of N measurements, each in
204
+ format (x, y, a, h) where (x, y) is the bounding box center
205
+ position, a the aspect ratio, and h the height.
206
+ only_position : Optional[bool]
207
+ If True, distance computation is done with respect to the bounding
208
+ box center position only.
209
+
210
+ Returns
211
+ -------
212
+ ndarray
213
+ Returns an array of length N, where the i-th element contains the
214
+ squared Mahalanobis distance between (mean, covariance) and
215
+ `measurements[i]`.
216
+
217
+ """
218
+ mean, covariance = self.project(mean, covariance)
219
+ if only_position:
220
+ mean, covariance = mean[:2], covariance[:2, :2]
221
+ measurements = measurements[:, :2]
222
+
223
+ cholesky_factor = np.linalg.cholesky(covariance)
224
+ d = measurements - mean
225
+ z = scipy.linalg.solve_triangular(
226
+ cholesky_factor, d.T, lower=True, check_finite=False,
227
+ overwrite_b=True)
228
+ squared_maha = np.sum(z * z, axis=0)
229
+ return squared_maha
deep_sort_pytorch/deep_sort/sort - Copy/linear_assignment.py ADDED
@@ -0,0 +1,192 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ from __future__ import absolute_import
3
+ import numpy as np
4
+ # from sklearn.utils.linear_assignment_ import linear_assignment
5
+ from scipy.optimize import linear_sum_assignment as linear_assignment
6
+ from . import kalman_filter
7
+
8
+
9
+ INFTY_COST = 1e+5
10
+
11
+
12
+ def min_cost_matching(
13
+ distance_metric, max_distance, tracks, detections, track_indices=None,
14
+ detection_indices=None):
15
+ """Solve linear assignment problem.
16
+
17
+ Parameters
18
+ ----------
19
+ distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
20
+ The distance metric is given a list of tracks and detections as well as
21
+ a list of N track indices and M detection indices. The metric should
22
+ return the NxM dimensional cost matrix, where element (i, j) is the
23
+ association cost between the i-th track in the given track indices and
24
+ the j-th detection in the given detection_indices.
25
+ max_distance : float
26
+ Gating threshold. Associations with cost larger than this value are
27
+ disregarded.
28
+ tracks : List[track.Track]
29
+ A list of predicted tracks at the current time step.
30
+ detections : List[detection.Detection]
31
+ A list of detections at the current time step.
32
+ track_indices : List[int]
33
+ List of track indices that maps rows in `cost_matrix` to tracks in
34
+ `tracks` (see description above).
35
+ detection_indices : List[int]
36
+ List of detection indices that maps columns in `cost_matrix` to
37
+ detections in `detections` (see description above).
38
+
39
+ Returns
40
+ -------
41
+ (List[(int, int)], List[int], List[int])
42
+ Returns a tuple with the following three entries:
43
+ * A list of matched track and detection indices.
44
+ * A list of unmatched track indices.
45
+ * A list of unmatched detection indices.
46
+
47
+ """
48
+ if track_indices is None:
49
+ track_indices = np.arange(len(tracks))
50
+ if detection_indices is None:
51
+ detection_indices = np.arange(len(detections))
52
+
53
+ if len(detection_indices) == 0 or len(track_indices) == 0:
54
+ return [], track_indices, detection_indices # Nothing to match.
55
+
56
+ cost_matrix = distance_metric(
57
+ tracks, detections, track_indices, detection_indices)
58
+ cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
59
+
60
+ row_indices, col_indices = linear_assignment(cost_matrix)
61
+
62
+ matches, unmatched_tracks, unmatched_detections = [], [], []
63
+ for col, detection_idx in enumerate(detection_indices):
64
+ if col not in col_indices:
65
+ unmatched_detections.append(detection_idx)
66
+ for row, track_idx in enumerate(track_indices):
67
+ if row not in row_indices:
68
+ unmatched_tracks.append(track_idx)
69
+ for row, col in zip(row_indices, col_indices):
70
+ track_idx = track_indices[row]
71
+ detection_idx = detection_indices[col]
72
+ if cost_matrix[row, col] > max_distance:
73
+ unmatched_tracks.append(track_idx)
74
+ unmatched_detections.append(detection_idx)
75
+ else:
76
+ matches.append((track_idx, detection_idx))
77
+ return matches, unmatched_tracks, unmatched_detections
78
+
79
+
80
+ def matching_cascade(
81
+ distance_metric, max_distance, cascade_depth, tracks, detections,
82
+ track_indices=None, detection_indices=None):
83
+ """Run matching cascade.
84
+
85
+ Parameters
86
+ ----------
87
+ distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
88
+ The distance metric is given a list of tracks and detections as well as
89
+ a list of N track indices and M detection indices. The metric should
90
+ return the NxM dimensional cost matrix, where element (i, j) is the
91
+ association cost between the i-th track in the given track indices and
92
+ the j-th detection in the given detection indices.
93
+ max_distance : float
94
+ Gating threshold. Associations with cost larger than this value are
95
+ disregarded.
96
+ cascade_depth: int
97
+ The cascade depth; should be set to the maximum track age.
98
+ tracks : List[track.Track]
99
+ A list of predicted tracks at the current time step.
100
+ detections : List[detection.Detection]
101
+ A list of detections at the current time step.
102
+ track_indices : Optional[List[int]]
103
+ List of track indices that maps rows in `cost_matrix` to tracks in
104
+ `tracks` (see description above). Defaults to all tracks.
105
+ detection_indices : Optional[List[int]]
106
+ List of detection indices that maps columns in `cost_matrix` to
107
+ detections in `detections` (see description above). Defaults to all
108
+ detections.
109
+
110
+ Returns
111
+ -------
112
+ (List[(int, int)], List[int], List[int])
113
+ Returns a tuple with the following three entries:
114
+ * A list of matched track and detection indices.
115
+ * A list of unmatched track indices.
116
+ * A list of unmatched detection indices.
117
+
118
+ """
119
+ if track_indices is None:
120
+ track_indices = list(range(len(tracks)))
121
+ if detection_indices is None:
122
+ detection_indices = list(range(len(detections)))
123
+
124
+ unmatched_detections = detection_indices
125
+ matches = []
126
+ for level in range(cascade_depth):
127
+ if len(unmatched_detections) == 0: # No detections left
128
+ break
129
+
130
+ track_indices_l = [
131
+ k for k in track_indices
132
+ if tracks[k].time_since_update == 1 + level
133
+ ]
134
+ if len(track_indices_l) == 0: # Nothing to match at this level
135
+ continue
136
+
137
+ matches_l, _, unmatched_detections = \
138
+ min_cost_matching(
139
+ distance_metric, max_distance, tracks, detections,
140
+ track_indices_l, unmatched_detections)
141
+ matches += matches_l
142
+ unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
143
+ return matches, unmatched_tracks, unmatched_detections
144
+
145
+
146
+ def gate_cost_matrix(
147
+ kf, cost_matrix, tracks, detections, track_indices, detection_indices,
148
+ gated_cost=INFTY_COST, only_position=False):
149
+ """Invalidate infeasible entries in cost matrix based on the state
150
+ distributions obtained by Kalman filtering.
151
+
152
+ Parameters
153
+ ----------
154
+ kf : The Kalman filter.
155
+ cost_matrix : ndarray
156
+ The NxM dimensional cost matrix, where N is the number of track indices
157
+ and M is the number of detection indices, such that entry (i, j) is the
158
+ association cost between `tracks[track_indices[i]]` and
159
+ `detections[detection_indices[j]]`.
160
+ tracks : List[track.Track]
161
+ A list of predicted tracks at the current time step.
162
+ detections : List[detection.Detection]
163
+ A list of detections at the current time step.
164
+ track_indices : List[int]
165
+ List of track indices that maps rows in `cost_matrix` to tracks in
166
+ `tracks` (see description above).
167
+ detection_indices : List[int]
168
+ List of detection indices that maps columns in `cost_matrix` to
169
+ detections in `detections` (see description above).
170
+ gated_cost : Optional[float]
171
+ Entries in the cost matrix corresponding to infeasible associations are
172
+ set to this value. Defaults to a very large value.
173
+ only_position : Optional[bool]
174
+ If True, only the x, y position of the state distribution is considered
175
+ during gating. Defaults to False.
176
+
177
+ Returns
178
+ -------
179
+ ndarray
180
+ Returns the modified cost matrix.
181
+
182
+ """
183
+ gating_dim = 2 if only_position else 4
184
+ gating_threshold = kalman_filter.chi2inv95[gating_dim]
185
+ measurements = np.asarray(
186
+ [detections[i].to_xyah() for i in detection_indices])
187
+ for row, track_idx in enumerate(track_indices):
188
+ track = tracks[track_idx]
189
+ gating_distance = kf.gating_distance(
190
+ track.mean, track.covariance, measurements, only_position)
191
+ cost_matrix[row, gating_distance > gating_threshold] = gated_cost
192
+ return cost_matrix
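A minimal sketch (not part of the commit) of exercising min_cost_matching in isolation. It assumes the repository root is on PYTHONPATH; the "tracks" and "detections" below are plain 2-D points rather than Track/Detection objects, since min_cost_matching only forwards them to the supplied metric.

# toy appearance-free metric: Euclidean distance between point "tracks" and "detections"
import numpy as np
from deep_sort_pytorch.deep_sort.sort.linear_assignment import min_cost_matching

def toy_metric(tracks, detections, track_indices, detection_indices):
    cost = np.zeros((len(track_indices), len(detection_indices)))
    for i, t in enumerate(track_indices):
        for j, d in enumerate(detection_indices):
            cost[i, j] = np.linalg.norm(np.asarray(tracks[t]) - np.asarray(detections[d]))
    return cost

tracks = [(0.0, 0.0), (10.0, 10.0)]
detections = [(9.5, 10.5), (50.0, 50.0)]
matches, unmatched_tracks, unmatched_dets = min_cost_matching(
    toy_metric, max_distance=5.0, tracks=tracks, detections=detections)
# expect one match (track 1, detection 0); track 0 and detection 1 stay unmatched
print(matches, unmatched_tracks, unmatched_dets)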
deep_sort_pytorch/deep_sort/sort - Copy/nn_matching.py ADDED
@@ -0,0 +1,176 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ import numpy as np
3
+
4
+
5
+ def _pdist(a, b):
6
+ """Compute pair-wise squared distance between points in `a` and `b`.
7
+
8
+ Parameters
9
+ ----------
10
+ a : array_like
11
+ An NxM matrix of N samples of dimensionality M.
12
+ b : array_like
13
+ An LxM matrix of L samples of dimensionality M.
14
+
15
+ Returns
16
+ -------
17
+ ndarray
18
+ Returns a matrix of size len(a), len(b) such that element (i, j)
19
+ contains the squared distance between `a[i]` and `b[j]`.
20
+
21
+ """
22
+ a, b = np.asarray(a), np.asarray(b)
23
+ if len(a) == 0 or len(b) == 0:
24
+ return np.zeros((len(a), len(b)))
25
+ a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
26
+ r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
27
+ r2 = np.clip(r2, 0., float(np.inf))
28
+ return r2
29
+
30
+
31
+ def _cosine_distance(a, b, data_is_normalized=False):
32
+ """Compute pair-wise cosine distance between points in `a` and `b`.
33
+
34
+ Parameters
35
+ ----------
36
+ a : array_like
37
+ An NxM matrix of N samples of dimensionality M.
38
+ b : array_like
39
+ An LxM matrix of L samples of dimensionality M.
40
+ data_is_normalized : Optional[bool]
41
+ If True, assumes rows in a and b are unit length vectors.
42
+ Otherwise, a and b are explicitly normalized to length 1.
43
+
44
+ Returns
45
+ -------
46
+ ndarray
47
+ Returns a matrix of size len(a), len(b) such that element (i, j)
48
+ contains the cosine distance between `a[i]` and `b[j]`.
49
+
50
+ """
51
+ if not data_is_normalized:
52
+ a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
53
+ b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
54
+ return 1. - np.dot(a, b.T)
55
+
56
+
57
+ def _nn_euclidean_distance(x, y):
58
+ """ Helper function for nearest neighbor distance metric (Euclidean).
59
+
60
+ Parameters
61
+ ----------
62
+ x : ndarray
63
+ A matrix of N row-vectors (sample points).
64
+ y : ndarray
65
+ A matrix of M row-vectors (query points).
66
+
67
+ Returns
68
+ -------
69
+ ndarray
70
+ A vector of length M that contains for each entry in `y` the
71
+ smallest Euclidean distance to a sample in `x`.
72
+
73
+ """
74
+ distances = _pdist(x, y)
75
+ return np.maximum(0.0, distances.min(axis=0))
76
+
77
+
78
+ def _nn_cosine_distance(x, y):
79
+ """ Helper function for nearest neighbor distance metric (cosine).
80
+
81
+ Parameters
82
+ ----------
83
+ x : ndarray
84
+ A matrix of N row-vectors (sample points).
85
+ y : ndarray
86
+ A matrix of M row-vectors (query points).
87
+
88
+ Returns
89
+ -------
90
+ ndarray
91
+ A vector of length M that contains for each entry in `y` the
92
+ smallest cosine distance to a sample in `x`.
93
+
94
+ """
95
+ distances = _cosine_distance(x, y)
96
+ return distances.min(axis=0)
97
+
98
+
99
+ class NearestNeighborDistanceMetric(object):
100
+ """
101
+ A nearest neighbor distance metric that, for each target, returns
102
+ the closest distance to any sample that has been observed so far.
103
+
104
+ Parameters
105
+ ----------
106
+ metric : str
107
+ Either "euclidean" or "cosine".
108
+ matching_threshold: float
109
+ The matching threshold. Samples with larger distance are considered an
110
+ invalid match.
111
+ budget : Optional[int]
112
+ If not None, fix samples per class to at most this number. Removes
113
+ the oldest samples when the budget is reached.
114
+
115
+ Attributes
116
+ ----------
117
+ samples : Dict[int -> List[ndarray]]
118
+ A dictionary that maps from target identities to the list of samples
119
+ that have been observed so far.
120
+
121
+ """
122
+
123
+ def __init__(self, metric, matching_threshold, budget=None):
124
+
125
+ if metric == "euclidean":
126
+ self._metric = _nn_euclidean_distance
127
+ elif metric == "cosine":
128
+ self._metric = _nn_cosine_distance
129
+ else:
130
+ raise ValueError(
131
+ "Invalid metric; must be either 'euclidean' or 'cosine'")
132
+ self.matching_threshold = matching_threshold
133
+ self.budget = budget
134
+ self.samples = {}
135
+
136
+ def partial_fit(self, features, targets, active_targets):
137
+ """Update the distance metric with new data.
138
+
139
+ Parameters
140
+ ----------
141
+ features : ndarray
142
+ An NxM matrix of N features of dimensionality M.
143
+ targets : ndarray
144
+ An integer array of associated target identities.
145
+ active_targets : List[int]
146
+ A list of targets that are currently present in the scene.
147
+
148
+ """
149
+ for feature, target in zip(features, targets):
150
+ self.samples.setdefault(target, []).append(feature)
151
+ if self.budget is not None:
152
+ self.samples[target] = self.samples[target][-self.budget:]
153
+ self.samples = {k: self.samples[k] for k in active_targets}
154
+
155
+ def distance(self, features, targets):
156
+ """Compute distance between features and targets.
157
+
158
+ Parameters
159
+ ----------
160
+ features : ndarray
161
+ An NxM matrix of N features of dimensionality M.
162
+ targets : List[int]
163
+ A list of targets to match the given `features` against.
164
+
165
+ Returns
166
+ -------
167
+ ndarray
168
+ Returns a cost matrix of shape len(targets), len(features), where
169
+ element (i, j) contains the closest squared distance between
170
+ `targets[i]` and `features[j]`.
171
+
172
+ """
173
+ cost_matrix = np.zeros((len(targets), len(features)))
174
+ for i, target in enumerate(targets):
175
+ cost_matrix[i, :] = self._metric(self.samples[target], features)
176
+ return cost_matrix
deep_sort_pytorch/deep_sort/sort - Copy/preprocessing.py ADDED
@@ -0,0 +1,73 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ import numpy as np
3
+ import cv2
4
+
5
+
6
+ def non_max_suppression(boxes, max_bbox_overlap, scores=None):
7
+ """Suppress overlapping detections.
8
+
9
+ Original code from [1]_ has been adapted to include confidence score.
10
+
11
+ .. [1] http://www.pyimagesearch.com/2015/02/16/
12
+ faster-non-maximum-suppression-python/
13
+
14
+ Examples
15
+ --------
16
+
17
+ >>> boxes = [d.roi for d in detections]
18
+ >>> scores = [d.confidence for d in detections]
19
+ >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
20
+ >>> detections = [detections[i] for i in indices]
21
+
22
+ Parameters
23
+ ----------
24
+ boxes : ndarray
25
+ Array of ROIs (x, y, width, height).
26
+ max_bbox_overlap : float
27
+ ROIs that overlap more than this value are suppressed.
28
+ scores : Optional[array_like]
29
+ Detector confidence score.
30
+
31
+ Returns
32
+ -------
33
+ List[int]
34
+ Returns indices of detections that have survived non-maxima suppression.
35
+
36
+ """
37
+ if len(boxes) == 0:
38
+ return []
39
+
40
+ boxes = boxes.astype(np.float64)  # np.float alias was removed from NumPy; use an explicit dtype
41
+ pick = []
42
+
43
+ x1 = boxes[:, 0]
44
+ y1 = boxes[:, 1]
45
+ x2 = boxes[:, 2] + boxes[:, 0]
46
+ y2 = boxes[:, 3] + boxes[:, 1]
47
+
48
+ area = (x2 - x1 + 1) * (y2 - y1 + 1)
49
+ if scores is not None:
50
+ idxs = np.argsort(scores)
51
+ else:
52
+ idxs = np.argsort(y2)
53
+
54
+ while len(idxs) > 0:
55
+ last = len(idxs) - 1
56
+ i = idxs[last]
57
+ pick.append(i)
58
+
59
+ xx1 = np.maximum(x1[i], x1[idxs[:last]])
60
+ yy1 = np.maximum(y1[i], y1[idxs[:last]])
61
+ xx2 = np.minimum(x2[i], x2[idxs[:last]])
62
+ yy2 = np.minimum(y2[i], y2[idxs[:last]])
63
+
64
+ w = np.maximum(0, xx2 - xx1 + 1)
65
+ h = np.maximum(0, yy2 - yy1 + 1)
66
+
67
+ overlap = (w * h) / area[idxs[:last]]
68
+
69
+ idxs = np.delete(
70
+ idxs, np.concatenate(
71
+ ([last], np.where(overlap > max_bbox_overlap)[0])))
72
+
73
+ return pick
deep_sort_pytorch/deep_sort/sort/__init__.py ADDED
File without changes
deep_sort_pytorch/deep_sort/sort/detection.py ADDED
@@ -0,0 +1,50 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ import numpy as np
3
+
4
+
5
+ class Detection(object):
6
+ """
7
+ This class represents a bounding box detection in a single image.
8
+
9
+ Parameters
10
+ ----------
11
+ tlwh : array_like
12
+ Bounding box in format `(x, y, w, h)`.
13
+ confidence : float
14
+ Detector confidence score.
15
+ feature : array_like
16
+ A feature vector that describes the object contained in this image.
17
+
18
+ Attributes
19
+ ----------
20
+ tlwh : ndarray
21
+ Bounding box in format `(top left x, top left y, width, height)`.
22
+ confidence : ndarray
23
+ Detector confidence score.
24
+ feature : ndarray | NoneType
25
+ A feature vector that describes the object contained in this image.
26
+
27
+ """
28
+
29
+ def __init__(self, tlwh, confidence, feature, oid):
30
+ self.tlwh = np.asarray(tlwh, dtype=np.float64)
31
+ self.confidence = float(confidence)
32
+ self.feature = np.asarray(feature, dtype=np.float32)
33
+ self.oid = oid
34
+
35
+ def to_tlbr(self):
36
+ """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
37
+ `(top left, bottom right)`.
38
+ """
39
+ ret = self.tlwh.copy()
40
+ ret[2:] += ret[:2]
41
+ return ret
42
+
43
+ def to_xyah(self):
44
+ """Convert bounding box to format `(center x, center y, aspect ratio,
45
+ height)`, where the aspect ratio is `width / height`.
46
+ """
47
+ ret = self.tlwh.copy()
48
+ ret[:2] += ret[2:] / 2
49
+ ret[2] /= ret[3]
50
+ return ret
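A short sketch (not part of the commit) of building a Detection from an (x1, y1, x2, y2) detector box plus an appearance feature, assuming the repository root is on PYTHONPATH; the 512-d random vector is only a stand-in for a ReID embedding.

import numpy as np
from deep_sort_pytorch.deep_sort.sort.detection import Detection

x1, y1, x2, y2, conf, cls_id = 100, 50, 180, 250, 0.91, 0       # example values
tlwh = [x1, y1, x2 - x1, y2 - y1]                               # convert to top-left/width/height
feature = np.random.rand(512).astype(np.float32)                # stand-in appearance feature
det = Detection(tlwh, conf, feature, oid=cls_id)                # oid carries the class id
print(det.to_xyah())   # (center x, center y, aspect ratio, height) as used by the Kalman filter
print(det.to_tlbr())   # back to corner format for drawing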
deep_sort_pytorch/deep_sort/sort/iou_matching.py ADDED
@@ -0,0 +1,82 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ from __future__ import absolute_import
3
+ import numpy as np
4
+ from . import linear_assignment
5
+
6
+
7
+ def iou(bbox, candidates):
8
+ """Computer intersection over union.
9
+
10
+ Parameters
11
+ ----------
12
+ bbox : ndarray
13
+ A bounding box in format `(top left x, top left y, width, height)`.
14
+ candidates : ndarray
15
+ A matrix of candidate bounding boxes (one per row) in the same format
16
+ as `bbox`.
17
+
18
+ Returns
19
+ -------
20
+ ndarray
21
+ The intersection over union in [0, 1] between the `bbox` and each
22
+ candidate. A higher score means a larger fraction of the `bbox` is
23
+ occluded by the candidate.
24
+
25
+ """
26
+ bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
27
+ candidates_tl = candidates[:, :2]
28
+ candidates_br = candidates[:, :2] + candidates[:, 2:]
29
+
30
+ tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
31
+ np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
32
+ br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
33
+ np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
34
+ wh = np.maximum(0., br - tl)
35
+
36
+ area_intersection = wh.prod(axis=1)
37
+ area_bbox = bbox[2:].prod()
38
+ area_candidates = candidates[:, 2:].prod(axis=1)
39
+ return area_intersection / (area_bbox + area_candidates - area_intersection)
40
+
41
+
42
+ def iou_cost(tracks, detections, track_indices=None,
43
+ detection_indices=None):
44
+ """An intersection over union distance metric.
45
+
46
+ Parameters
47
+ ----------
48
+ tracks : List[deep_sort.track.Track]
49
+ A list of tracks.
50
+ detections : List[deep_sort.detection.Detection]
51
+ A list of detections.
52
+ track_indices : Optional[List[int]]
53
+ A list of indices to tracks that should be matched. Defaults to
54
+ all `tracks`.
55
+ detection_indices : Optional[List[int]]
56
+ A list of indices to detections that should be matched. Defaults
57
+ to all `detections`.
58
+
59
+ Returns
60
+ -------
61
+ ndarray
62
+ Returns a cost matrix of shape
63
+ len(track_indices), len(detection_indices) where entry (i, j) is
64
+ `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
65
+
66
+ """
67
+ if track_indices is None:
68
+ track_indices = np.arange(len(tracks))
69
+ if detection_indices is None:
70
+ detection_indices = np.arange(len(detections))
71
+
72
+ cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
73
+ for row, track_idx in enumerate(track_indices):
74
+ if tracks[track_idx].time_since_update > 1:
75
+ cost_matrix[row, :] = linear_assignment.INFTY_COST
76
+ continue
77
+
78
+ bbox = tracks[track_idx].to_tlwh()
79
+ candidates = np.asarray(
80
+ [detections[i].tlwh for i in detection_indices])
81
+ cost_matrix[row, :] = 1. - iou(bbox, candidates)
82
+ return cost_matrix
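A minimal sketch (not part of the commit) of the iou() helper on its own, assuming the repository root is on PYTHONPATH. Boxes are given as (top-left x, top-left y, width, height).

import numpy as np
from deep_sort_pytorch.deep_sort.sort.iou_matching import iou

track_box = np.array([10., 10., 20., 20.])
candidates = np.array([[10., 10., 20., 20.],     # identical box   -> IoU 1.0
                       [20., 20., 20., 20.],     # partial overlap -> IoU ~0.14
                       [100., 100., 20., 20.]])  # disjoint        -> IoU 0.0
print(iou(track_box, candidates))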
deep_sort_pytorch/deep_sort/sort/kalman_filter.py ADDED
@@ -0,0 +1,229 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ import numpy as np
3
+ import scipy.linalg
4
+
5
+
6
+ """
7
+ Table for the 0.95 quantile of the chi-square distribution with N degrees of
8
+ freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
9
+ function and used as Mahalanobis gating threshold.
10
+ """
11
+ chi2inv95 = {
12
+ 1: 3.8415,
13
+ 2: 5.9915,
14
+ 3: 7.8147,
15
+ 4: 9.4877,
16
+ 5: 11.070,
17
+ 6: 12.592,
18
+ 7: 14.067,
19
+ 8: 15.507,
20
+ 9: 16.919}
21
+
22
+
23
+ class KalmanFilter(object):
24
+ """
25
+ A simple Kalman filter for tracking bounding boxes in image space.
26
+
27
+ The 8-dimensional state space
28
+
29
+ x, y, a, h, vx, vy, va, vh
30
+
31
+ contains the bounding box center position (x, y), aspect ratio a, height h,
32
+ and their respective velocities.
33
+
34
+ Object motion follows a constant velocity model. The bounding box location
35
+ (x, y, a, h) is taken as direct observation of the state space (linear
36
+ observation model).
37
+
38
+ """
39
+
40
+ def __init__(self):
41
+ ndim, dt = 4, 1.
42
+
43
+ # Create Kalman filter model matrices.
44
+ self._motion_mat = np.eye(2 * ndim, 2 * ndim)
45
+ for i in range(ndim):
46
+ self._motion_mat[i, ndim + i] = dt
47
+ self._update_mat = np.eye(ndim, 2 * ndim)
48
+
49
+ # Motion and observation uncertainty are chosen relative to the current
50
+ # state estimate. These weights control the amount of uncertainty in
51
+ # the model. This is a bit hacky.
52
+ self._std_weight_position = 1. / 20
53
+ self._std_weight_velocity = 1. / 160
54
+
55
+ def initiate(self, measurement):
56
+ """Create track from unassociated measurement.
57
+
58
+ Parameters
59
+ ----------
60
+ measurement : ndarray
61
+ Bounding box coordinates (x, y, a, h) with center position (x, y),
62
+ aspect ratio a, and height h.
63
+
64
+ Returns
65
+ -------
66
+ (ndarray, ndarray)
67
+ Returns the mean vector (8 dimensional) and covariance matrix (8x8
68
+ dimensional) of the new track. Unobserved velocities are initialized
69
+ to 0 mean.
70
+
71
+ """
72
+ mean_pos = measurement
73
+ mean_vel = np.zeros_like(mean_pos)
74
+ mean = np.r_[mean_pos, mean_vel]
75
+
76
+ std = [
77
+ 2 * self._std_weight_position * measurement[3],
78
+ 2 * self._std_weight_position * measurement[3],
79
+ 1e-2,
80
+ 2 * self._std_weight_position * measurement[3],
81
+ 10 * self._std_weight_velocity * measurement[3],
82
+ 10 * self._std_weight_velocity * measurement[3],
83
+ 1e-5,
84
+ 10 * self._std_weight_velocity * measurement[3]]
85
+ covariance = np.diag(np.square(std))
86
+ return mean, covariance
87
+
88
+ def predict(self, mean, covariance):
89
+ """Run Kalman filter prediction step.
90
+
91
+ Parameters
92
+ ----------
93
+ mean : ndarray
94
+ The 8 dimensional mean vector of the object state at the previous
95
+ time step.
96
+ covariance : ndarray
97
+ The 8x8 dimensional covariance matrix of the object state at the
98
+ previous time step.
99
+
100
+ Returns
101
+ -------
102
+ (ndarray, ndarray)
103
+ Returns the mean vector and covariance matrix of the predicted
104
+ state. Unobserved velocities are initialized to 0 mean.
105
+
106
+ """
107
+ std_pos = [
108
+ self._std_weight_position * mean[3],
109
+ self._std_weight_position * mean[3],
110
+ 1e-2,
111
+ self._std_weight_position * mean[3]]
112
+ std_vel = [
113
+ self._std_weight_velocity * mean[3],
114
+ self._std_weight_velocity * mean[3],
115
+ 1e-5,
116
+ self._std_weight_velocity * mean[3]]
117
+ motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
118
+
119
+ mean = np.dot(self._motion_mat, mean)
120
+ covariance = np.linalg.multi_dot((
121
+ self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
122
+
123
+ return mean, covariance
124
+
125
+ def project(self, mean, covariance):
126
+ """Project state distribution to measurement space.
127
+
128
+ Parameters
129
+ ----------
130
+ mean : ndarray
131
+ The state's mean vector (8 dimensional array).
132
+ covariance : ndarray
133
+ The state's covariance matrix (8x8 dimensional).
134
+
135
+ Returns
136
+ -------
137
+ (ndarray, ndarray)
138
+ Returns the projected mean and covariance matrix of the given state
139
+ estimate.
140
+
141
+ """
142
+ std = [
143
+ self._std_weight_position * mean[3],
144
+ self._std_weight_position * mean[3],
145
+ 1e-1,
146
+ self._std_weight_position * mean[3]]
147
+ innovation_cov = np.diag(np.square(std))
148
+
149
+ mean = np.dot(self._update_mat, mean)
150
+ covariance = np.linalg.multi_dot((
151
+ self._update_mat, covariance, self._update_mat.T))
152
+ return mean, covariance + innovation_cov
153
+
154
+ def update(self, mean, covariance, measurement):
155
+ """Run Kalman filter correction step.
156
+
157
+ Parameters
158
+ ----------
159
+ mean : ndarray
160
+ The predicted state's mean vector (8 dimensional).
161
+ covariance : ndarray
162
+ The state's covariance matrix (8x8 dimensional).
163
+ measurement : ndarray
164
+ The 4 dimensional measurement vector (x, y, a, h), where (x, y)
165
+ is the center position, a the aspect ratio, and h the height of the
166
+ bounding box.
167
+
168
+ Returns
169
+ -------
170
+ (ndarray, ndarray)
171
+ Returns the measurement-corrected state distribution.
172
+
173
+ """
174
+ projected_mean, projected_cov = self.project(mean, covariance)
175
+
176
+ chol_factor, lower = scipy.linalg.cho_factor(
177
+ projected_cov, lower=True, check_finite=False)
178
+ kalman_gain = scipy.linalg.cho_solve(
179
+ (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
180
+ check_finite=False).T
181
+ innovation = measurement - projected_mean
182
+
183
+ new_mean = mean + np.dot(innovation, kalman_gain.T)
184
+ new_covariance = covariance - np.linalg.multi_dot((
185
+ kalman_gain, projected_cov, kalman_gain.T))
186
+ return new_mean, new_covariance
187
+
188
+ def gating_distance(self, mean, covariance, measurements,
189
+ only_position=False):
190
+ """Compute gating distance between state distribution and measurements.
191
+
192
+ A suitable distance threshold can be obtained from `chi2inv95`. If
193
+ `only_position` is False, the chi-square distribution has 4 degrees of
194
+ freedom, otherwise 2.
195
+
196
+ Parameters
197
+ ----------
198
+ mean : ndarray
199
+ Mean vector over the state distribution (8 dimensional).
200
+ covariance : ndarray
201
+ Covariance of the state distribution (8x8 dimensional).
202
+ measurements : ndarray
203
+ An Nx4 dimensional matrix of N measurements, each in
204
+ format (x, y, a, h) where (x, y) is the bounding box center
205
+ position, a the aspect ratio, and h the height.
206
+ only_position : Optional[bool]
207
+ If True, distance computation is done with respect to the bounding
208
+ box center position only.
209
+
210
+ Returns
211
+ -------
212
+ ndarray
213
+ Returns an array of length N, where the i-th element contains the
214
+ squared Mahalanobis distance between (mean, covariance) and
215
+ `measurements[i]`.
216
+
217
+ """
218
+ mean, covariance = self.project(mean, covariance)
219
+ if only_position:
220
+ mean, covariance = mean[:2], covariance[:2, :2]
221
+ measurements = measurements[:, :2]
222
+
223
+ cholesky_factor = np.linalg.cholesky(covariance)
224
+ d = measurements - mean
225
+ z = scipy.linalg.solve_triangular(
226
+ cholesky_factor, d.T, lower=True, check_finite=False,
227
+ overwrite_b=True)
228
+ squared_maha = np.sum(z * z, axis=0)
229
+ return squared_maha
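A sketch (not part of the commit) of one initiate/predict/update cycle of this constant-velocity filter, assuming the repository root is on PYTHONPATH; measurements are (center x, center y, aspect ratio, height) and the numbers are illustrative.

import numpy as np
from deep_sort_pytorch.deep_sort.sort.kalman_filter import KalmanFilter

kf = KalmanFilter()
mean, cov = kf.initiate(np.array([320., 240., 0.5, 100.]))            # new track from the first box
mean, cov = kf.predict(mean, cov)                                      # propagate one frame
mean, cov = kf.update(mean, cov, np.array([324., 238., 0.5, 102.]))   # correct with the next box
print(mean[:4])                                                        # filtered (x, y, a, h)
# squared Mahalanobis distance of a candidate measurement, comparable against chi2inv95[4]
print(kf.gating_distance(mean, cov, np.array([[324., 238., 0.5, 102.]])))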
deep_sort_pytorch/deep_sort/sort/linear_assignment.py ADDED
@@ -0,0 +1,192 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ from __future__ import absolute_import
3
+ import numpy as np
4
+ # from sklearn.utils.linear_assignment_ import linear_assignment
5
+ from scipy.optimize import linear_sum_assignment as linear_assignment
6
+ from . import kalman_filter
7
+
8
+
9
+ INFTY_COST = 1e+5
10
+
11
+
12
+ def min_cost_matching(
13
+ distance_metric, max_distance, tracks, detections, track_indices=None,
14
+ detection_indices=None):
15
+ """Solve linear assignment problem.
16
+
17
+ Parameters
18
+ ----------
19
+ distance_metric : Callable[List[Track], List[Detection], List[int], List[int]] -> ndarray
20
+ The distance metric is given a list of tracks and detections as well as
21
+ a list of N track indices and M detection indices. The metric should
22
+ return the NxM dimensional cost matrix, where element (i, j) is the
23
+ association cost between the i-th track in the given track indices and
24
+ the j-th detection in the given detection_indices.
25
+ max_distance : float
26
+ Gating threshold. Associations with cost larger than this value are
27
+ disregarded.
28
+ tracks : List[track.Track]
29
+ A list of predicted tracks at the current time step.
30
+ detections : List[detection.Detection]
31
+ A list of detections at the current time step.
32
+ track_indices : List[int]
33
+ List of track indices that maps rows in `cost_matrix` to tracks in
34
+ `tracks` (see description above).
35
+ detection_indices : List[int]
36
+ List of detection indices that maps columns in `cost_matrix` to
37
+ detections in `detections` (see description above).
38
+
39
+ Returns
40
+ -------
41
+ (List[(int, int)], List[int], List[int])
42
+ Returns a tuple with the following three entries:
43
+ * A list of matched track and detection indices.
44
+ * A list of unmatched track indices.
45
+ * A list of unmatched detection indices.
46
+
47
+ """
48
+ if track_indices is None:
49
+ track_indices = np.arange(len(tracks))
50
+ if detection_indices is None:
51
+ detection_indices = np.arange(len(detections))
52
+
53
+ if len(detection_indices) == 0 or len(track_indices) == 0:
54
+ return [], track_indices, detection_indices # Nothing to match.
55
+
56
+ cost_matrix = distance_metric(
57
+ tracks, detections, track_indices, detection_indices)
58
+ cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
59
+
60
+ row_indices, col_indices = linear_assignment(cost_matrix)
61
+
62
+ matches, unmatched_tracks, unmatched_detections = [], [], []
63
+ for col, detection_idx in enumerate(detection_indices):
64
+ if col not in col_indices:
65
+ unmatched_detections.append(detection_idx)
66
+ for row, track_idx in enumerate(track_indices):
67
+ if row not in row_indices:
68
+ unmatched_tracks.append(track_idx)
69
+ for row, col in zip(row_indices, col_indices):
70
+ track_idx = track_indices[row]
71
+ detection_idx = detection_indices[col]
72
+ if cost_matrix[row, col] > max_distance:
73
+ unmatched_tracks.append(track_idx)
74
+ unmatched_detections.append(detection_idx)
75
+ else:
76
+ matches.append((track_idx, detection_idx))
77
+ return matches, unmatched_tracks, unmatched_detections
78
+
79
+
80
+ def matching_cascade(
81
+ distance_metric, max_distance, cascade_depth, tracks, detections,
82
+ track_indices=None, detection_indices=None):
83
+ """Run matching cascade.
84
+
85
+ Parameters
86
+ ----------
87
+ distance_metric : Callable[List[Track], List[Detection], List[int], List[int]] -> ndarray
88
+ The distance metric is given a list of tracks and detections as well as
89
+ a list of N track indices and M detection indices. The metric should
90
+ return the NxM dimensional cost matrix, where element (i, j) is the
91
+ association cost between the i-th track in the given track indices and
92
+ the j-th detection in the given detection indices.
93
+ max_distance : float
94
+ Gating threshold. Associations with cost larger than this value are
95
+ disregarded.
96
+ cascade_depth: int
97
+ The cascade depth; this should be set to the maximum track age.
98
+ tracks : List[track.Track]
99
+ A list of predicted tracks at the current time step.
100
+ detections : List[detection.Detection]
101
+ A list of detections at the current time step.
102
+ track_indices : Optional[List[int]]
103
+ List of track indices that maps rows in `cost_matrix` to tracks in
104
+ `tracks` (see description above). Defaults to all tracks.
105
+ detection_indices : Optional[List[int]]
106
+ List of detection indices that maps columns in `cost_matrix` to
107
+ detections in `detections` (see description above). Defaults to all
108
+ detections.
109
+
110
+ Returns
111
+ -------
112
+ (List[(int, int)], List[int], List[int])
113
+ Returns a tuple with the following three entries:
114
+ * A list of matched track and detection indices.
115
+ * A list of unmatched track indices.
116
+ * A list of unmatched detection indices.
117
+
118
+ """
119
+ if track_indices is None:
120
+ track_indices = list(range(len(tracks)))
121
+ if detection_indices is None:
122
+ detection_indices = list(range(len(detections)))
123
+
124
+ unmatched_detections = detection_indices
125
+ matches = []
126
+ for level in range(cascade_depth):
127
+ if len(unmatched_detections) == 0: # No detections left
128
+ break
129
+
130
+ track_indices_l = [
131
+ k for k in track_indices
132
+ if tracks[k].time_since_update == 1 + level
133
+ ]
134
+ if len(track_indices_l) == 0: # Nothing to match at this level
135
+ continue
136
+
137
+ matches_l, _, unmatched_detections = \
138
+ min_cost_matching(
139
+ distance_metric, max_distance, tracks, detections,
140
+ track_indices_l, unmatched_detections)
141
+ matches += matches_l
142
+ unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
143
+ return matches, unmatched_tracks, unmatched_detections
144
+
145
+
146
+ def gate_cost_matrix(
147
+ kf, cost_matrix, tracks, detections, track_indices, detection_indices,
148
+ gated_cost=INFTY_COST, only_position=False):
149
+ """Invalidate infeasible entries in cost matrix based on the state
150
+ distributions obtained by Kalman filtering.
151
+
152
+ Parameters
153
+ ----------
154
+ kf : The Kalman filter.
155
+ cost_matrix : ndarray
156
+ The NxM dimensional cost matrix, where N is the number of track indices
157
+ and M is the number of detection indices, such that entry (i, j) is the
158
+ association cost between `tracks[track_indices[i]]` and
159
+ `detections[detection_indices[j]]`.
160
+ tracks : List[track.Track]
161
+ A list of predicted tracks at the current time step.
162
+ detections : List[detection.Detection]
163
+ A list of detections at the current time step.
164
+ track_indices : List[int]
165
+ List of track indices that maps rows in `cost_matrix` to tracks in
166
+ `tracks` (see description above).
167
+ detection_indices : List[int]
168
+ List of detection indices that maps columns in `cost_matrix` to
169
+ detections in `detections` (see description above).
170
+ gated_cost : Optional[float]
171
+ Entries in the cost matrix corresponding to infeasible associations are
172
+ set to this value. Defaults to a very large value.
173
+ only_position : Optional[bool]
174
+ If True, only the x, y position of the state distribution is considered
175
+ during gating. Defaults to False.
176
+
177
+ Returns
178
+ -------
179
+ ndarray
180
+ Returns the modified cost matrix.
181
+
182
+ """
183
+ gating_dim = 2 if only_position else 4
184
+ gating_threshold = kalman_filter.chi2inv95[gating_dim]
185
+ measurements = np.asarray(
186
+ [detections[i].to_xyah() for i in detection_indices])
187
+ for row, track_idx in enumerate(track_indices):
188
+ track = tracks[track_idx]
189
+ gating_distance = kf.gating_distance(
190
+ track.mean, track.covariance, measurements, only_position)
191
+ cost_matrix[row, gating_distance > gating_threshold] = gated_cost
192
+ return cost_matrix
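A sketch (not part of the commit) of how the cascade favours recently updated tracks, assuming the repository root is on PYTHONPATH. The Track/Detection stand-ins are SimpleNamespace objects carrying only the attributes the cascade and the toy metric actually read (time_since_update and a position).

from types import SimpleNamespace
import numpy as np
from deep_sort_pytorch.deep_sort.sort.linear_assignment import matching_cascade

tracks = [SimpleNamespace(pos=np.array([0., 0.]), time_since_update=1),
          SimpleNamespace(pos=np.array([0., 0.]), time_since_update=3)]
dets = [SimpleNamespace(pos=np.array([0.5, 0.5]))]

def metric(tracks, detections, track_indices, detection_indices):
    return np.array([[np.linalg.norm(tracks[t].pos - detections[d].pos)
                      for d in detection_indices] for t in track_indices])

# cascade_depth must cover the oldest track age; the fresher track (age 1) wins the detection
print(matching_cascade(metric, max_distance=2.0, cascade_depth=5,
                       tracks=tracks, detections=dets))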
deep_sort_pytorch/deep_sort/sort/nn_matching.py ADDED
@@ -0,0 +1,176 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ import numpy as np
3
+
4
+
5
+ def _pdist(a, b):
6
+ """Compute pair-wise squared distance between points in `a` and `b`.
7
+
8
+ Parameters
9
+ ----------
10
+ a : array_like
11
+ An NxM matrix of N samples of dimensionality M.
12
+ b : array_like
13
+ An LxM matrix of L samples of dimensionality M.
14
+
15
+ Returns
16
+ -------
17
+ ndarray
18
+ Returns a matrix of size len(a), len(b) such that element (i, j)
19
+ contains the squared distance between `a[i]` and `b[j]`.
20
+
21
+ """
22
+ a, b = np.asarray(a), np.asarray(b)
23
+ if len(a) == 0 or len(b) == 0:
24
+ return np.zeros((len(a), len(b)))
25
+ a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
26
+ r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
27
+ r2 = np.clip(r2, 0., float(np.inf))
28
+ return r2
29
+
30
+
31
+ def _cosine_distance(a, b, data_is_normalized=False):
32
+ """Compute pair-wise cosine distance between points in `a` and `b`.
33
+
34
+ Parameters
35
+ ----------
36
+ a : array_like
37
+ An NxM matrix of N samples of dimensionality M.
38
+ b : array_like
39
+ An LxM matrix of L samples of dimensionality M.
40
+ data_is_normalized : Optional[bool]
41
+ If True, assumes rows in a and b are unit length vectors.
42
+ Otherwise, a and b are explicitly normalized to length 1.
43
+
44
+ Returns
45
+ -------
46
+ ndarray
47
+ Returns a matrix of size len(a), len(b) such that element (i, j)
48
+ contains the cosine distance between `a[i]` and `b[j]`.
49
+
50
+ """
51
+ if not data_is_normalized:
52
+ a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
53
+ b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
54
+ return 1. - np.dot(a, b.T)
55
+
56
+
57
+ def _nn_euclidean_distance(x, y):
58
+ """ Helper function for nearest neighbor distance metric (Euclidean).
59
+
60
+ Parameters
61
+ ----------
62
+ x : ndarray
63
+ A matrix of N row-vectors (sample points).
64
+ y : ndarray
65
+ A matrix of M row-vectors (query points).
66
+
67
+ Returns
68
+ -------
69
+ ndarray
70
+ A vector of length M that contains for each entry in `y` the
71
+ smallest Euclidean distance to a sample in `x`.
72
+
73
+ """
74
+ distances = _pdist(x, y)
75
+ return np.maximum(0.0, distances.min(axis=0))
76
+
77
+
78
+ def _nn_cosine_distance(x, y):
79
+ """ Helper function for nearest neighbor distance metric (cosine).
80
+
81
+ Parameters
82
+ ----------
83
+ x : ndarray
84
+ A matrix of N row-vectors (sample points).
85
+ y : ndarray
86
+ A matrix of M row-vectors (query points).
87
+
88
+ Returns
89
+ -------
90
+ ndarray
91
+ A vector of length M that contains for each entry in `y` the
92
+ smallest cosine distance to a sample in `x`.
93
+
94
+ """
95
+ distances = _cosine_distance(x, y)
96
+ return distances.min(axis=0)
97
+
98
+
99
+ class NearestNeighborDistanceMetric(object):
100
+ """
101
+ A nearest neighbor distance metric that, for each target, returns
102
+ the closest distance to any sample that has been observed so far.
103
+
104
+ Parameters
105
+ ----------
106
+ metric : str
107
+ Either "euclidean" or "cosine".
108
+ matching_threshold: float
109
+ The matching threshold. Samples with larger distance are considered an
110
+ invalid match.
111
+ budget : Optional[int]
112
+ If not None, fix samples per class to at most this number. Removes
113
+ the oldest samples when the budget is reached.
114
+
115
+ Attributes
116
+ ----------
117
+ samples : Dict[int -> List[ndarray]]
118
+ A dictionary that maps from target identities to the list of samples
119
+ that have been observed so far.
120
+
121
+ """
122
+
123
+ def __init__(self, metric, matching_threshold, budget=None):
124
+
125
+ if metric == "euclidean":
126
+ self._metric = _nn_euclidean_distance
127
+ elif metric == "cosine":
128
+ self._metric = _nn_cosine_distance
129
+ else:
130
+ raise ValueError(
131
+ "Invalid metric; must be either 'euclidean' or 'cosine'")
132
+ self.matching_threshold = matching_threshold
133
+ self.budget = budget
134
+ self.samples = {}
135
+
136
+ def partial_fit(self, features, targets, active_targets):
137
+ """Update the distance metric with new data.
138
+
139
+ Parameters
140
+ ----------
141
+ features : ndarray
142
+ An NxM matrix of N features of dimensionality M.
143
+ targets : ndarray
144
+ An integer array of associated target identities.
145
+ active_targets : List[int]
146
+ A list of targets that are currently present in the scene.
147
+
148
+ """
149
+ for feature, target in zip(features, targets):
150
+ self.samples.setdefault(target, []).append(feature)
151
+ if self.budget is not None:
152
+ self.samples[target] = self.samples[target][-self.budget:]
153
+ self.samples = {k: self.samples[k] for k in active_targets}
154
+
155
+ def distance(self, features, targets):
156
+ """Compute distance between features and targets.
157
+
158
+ Parameters
159
+ ----------
160
+ features : ndarray
161
+ An NxM matrix of N features of dimensionality M.
162
+ targets : List[int]
163
+ A list of targets to match the given `features` against.
164
+
165
+ Returns
166
+ -------
167
+ ndarray
168
+ Returns a cost matrix of shape len(targets), len(features), where
169
+ element (i, j) contains the closest squared distance between
170
+ `targets[i]` and `features[j]`.
171
+
172
+ """
173
+ cost_matrix = np.zeros((len(targets), len(features)))
174
+ for i, target in enumerate(targets):
175
+ cost_matrix[i, :] = self._metric(self.samples[target], features)
176
+ return cost_matrix
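A sketch (not part of the commit) of the appearance-feature gallery in isolation, assuming the repository root is on PYTHONPATH; the random 128-d vectors stand in for real ReID features.

import numpy as np
from deep_sort_pytorch.deep_sort.sort.nn_matching import NearestNeighborDistanceMetric

metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)

# two confirmed targets contribute one feature each
feats = np.random.rand(2, 128).astype(np.float32)
metric.partial_fit(feats, targets=np.array([1, 2]), active_targets=[1, 2])

# cost of associating two new detections with targets 1 and 2
new_feats = np.random.rand(2, 128).astype(np.float32)
print(metric.distance(new_feats, targets=[1, 2]))   # shape: (2 targets, 2 detections)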
deep_sort_pytorch/deep_sort/sort/preprocessing.py ADDED
@@ -0,0 +1,73 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ import numpy as np
3
+ import cv2
4
+
5
+
6
+ def non_max_suppression(boxes, max_bbox_overlap, scores=None):
7
+ """Suppress overlapping detections.
8
+
9
+ Original code from [1]_ has been adapted to include confidence score.
10
+
11
+ .. [1] http://www.pyimagesearch.com/2015/02/16/
12
+ faster-non-maximum-suppression-python/
13
+
14
+ Examples
15
+ --------
16
+
17
+ >>> boxes = [d.roi for d in detections]
18
+ >>> scores = [d.confidence for d in detections]
19
+ >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)
20
+ >>> detections = [detections[i] for i in indices]
21
+
22
+ Parameters
23
+ ----------
24
+ boxes : ndarray
25
+ Array of ROIs (x, y, width, height).
26
+ max_bbox_overlap : float
27
+ ROIs that overlap more than this value are suppressed.
28
+ scores : Optional[array_like]
29
+ Detector confidence score.
30
+
31
+ Returns
32
+ -------
33
+ List[int]
34
+ Returns indices of detections that have survived non-maxima suppression.
35
+
36
+ """
37
+ if len(boxes) == 0:
38
+ return []
39
+
40
+ boxes = boxes.astype(np.float64)  # np.float alias was removed from NumPy; use an explicit dtype
41
+ pick = []
42
+
43
+ x1 = boxes[:, 0]
44
+ y1 = boxes[:, 1]
45
+ x2 = boxes[:, 2] + boxes[:, 0]
46
+ y2 = boxes[:, 3] + boxes[:, 1]
47
+
48
+ area = (x2 - x1 + 1) * (y2 - y1 + 1)
49
+ if scores is not None:
50
+ idxs = np.argsort(scores)
51
+ else:
52
+ idxs = np.argsort(y2)
53
+
54
+ while len(idxs) > 0:
55
+ last = len(idxs) - 1
56
+ i = idxs[last]
57
+ pick.append(i)
58
+
59
+ xx1 = np.maximum(x1[i], x1[idxs[:last]])
60
+ yy1 = np.maximum(y1[i], y1[idxs[:last]])
61
+ xx2 = np.minimum(x2[i], x2[idxs[:last]])
62
+ yy2 = np.minimum(y2[i], y2[idxs[:last]])
63
+
64
+ w = np.maximum(0, xx2 - xx1 + 1)
65
+ h = np.maximum(0, yy2 - yy1 + 1)
66
+
67
+ overlap = (w * h) / area[idxs[:last]]
68
+
69
+ idxs = np.delete(
70
+ idxs, np.concatenate(
71
+ ([last], np.where(overlap > max_bbox_overlap)[0])))
72
+
73
+ return pick
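A minimal sketch (not part of the commit) of class-agnostic NMS on (x, y, w, h) boxes with confidence scores, assuming the repository root is on PYTHONPATH.

import numpy as np
from deep_sort_pytorch.deep_sort.sort.preprocessing import non_max_suppression

boxes = np.array([[10, 10, 20, 20],
                  [12, 12, 20, 20],      # heavy overlap with the first box
                  [100, 100, 20, 20]], dtype=float)
scores = np.array([0.9, 0.6, 0.8])
keep = non_max_suppression(boxes, max_bbox_overlap=0.5, scores=scores)
print(keep)   # indices of surviving boxes: the highest-scoring one of each overlapping cluster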
deep_sort_pytorch/deep_sort/sort/track.py ADDED
@@ -0,0 +1,170 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+
3
+
4
+ class TrackState:
5
+ """
6
+ Enumeration type for the single target track state. Newly created tracks are
7
+ classified as `tentative` until enough evidence has been collected. Then,
8
+ the track state is changed to `confirmed`. Tracks that are no longer alive
9
+ are classified as `deleted` to mark them for removal from the set of active
10
+ tracks.
11
+
12
+ """
13
+
14
+ Tentative = 1
15
+ Confirmed = 2
16
+ Deleted = 3
17
+
18
+
19
+ class Track:
20
+ """
21
+ A single target track with state space `(x, y, a, h)` and associated
22
+ velocities, where `(x, y)` is the center of the bounding box, `a` is the
23
+ aspect ratio and `h` is the height.
24
+
25
+ Parameters
26
+ ----------
27
+ mean : ndarray
28
+ Mean vector of the initial state distribution.
29
+ covariance : ndarray
30
+ Covariance matrix of the initial state distribution.
31
+ track_id : int
32
+ A unique track identifier.
33
+ n_init : int
34
+ Number of consecutive detections before the track is confirmed. The
35
+ track state is set to `Deleted` if a miss occurs within the first
36
+ `n_init` frames.
37
+ max_age : int
38
+ The maximum number of consecutive misses before the track state is
39
+ set to `Deleted`.
40
+ feature : Optional[ndarray]
41
+ Feature vector of the detection this track originates from. If not None,
42
+ this feature is added to the `features` cache.
43
+
44
+ Attributes
45
+ ----------
46
+ mean : ndarray
47
+ Mean vector of the initial state distribution.
48
+ covariance : ndarray
49
+ Covariance matrix of the initial state distribution.
50
+ track_id : int
51
+ A unique track identifier.
52
+ hits : int
53
+ Total number of measurement updates.
54
+ age : int
55
+ Total number of frames since first occurrence.
56
+ time_since_update : int
57
+ Total number of frames since last measurement update.
58
+ state : TrackState
59
+ The current track state.
60
+ features : List[ndarray]
61
+ A cache of features. On each measurement update, the associated feature
62
+ vector is added to this list.
63
+
64
+ """
65
+
66
+ def __init__(self, mean, covariance, track_id, n_init, max_age, oid,
67
+ feature=None):
68
+ self.mean = mean
69
+ self.covariance = covariance
70
+ self.track_id = track_id
71
+ self.hits = 1
72
+ self.age = 1
73
+ self.time_since_update = 0
74
+ self.oid = oid
75
+
76
+ self.state = TrackState.Tentative
77
+ self.features = []
78
+ if feature is not None:
79
+ self.features.append(feature)
80
+
81
+ self._n_init = n_init
82
+ self._max_age = max_age
83
+
84
+ def to_tlwh(self):
85
+ """Get current position in bounding box format `(top left x, top left y,
86
+ width, height)`.
87
+
88
+ Returns
89
+ -------
90
+ ndarray
91
+ The bounding box.
92
+
93
+ """
94
+ ret = self.mean[:4].copy()
95
+ ret[2] *= ret[3]
96
+ ret[:2] -= ret[2:] / 2
97
+ return ret
98
+
99
+ def to_tlbr(self):
100
+ """Get current position in bounding box format `(min x, miny, max x,
101
+ max y)`.
102
+
103
+ Returns
104
+ -------
105
+ ndarray
106
+ The bounding box.
107
+
108
+ """
109
+ ret = self.to_tlwh()
110
+ ret[2:] = ret[:2] + ret[2:]
111
+ return ret
112
+
113
+ def increment_age(self):
114
+ self.age += 1
115
+ self.time_since_update += 1
116
+
117
+ def predict(self, kf):
118
+ """Propagate the state distribution to the current time step using a
119
+ Kalman filter prediction step.
120
+
121
+ Parameters
122
+ ----------
123
+ kf : kalman_filter.KalmanFilter
124
+ The Kalman filter.
125
+
126
+ """
127
+ self.mean, self.covariance = kf.predict(self.mean, self.covariance)
128
+ self.increment_age()
129
+
130
+ def update(self, kf, detection):
131
+ """Perform Kalman filter measurement update step and update the feature
132
+ cache.
133
+
134
+ Parameters
135
+ ----------
136
+ kf : kalman_filter.KalmanFilter
137
+ The Kalman filter.
138
+ detection : Detection
139
+ The associated detection.
140
+
141
+ """
142
+ self.mean, self.covariance = kf.update(
143
+ self.mean, self.covariance, detection.to_xyah())
144
+ self.features.append(detection.feature)
145
+
146
+ self.hits += 1
147
+ self.time_since_update = 0
148
+ if self.state == TrackState.Tentative and self.hits >= self._n_init:
149
+ self.state = TrackState.Confirmed
150
+
151
+ def mark_missed(self):
152
+ """Mark this track as missed (no association at the current time step).
153
+ """
154
+ if self.state == TrackState.Tentative:
155
+ self.state = TrackState.Deleted
156
+ elif self.time_since_update > self._max_age:
157
+ self.state = TrackState.Deleted
158
+
159
+ def is_tentative(self):
160
+ """Returns True if this track is tentative (unconfirmed).
161
+ """
162
+ return self.state == TrackState.Tentative
163
+
164
+ def is_confirmed(self):
165
+ """Returns True if this track is confirmed."""
166
+ return self.state == TrackState.Confirmed
167
+
168
+ def is_deleted(self):
169
+ """Returns True if this track is dead and should be deleted."""
170
+ return self.state == TrackState.Deleted
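A sketch (not part of the commit) of the tentative-to-confirmed life cycle, assuming the repository root is on PYTHONPATH and reusing KalmanFilter and Detection from this package; the random feature vector is a stand-in.

import numpy as np
from deep_sort_pytorch.deep_sort.sort.kalman_filter import KalmanFilter
from deep_sort_pytorch.deep_sort.sort.detection import Detection
from deep_sort_pytorch.deep_sort.sort.track import Track

kf = KalmanFilter()
det = Detection([100, 50, 80, 200], 0.9, np.random.rand(512).astype(np.float32), oid=0)
mean, cov = kf.initiate(det.to_xyah())
track = Track(mean, cov, track_id=1, n_init=3, max_age=30, oid=det.oid, feature=det.feature)

for _ in range(3):            # n_init consecutive hits confirm the track
    track.predict(kf)
    track.update(kf, det)
print(track.is_confirmed(), track.to_tlwh())   # True, filtered (x, y, w, h)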
deep_sort_pytorch/deep_sort/sort/tracker.py ADDED
@@ -0,0 +1,143 @@
1
+ # vim: expandtab:ts=4:sw=4
2
+ from __future__ import absolute_import
3
+ import numpy as np
4
+ from . import kalman_filter
5
+ from . import linear_assignment
6
+ from . import iou_matching
7
+ from .track import Track
8
+
9
+
10
+ class Tracker:
11
+ """
12
+ This is the multi-target tracker.
13
+
14
+ Parameters
15
+ ----------
16
+ metric : nn_matching.NearestNeighborDistanceMetric
17
+ A distance metric for measurement-to-track association.
18
+ max_age : int
19
+ Maximum number of consecutive misses before a track is deleted.
20
+ n_init : int
21
+ Number of consecutive detections before the track is confirmed. The
22
+ track state is set to `Deleted` if a miss occurs within the first
23
+ `n_init` frames.
24
+
25
+ Attributes
26
+ ----------
27
+ metric : nn_matching.NearestNeighborDistanceMetric
28
+ The distance metric used for measurement to track association.
29
+ max_age : int
30
+ Maximum number of consecutive misses before a track is deleted.
31
+ n_init : int
32
+ Number of frames that a track remains in initialization phase.
33
+ kf : kalman_filter.KalmanFilter
34
+ A Kalman filter to filter target trajectories in image space.
35
+ tracks : List[Track]
36
+ The list of active tracks at the current time step.
37
+
38
+ """
39
+
40
+ def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3):
41
+ self.metric = metric
42
+ self.max_iou_distance = max_iou_distance
43
+ self.max_age = max_age
44
+ self.n_init = n_init
45
+
46
+ self.kf = kalman_filter.KalmanFilter()
47
+ self.tracks = []
48
+ self._next_id = 1
49
+
50
+ def predict(self):
51
+ """Propagate track state distributions one time step forward.
52
+
53
+ This function should be called once every time step, before `update`.
54
+ """
55
+ for track in self.tracks:
56
+ track.predict(self.kf)
57
+
58
+ def increment_ages(self):
59
+ for track in self.tracks:
60
+ track.increment_age()
61
+ track.mark_missed()
62
+
63
+ def update(self, detections):
64
+ """Perform measurement update and track management.
65
+
66
+ Parameters
67
+ ----------
68
+ detections : List[deep_sort.detection.Detection]
69
+ A list of detections at the current time step.
70
+
71
+ """
72
+ # Run matching cascade.
73
+ matches, unmatched_tracks, unmatched_detections = \
74
+ self._match(detections)
75
+
76
+ # Update track set.
77
+ for track_idx, detection_idx in matches:
78
+ self.tracks[track_idx].update(
79
+ self.kf, detections[detection_idx])
80
+ for track_idx in unmatched_tracks:
81
+ self.tracks[track_idx].mark_missed()
82
+ for detection_idx in unmatched_detections:
83
+ self._initiate_track(detections[detection_idx])
84
+ self.tracks = [t for t in self.tracks if not t.is_deleted()]
85
+
86
+ # Update distance metric.
87
+ active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
88
+ features, targets = [], []
89
+ for track in self.tracks:
90
+ if not track.is_confirmed():
91
+ continue
92
+ features += track.features
93
+ targets += [track.track_id for _ in track.features]
94
+ track.features = []
95
+ self.metric.partial_fit(
96
+ np.asarray(features), np.asarray(targets), active_targets)
97
+
98
+ def _match(self, detections):
99
+
100
+ def gated_metric(tracks, dets, track_indices, detection_indices):
101
+ features = np.array([dets[i].feature for i in detection_indices])
102
+ targets = np.array([tracks[i].track_id for i in track_indices])
103
+ cost_matrix = self.metric.distance(features, targets)
104
+ cost_matrix = linear_assignment.gate_cost_matrix(
105
+ self.kf, cost_matrix, tracks, dets, track_indices,
106
+ detection_indices)
107
+
108
+ return cost_matrix
109
+
110
+ # Split track set into confirmed and unconfirmed tracks.
111
+ confirmed_tracks = [
112
+ i for i, t in enumerate(self.tracks) if t.is_confirmed()]
113
+ unconfirmed_tracks = [
114
+ i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
115
+
116
+ # Associate confirmed tracks using appearance features.
117
+ matches_a, unmatched_tracks_a, unmatched_detections = \
118
+ linear_assignment.matching_cascade(
119
+ gated_metric, self.metric.matching_threshold, self.max_age,
120
+ self.tracks, detections, confirmed_tracks)
121
+
122
+ # Associate remaining tracks together with unconfirmed tracks using IOU.
123
+ iou_track_candidates = unconfirmed_tracks + [
124
+ k for k in unmatched_tracks_a if
125
+ self.tracks[k].time_since_update == 1]
126
+ unmatched_tracks_a = [
127
+ k for k in unmatched_tracks_a if
128
+ self.tracks[k].time_since_update != 1]
129
+ matches_b, unmatched_tracks_b, unmatched_detections = \
130
+ linear_assignment.min_cost_matching(
131
+ iou_matching.iou_cost, self.max_iou_distance, self.tracks,
132
+ detections, iou_track_candidates, unmatched_detections)
133
+
134
+ matches = matches_a + matches_b
135
+ unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
136
+ return matches, unmatched_tracks, unmatched_detections
137
+
138
+ def _initiate_track(self, detection):
139
+ mean, covariance = self.kf.initiate(detection.to_xyah())
140
+ self.tracks.append(Track(
141
+ mean, covariance, self._next_id, self.n_init, self.max_age, detection.oid,
142
+ detection.feature))
143
+ self._next_id += 1
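A sketch (not part of the commit) of the per-frame driver loop around Tracker, assuming the repository root is on PYTHONPATH; in the real pipeline `frame_detections` comes from the YOLO detector plus the appearance-feature extractor, here it is a single hand-made detection.

import numpy as np
from deep_sort_pytorch.deep_sort.sort.detection import Detection
from deep_sort_pytorch.deep_sort.sort.nn_matching import NearestNeighborDistanceMetric
from deep_sort_pytorch.deep_sort.sort.tracker import Tracker

metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)
tracker = Tracker(metric, max_iou_distance=0.7, max_age=70, n_init=3)

frame_detections = [Detection([100, 50, 80, 200], 0.9,
                              np.random.rand(512).astype(np.float32), oid=0)]
tracker.predict()                    # 1) propagate every track one frame
tracker.update(frame_detections)     # 2) cascade + IoU association, track management
for t in tracker.tracks:
    # tracks only become confirmed after n_init consecutive hits
    if t.is_confirmed() and t.time_since_update == 0:
        print(t.track_id, t.oid, t.to_tlbr())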
deep_sort_pytorch/utils/__init__.py ADDED
File without changes
deep_sort_pytorch/utils/asserts.py ADDED
@@ -0,0 +1,13 @@
1
+ from os import environ
2
+
3
+
4
+ def assert_in(file, files_to_check):
5
+ if file not in files_to_check:
6
+ raise AssertionError("{} does not exist in the list".format(str(file)))
7
+ return True
8
+
9
+
10
+ def assert_in_env(check_list: list):
11
+ for item in check_list:
12
+ assert_in(item, environ.keys())
13
+ return True
deep_sort_pytorch/utils/draw.py ADDED
@@ -0,0 +1,36 @@
1
+ import numpy as np
2
+ import cv2
3
+
4
+ palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)
5
+
6
+
7
+ def compute_color_for_labels(label):
8
+ """
9
+ Simple function that adds fixed color depending on the class
10
+ """
11
+ color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
12
+ return tuple(color)
13
+
14
+
15
+ def draw_boxes(img, bbox, identities=None, offset=(0,0)):
16
+ for i,box in enumerate(bbox):
17
+ x1,y1,x2,y2 = [int(i) for i in box]
18
+ x1 += offset[0]
19
+ x2 += offset[0]
20
+ y1 += offset[1]
21
+ y2 += offset[1]
22
+ # box text and bar
23
+ id = int(identities[i]) if identities is not None else 0
24
+ color = compute_color_for_labels(id)
25
+ label = '{}{:d}'.format("", id)
26
+ t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0]
27
+ cv2.rectangle(img,(x1, y1),(x2,y2),color,3)
28
+ cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1)
29
+ cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2)
30
+ return img
31
+
32
+
33
+
34
+ if __name__ == '__main__':
35
+ for i in range(82):
36
+ print(compute_color_for_labels(i))
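A minimal sketch (not part of the commit) of overlaying tracked boxes on a frame with draw_boxes, assuming the repository root is on PYTHONPATH; the black image stands in for a video frame.

import numpy as np
from deep_sort_pytorch.utils.draw import draw_boxes

frame = np.zeros((480, 640, 3), dtype=np.uint8)           # stand-in for a video frame
bbox_xyxy = [[100, 50, 180, 250], [300, 120, 360, 300]]   # (x1, y1, x2, y2) per track
identities = [1, 2]                                        # track ids drive the per-id colour
frame = draw_boxes(frame, bbox_xyxy, identities)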
deep_sort_pytorch/utils/evaluation.py ADDED
@@ -0,0 +1,103 @@
1
+ import os
2
+ import numpy as np
3
+ import copy
4
+ import motmetrics as mm
5
+ mm.lap.default_solver = 'lap'
6
+ from utils.io import read_results, unzip_objs
7
+
8
+
9
+ class Evaluator(object):
10
+
11
+ def __init__(self, data_root, seq_name, data_type):
12
+ self.data_root = data_root
13
+ self.seq_name = seq_name
14
+ self.data_type = data_type
15
+
16
+ self.load_annotations()
17
+ self.reset_accumulator()
18
+
19
+ def load_annotations(self):
20
+ assert self.data_type == 'mot'
21
+
22
+ gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
23
+ self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True)
24
+ self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True)
25
+
26
+ def reset_accumulator(self):
27
+ self.acc = mm.MOTAccumulator(auto_id=True)
28
+
29
+ def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):
30
+ # results
31
+ trk_tlwhs = np.copy(trk_tlwhs)
32
+ trk_ids = np.copy(trk_ids)
33
+
34
+ # gts
35
+ gt_objs = self.gt_frame_dict.get(frame_id, [])
36
+ gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]
37
+
38
+ # ignore boxes
39
+ ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])
40
+ ignore_tlwhs = unzip_objs(ignore_objs)[0]
41
+
42
+
43
+ # remove ignored results
44
+ keep = np.ones(len(trk_tlwhs), dtype=bool)
45
+ iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
46
+ if len(iou_distance) > 0:
47
+ match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
48
+ match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
49
+ match_ious = iou_distance[match_is, match_js]
50
+
51
+ match_js = np.asarray(match_js, dtype=int)
52
+ match_js = match_js[np.logical_not(np.isnan(match_ious))]
53
+ keep[match_js] = False
54
+ trk_tlwhs = trk_tlwhs[keep]
55
+ trk_ids = trk_ids[keep]
56
+
57
+ # get distance matrix
58
+ iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)
59
+
60
+ # acc
61
+ self.acc.update(gt_ids, trk_ids, iou_distance)
62
+
63
+ if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'):
64
+ events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics
65
+ else:
66
+ events = None
67
+ return events
68
+
69
+ def eval_file(self, filename):
70
+ self.reset_accumulator()
71
+
72
+ result_frame_dict = read_results(filename, self.data_type, is_gt=False)
73
+ frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys())))
74
+ for frame_id in frames:
75
+ trk_objs = result_frame_dict.get(frame_id, [])
76
+ trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]
77
+ self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)
78
+
79
+ return self.acc
80
+
81
+ @staticmethod
82
+ def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):
83
+ names = copy.deepcopy(names)
84
+ if metrics is None:
85
+ metrics = mm.metrics.motchallenge_metrics
86
+ metrics = copy.deepcopy(metrics)
87
+
88
+ mh = mm.metrics.create()
89
+ summary = mh.compute_many(
90
+ accs,
91
+ metrics=metrics,
92
+ names=names,
93
+ generate_overall=True
94
+ )
95
+
96
+ return summary
97
+
98
+ @staticmethod
99
+ def save_summary(summary, filename):
100
+ import pandas as pd
101
+ writer = pd.ExcelWriter(filename)
102
+ summary.to_excel(writer)
103
+ writer.close()  # ExcelWriter.save() was deprecated and later removed in pandas; close() writes the file
deep_sort_pytorch/utils/io.py ADDED
@@ -0,0 +1,133 @@
1
+ import os
2
+ from typing import Dict
3
+ import numpy as np
4
+
5
+ # from utils.log import get_logger
6
+
7
+
8
+ def write_results(filename, results, data_type):
9
+ if data_type == 'mot':
10
+ save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n'
11
+ elif data_type == 'kitti':
12
+ save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
13
+ else:
14
+ raise ValueError(data_type)
15
+
16
+ with open(filename, 'w') as f:
17
+ for frame_id, tlwhs, track_ids in results:
18
+ if data_type == 'kitti':
19
+ frame_id -= 1
20
+ for tlwh, track_id in zip(tlwhs, track_ids):
21
+ if track_id < 0:
22
+ continue
23
+ x1, y1, w, h = tlwh
24
+ x2, y2 = x1 + w, y1 + h
25
+ line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h)
26
+ f.write(line)
27
+
28
+
29
+ # def write_results(filename, results_dict: Dict, data_type: str):
30
+ # if not filename:
31
+ # return
32
+ # path = os.path.dirname(filename)
33
+ # if not os.path.exists(path):
34
+ # os.makedirs(path)
35
+
36
+ # if data_type in ('mot', 'mcmot', 'lab'):
37
+ # save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n'
38
+ # elif data_type == 'kitti':
39
+ # save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n'
40
+ # else:
41
+ # raise ValueError(data_type)
42
+
43
+ # with open(filename, 'w') as f:
44
+ # for frame_id, frame_data in results_dict.items():
45
+ # if data_type == 'kitti':
46
+ # frame_id -= 1
47
+ # for tlwh, track_id in frame_data:
48
+ # if track_id < 0:
49
+ # continue
50
+ # x1, y1, w, h = tlwh
51
+ # x2, y2 = x1 + w, y1 + h
52
+ # line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0)
53
+ # f.write(line)
54
+ # logger.info('Save results to {}'.format(filename))
55
+
56
+
57
+ def read_results(filename, data_type: str, is_gt=False, is_ignore=False):
58
+ if data_type in ('mot', 'lab'):
59
+ read_fun = read_mot_results
60
+ else:
61
+ raise ValueError('Unknown data type: {}'.format(data_type))
62
+
63
+ return read_fun(filename, is_gt, is_ignore)
64
+
65
+
66
+ """
67
+ labels={'ped', ... % 1
68
+ 'person_on_vhcl', ... % 2
69
+ 'car', ... % 3
70
+ 'bicycle', ... % 4
71
+ 'mbike', ... % 5
72
+ 'non_mot_vhcl', ... % 6
73
+ 'static_person', ... % 7
74
+ 'distractor', ... % 8
75
+ 'occluder', ... % 9
76
+ 'occluder_on_grnd', ... %10
77
+ 'occluder_full', ... % 11
78
+ 'reflection', ... % 12
79
+ 'crowd' ... % 13
80
+ };
81
+ """
82
+
83
+
84
+ def read_mot_results(filename, is_gt, is_ignore):
85
+ valid_labels = {1}
86
+ ignore_labels = {2, 7, 8, 12}
87
+ results_dict = dict()
88
+ if os.path.isfile(filename):
89
+ with open(filename, 'r') as f:
90
+ for line in f.readlines():
91
+ linelist = line.split(',')
92
+ if len(linelist) < 7:
93
+ continue
94
+ fid = int(linelist[0])
95
+ if fid < 1:
96
+ continue
97
+ results_dict.setdefault(fid, list())
98
+
99
+ if is_gt:
100
+ if 'MOT16-' in filename or 'MOT17-' in filename:
101
+ label = int(float(linelist[7]))
102
+ mark = int(float(linelist[6]))
103
+ if mark == 0 or label not in valid_labels:
104
+ continue
105
+ score = 1
106
+ elif is_ignore:
107
+ if 'MOT16-' in filename or 'MOT17-' in filename:
108
+ label = int(float(linelist[7]))
109
+ vis_ratio = float(linelist[8])
110
+ if label not in ignore_labels and vis_ratio >= 0:
111
+ continue
112
+ else:
113
+ continue
114
+ score = 1
115
+ else:
116
+ score = float(linelist[6])
117
+
118
+ tlwh = tuple(map(float, linelist[2:6]))
119
+ target_id = int(linelist[1])
120
+
121
+ results_dict[fid].append((tlwh, target_id, score))
122
+
123
+ return results_dict
124
+
125
+
126
+ def unzip_objs(objs):
127
+ if len(objs) > 0:
128
+ tlwhs, ids, scores = zip(*objs)
129
+ else:
130
+ tlwhs, ids, scores = [], [], []
131
+ tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)
132
+
133
+ return tlwhs, ids, scores
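A small usage sketch for these I/O helpers (not part of the commit); the file name and the toy tracks are invented for illustration.

from deep_sort_pytorch.utils.io import write_results, read_results, unzip_objs

# (frame_id, [tlwh boxes], [track ids]) -- toy data
results = [
    (1, [(100.0, 200.0, 50.0, 120.0)], [7]),
    (2, [(104.0, 198.0, 50.0, 121.0)], [7]),
]
write_results("demo_results.txt", results, data_type="mot")

# read the file back; scores come out as -1 because the MOT writer stores -1 in that column
frame_dict = read_results("demo_results.txt", "mot", is_gt=False)
tlwhs, ids, scores = unzip_objs(frame_dict[1])
print(tlwhs.shape, ids, scores)      # (1, 4) (7,) (-1.0,)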
deep_sort_pytorch/utils/json_logger.py ADDED
@@ -0,0 +1,383 @@
1
+ """
2
+ References:
3
+ https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f
4
+ """
5
+ import json
6
+ from os import makedirs
7
+ from os.path import exists, join
8
+ from datetime import datetime
9
+
10
+
11
+ class JsonMeta(object):
12
+ HOURS = 3
13
+ MINUTES = 59
14
+ SECONDS = 59
15
+ PATH_TO_SAVE = 'LOGS'
16
+ DEFAULT_FILE_NAME = 'remaining'
17
+
18
+
19
+ class BaseJsonLogger(object):
20
+ """
21
+ This is the base class that returns __dict__ of its own
22
+ it also returns the dicts of objects in the attributes that are list instances
23
+
24
+ """
25
+
26
+ def dic(self):
27
+ # returns dicts of objects
28
+ out = {}
29
+ for k, v in self.__dict__.items():
30
+ if hasattr(v, 'dic'):
31
+ out[k] = v.dic()
32
+ elif isinstance(v, list):
33
+ out[k] = self.list(v)
34
+ else:
35
+ out[k] = v
36
+ return out
37
+
38
+ @staticmethod
39
+ def list(values):
40
+ # applies the dic method on items in the list
41
+ return [v.dic() if hasattr(v, 'dic') else v for v in values]
42
+
43
+
44
+ class Label(BaseJsonLogger):
45
+ """
46
+ For each bounding box there are various categories with confidences. Label class keeps track of that information.
47
+ """
48
+
49
+ def __init__(self, category: str, confidence: float):
50
+ self.category = category
51
+ self.confidence = confidence
52
+
53
+
54
+ class Bbox(BaseJsonLogger):
55
+ """
56
+ This class stores the information for each bounding box and is used by JsonParser
57
+ Attributes:
58
+ labels (list): List of label module.
59
+ top (int):
60
+ left (int):
61
+ width (int):
62
+ height (int):
63
+
64
+ Args:
65
+ bbox_id (int):
66
+ top (int):
67
+ left (int):
68
+ width (int):
69
+ height (int):
70
+
71
+ References:
72
+ Check Label module for better understanding.
73
+
74
+
75
+ """
76
+
77
+ def __init__(self, bbox_id, top, left, width, height):
78
+ self.labels = []
79
+ self.bbox_id = bbox_id
80
+ self.top = top
81
+ self.left = left
82
+ self.width = width
83
+ self.height = height
84
+
85
+ def add_label(self, category, confidence):
86
+ # adds category and confidence only if top_k is not exceeded.
87
+ self.labels.append(Label(category, confidence))
88
+
89
+ def labels_full(self, value):
90
+ return len(self.labels) == value
91
+
92
+
93
+ class Frame(BaseJsonLogger):
94
+ """
95
+ This class stores the information for each frame and is used by JsonParser
96
+ Attributes:
97
+ timestamp (float): The elapsed time of captured frame
98
+ frame_id (int): The frame number of the captured video
99
+ bboxes (list of Bbox objects): Stores the list of bbox objects.
100
+
101
+ References:
102
+ Check Bbox class for better information
103
+
104
+ Args:
105
+ timestamp (float):
106
+ frame_id (int):
107
+
108
+ """
109
+
110
+ def __init__(self, frame_id: int, timestamp: float = None):
111
+ self.frame_id = frame_id
112
+ self.timestamp = timestamp
113
+ self.bboxes = []
114
+
115
+ def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int):
116
+ bboxes_ids = [bbox.bbox_id for bbox in self.bboxes]
117
+ if bbox_id not in bboxes_ids:
118
+ self.bboxes.append(Bbox(bbox_id, top, left, width, height))
119
+ else:
120
+ raise ValueError("Frame with id: {} already has a Bbox with id: {}".format(self.frame_id, bbox_id))
121
+
122
+ def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float):
123
+ bboxes = {bbox.bbox_id: bbox for bbox in self.bboxes}  # Bbox objects expose bbox_id, not id
124
+ if bbox_id in bboxes.keys():
125
+ res = bboxes.get(bbox_id)
126
+ res.add_label(category, confidence)
127
+ else:
128
+ raise ValueError('the bbox with id: {} does not exist!'.format(bbox_id))
129
+
130
+
131
+ class BboxToJsonLogger(BaseJsonLogger):
132
+ """
133
+ This module is designed to automate the task of logging JSONs. An example JSON is used
134
+ to show the contents of the json file briefly
135
+ Example:
136
+ {
137
+ "video_details": {
138
+ "frame_width": 1920,
139
+ "frame_height": 1080,
140
+ "frame_rate": 20,
141
+ "video_name": "/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi"
142
+ },
143
+ "frames": [
144
+ {
145
+ "frame_id": 329,
146
+ "timestamp": 3365.1254
147
+ "bboxes": [
148
+ {
149
+ "labels": [
150
+ {
151
+ "category": "pedestrian",
152
+ "confidence": 0.9
153
+ }
154
+ ],
155
+ "bbox_id": 0,
156
+ "top": 1257,
157
+ "left": 138,
158
+ "width": 68,
159
+ "height": 109
160
+ }
161
+ ]
162
+ }],
163
+
164
+ Attributes:
165
+ frames (dict): It's a dictionary that maps each frame_id to json attributes.
166
+ video_details (dict): information about video file.
167
+ top_k_labels (int): shows the allowed number of labels
168
+ start_time (datetime object): we use it to automate the json output by time.
169
+
170
+ Args:
171
+ top_k_labels (int): shows the allowed number of labels
172
+
173
+ """
174
+
175
+ def __init__(self, top_k_labels: int = 1):
176
+ self.frames = {}
177
+ self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None,
178
+ video_name=None)
179
+ self.top_k_labels = top_k_labels
180
+ self.start_time = datetime.now()
181
+
182
+ def set_top_k(self, value):
183
+ self.top_k_labels = value
184
+
185
+ def frame_exists(self, frame_id: int) -> bool:
186
+ """
187
+ Args:
188
+ frame_id (int):
189
+
190
+ Returns:
191
+ bool: true if frame_id is recognized
192
+ """
193
+ return frame_id in self.frames.keys()
194
+
195
+ def add_frame(self, frame_id: int, timestamp: float = None) -> None:
196
+ """
197
+ Args:
198
+ frame_id (int):
199
+ timestamp (float): opencv captured frame time property
200
+
201
+ Raises:
202
+ ValueError: if frame_id already exists in the frames attribute
203
+
204
+ Returns:
205
+ None
206
+
207
+ """
208
+ if not self.frame_exists(frame_id):
209
+ self.frames[frame_id] = Frame(frame_id, timestamp)
210
+ else:
211
+ raise ValueError("Frame id: {} already exists".format(frame_id))
212
+
213
+ def bbox_exists(self, frame_id: int, bbox_id: int) -> bool:
214
+ """
215
+ Args:
216
+ frame_id:
217
+ bbox_id:
218
+
219
+ Returns:
220
+ bool: if bbox exists in frame bboxes list
221
+ """
222
+ bboxes = []
223
+ if self.frame_exists(frame_id=frame_id):
224
+ bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes]
225
+ return bbox_id in bboxes
226
+
227
+ def find_bbox(self, frame_id: int, bbox_id: int):
228
+ """
229
+
230
+ Args:
231
+ frame_id:
232
+ bbox_id:
233
+
234
+ Returns:
235
+ bbox_id (int):
236
+
237
+ Raises:
238
+ ValueError: if bbox_id does not exist in the bbox list of specific frame.
239
+ """
240
+ if not self.bbox_exists(frame_id, bbox_id):
241
+ raise ValueError("frame with id: {} does not contain bbox with id: {}".format(frame_id, bbox_id))
242
+ bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes}
243
+ return bboxes.get(bbox_id)
244
+
245
+ def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None:
246
+ """
247
+
248
+ Args:
249
+ frame_id (int):
250
+ bbox_id (int):
251
+ top (int):
252
+ left (int):
253
+ width (int):
254
+ height (int):
255
+
256
+ Returns:
257
+ None
258
+
259
+ Raises:
260
+ ValueError: if bbox_id already exist in frame information with frame_id
261
+ ValueError: if frame_id does not exist in frames attribute
262
+ """
263
+ if self.frame_exists(frame_id):
264
+ frame = self.frames[frame_id]
265
+ if not self.bbox_exists(frame_id, bbox_id):
266
+ frame.add_bbox(bbox_id, top, left, width, height)
267
+ else:
268
+ raise ValueError(
269
+ "frame with frame_id: {} already contains the bbox with id: {} ".format(frame_id, bbox_id))
270
+ else:
271
+ raise ValueError("frame with frame_id: {} does not exist".format(frame_id))
272
+
273
+ def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float):
274
+ """
275
+ Args:
276
+ frame_id:
277
+ bbox_id:
278
+ category:
279
+ confidence: the confidence value returned from yolo detection
280
+
281
+ Returns:
282
+ None
283
+
284
+ Raises:
285
+ ValueError: if the labels quota (top_k_labels) is exceeded.
286
+ """
287
+ bbox = self.find_bbox(frame_id, bbox_id)
288
+ if not bbox.labels_full(self.top_k_labels):
289
+ bbox.add_label(category, confidence)
290
+ else:
291
+ raise ValueError("labels in frame_id: {}, bbox_id: {} is fulled".format(frame_id, bbox_id))
292
+
293
+ def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None,
294
+ video_name: str = None):
295
+ self.video_details['frame_width'] = frame_width
296
+ self.video_details['frame_height'] = frame_height
297
+ self.video_details['frame_rate'] = frame_rate
298
+ self.video_details['video_name'] = video_name
299
+
300
+ def output(self):
301
+ output = {'video_details': self.video_details}
302
+ result = list(self.frames.values())
303
+ output['frames'] = [item.dic() for item in result]
304
+ return output
305
+
306
+ def json_output(self, output_name):
307
+ """
308
+ Args:
309
+ output_name:
310
+
311
+ Returns:
312
+ None
313
+
314
+ Notes:
315
+ It creates the json output with `output_name` name.
316
+ """
317
+ if not output_name.endswith('.json'):
318
+ output_name += '.json'
319
+ with open(output_name, 'w') as file:
320
+ json.dump(self.output(), file)
321
+ file.close()
322
+
323
+ def set_start(self):
324
+ self.start_time = datetime.now()
325
+
326
+ def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0,
327
+ seconds: int = 60) -> None:
328
+ """
329
+ Notes:
330
+ Creates folder and then periodically stores the jsons on that address.
331
+
332
+ Args:
333
+ output_dir (str): the directory where output files will be stored
334
+ hours (int):
335
+ minutes (int):
336
+ seconds (int):
337
+
338
+ Returns:
339
+ None
340
+
341
+ """
342
+ end = datetime.now()
343
+ interval = 0
344
+ interval += abs(min([hours, JsonMeta.HOURS]) * 3600)
345
+ interval += abs(min([minutes, JsonMeta.MINUTES]) * 60)
346
+ interval += abs(min([seconds, JsonMeta.SECONDS]))
347
+ diff = (end - self.start_time).seconds
348
+
349
+ if diff > interval:
350
+ output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json'
351
+ if not exists(output_dir):
352
+ makedirs(output_dir)
353
+ output = join(output_dir, output_name)
354
+ self.json_output(output_name=output)
355
+ self.frames = {}
356
+ self.start_time = datetime.now()
357
+
358
+ def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE):
359
+ """
360
+ Saves the output once the frame counter exceeds the frames quota (not implemented yet).
361
+ :param frames_quota:
362
+ :param frame_counter:
363
+ :param output_dir:
364
+ :return:
365
+ """
366
+ pass
367
+
368
+ def flush(self, output_dir):
369
+ """
370
+ Notes:
371
+ We use this function to output jsons whenever possible.
372
+ like the time that we exit the while loop of opencv.
373
+
374
+ Args:
375
+ output_dir:
376
+
377
+ Returns:
378
+ None
379
+
380
+ """
381
+ filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json'
382
+ output = join(output_dir, filename)
383
+ self.json_output(output_name=output)
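A short usage sketch for the JSON logger defined above (not part of the commit); the video details and box values are invented.

from deep_sort_pytorch.utils.json_logger import BboxToJsonLogger

json_logger = BboxToJsonLogger(top_k_labels=1)
json_logger.add_video_details(frame_width=1920, frame_height=1080,
                              frame_rate=20, video_name="camera1.avi")

# one frame, one tracked box, one label
json_logger.add_frame(frame_id=1, timestamp=0.05)
json_logger.add_bbox_to_frame(frame_id=1, bbox_id=0, top=138, left=1257, width=68, height=109)
json_logger.add_label_to_bbox(frame_id=1, bbox_id=0, category="pedestrian", confidence=0.9)

json_logger.json_output("demo_log")  # writes demo_log.json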
deep_sort_pytorch/utils/log.py ADDED
@@ -0,0 +1,17 @@
1
+ import logging
2
+
3
+
4
+ def get_logger(name='root'):
5
+ formatter = logging.Formatter(
6
+ # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s')
7
+ fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
8
+
9
+ handler = logging.StreamHandler()
10
+ handler.setFormatter(formatter)
11
+
12
+ logger = logging.getLogger(name)
13
+ logger.setLevel(logging.INFO)
14
+ logger.addHandler(handler)
15
+ return logger
16
+
17
+
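For completeness, how the helper above is typically used (not part of the commit):

from deep_sort_pytorch.utils.log import get_logger

logger = get_logger("deep_sort")
logger.info("tracker initialised")   # printed as: <timestamp> [INFO]: tracker initialised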
deep_sort_pytorch/utils/parser.py ADDED
@@ -0,0 +1,41 @@
1
+ import os
2
+ import yaml
3
+ from easydict import EasyDict as edict
4
+
5
+
6
+ class YamlParser(edict):
7
+ """
8
+ This is a YAML parser based on EasyDict.
9
+ """
10
+
11
+ def __init__(self, cfg_dict=None, config_file=None):
12
+ if cfg_dict is None:
13
+ cfg_dict = {}
14
+
15
+ if config_file is not None:
16
+ assert(os.path.isfile(config_file))
17
+ with open(config_file, 'r') as fo:
18
+ yaml_ = yaml.load(fo.read(), Loader=yaml.FullLoader)
19
+ cfg_dict.update(yaml_)
20
+
21
+ super(YamlParser, self).__init__(cfg_dict)
22
+
23
+ def merge_from_file(self, config_file):
24
+ with open(config_file, 'r') as fo:
25
+ yaml_ = yaml.load(fo.read(), Loader=yaml.FullLoader)
26
+ self.update(yaml_)
27
+
28
+ def merge_from_dict(self, config_dict):
29
+ self.update(config_dict)
30
+
31
+
32
+ def get_config(config_file=None):
33
+ return YamlParser(config_file=config_file)
34
+
35
+
36
+ if __name__ == "__main__":
37
+ cfg = YamlParser(config_file="../configs/yolov3.yaml")
38
+ cfg.merge_from_file("../configs/deep_sort.yaml")
39
+
40
+ import ipdb
41
+ ipdb.set_trace()
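A brief sketch of how this parser is consumed by initialize_deepsort in detect_deepsort.py (not part of the commit); the key read at the end is one of the DEEPSORT fields referenced there.

from deep_sort_pytorch.utils.parser import get_config

cfg = get_config()
cfg.merge_from_file("deep_sort_pytorch/configs/deep_sort.yaml")
print(cfg.DEEPSORT.MAX_AGE)          # EasyDict gives attribute-style access to the YAML keys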
deep_sort_pytorch/utils/tools.py ADDED
@@ -0,0 +1,39 @@
1
+ from functools import wraps
2
+ from time import time
3
+
4
+
5
+ def is_video(ext: str):
6
+ """
7
+ Returns true if ext exists in
8
+ allowed_exts for video files.
9
+
10
+ Args:
11
+ ext:
12
+
13
+ Returns:
14
+
15
+ """
16
+
17
+ allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp')
18
+ return any((ext.endswith(x) for x in allowed_exts))
19
+
20
+
21
+ def tik_tok(func):
22
+ """
23
+ keep track of time for each process.
24
+ Args:
25
+ func:
26
+
27
+ Returns:
28
+
29
+ """
30
+ @wraps(func)
31
+ def _time_it(*args, **kwargs):
32
+ start = time()
33
+ try:
34
+ return func(*args, **kwargs)
35
+ finally:
36
+ end_ = time()
37
+ print("time: {:.03f}s, fps: {:.03f}".format(end_ - start, 1 / (end_ - start)))
38
+
39
+ return _time_it
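A tiny usage sketch for the decorator above (not part of the commit); the dummy workload only exists so the timing is non-trivial.

from deep_sort_pytorch.utils.tools import tik_tok, is_video

@tik_tok
def dummy_work(n):
    # placeholder computation; the decorator prints elapsed time and the implied FPS
    return sum(range(n))

dummy_work(1_000_000)
print(is_video("demo.mp4"))          # True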
detect.py CHANGED
@@ -233,3 +233,4 @@ def main(opt):
233
  if __name__ == "__main__":
234
  opt = parse_opt()
235
  main(opt)
 
 
233
  if __name__ == "__main__":
234
  opt = parse_opt()
235
  main(opt)
236
+
detect_deepsort.py ADDED
@@ -0,0 +1,310 @@
1
+ import argparse
2
+ import os
3
+ import platform
4
+ import sys
5
+ from pathlib import Path
6
+ import math
7
+ import torch
8
+ import numpy as np
9
+ from deep_sort_pytorch.utils.parser import get_config
10
+ from deep_sort_pytorch.deep_sort import DeepSort
11
+ from collections import deque
12
+ FILE = Path(__file__).resolve()
13
+ ROOT = FILE.parents[0] # YOLO root directory
14
+ if str(ROOT) not in sys.path:
15
+ sys.path.append(str(ROOT)) # add ROOT to PATH
16
+ ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
17
+
18
+ from models.common import DetectMultiBackend
19
+ from utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
20
+ from utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
21
+ increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)
22
+ from utils.plots import Annotator, colors, save_one_box
23
+ from utils.torch_utils import select_device, smart_inference_mode
24
+
25
+ def initialize_deepsort():
26
+ # Create the Deep SORT configuration object and load settings from the YAML file
27
+ cfg_deep = get_config()
28
+ cfg_deep.merge_from_file("deep_sort_pytorch/configs/deep_sort.yaml")
29
+
30
+ # Initialize the DeepSort tracker
31
+ deepsort = DeepSort(cfg_deep.DEEPSORT.REID_CKPT,
32
+ max_dist=cfg_deep.DEEPSORT.MAX_DIST,
33
+ # min_confidence parameter sets the minimum tracking confidence required for an object detection to be considered in the tracking process
34
+ min_confidence=cfg_deep.DEEPSORT.MIN_CONFIDENCE,
35
+ #nms_max_overlap specifies the maximum allowed overlap between bounding boxes during non-maximum suppression (NMS)
36
+ nms_max_overlap=cfg_deep.DEEPSORT.NMS_MAX_OVERLAP,
37
+ #max_iou_distance parameter defines the maximum intersection-over-union (IoU) distance between object detections
38
+ max_iou_distance=cfg_deep.DEEPSORT.MAX_IOU_DISTANCE,
39
+ # Max_age: If an object's tracking ID is lost (i.e., the object is no longer detected), this parameter determines how many frames the tracker should wait before assigning a new id
40
+ max_age=cfg_deep.DEEPSORT.MAX_AGE, n_init=cfg_deep.DEEPSORT.N_INIT,
41
+ #nn_budget: It sets the budget for the nearest-neighbor search.
42
+ nn_budget=cfg_deep.DEEPSORT.NN_BUDGET,
43
+ use_cuda=True
44
+ )
45
+
46
+ return deepsort
47
+
48
+ deepsort = initialize_deepsort()
49
+ data_deque = {}
50
+ def classNames():
51
+ cocoClassNames = ["Bus", "Bike", "Car", "Pedestrian", "Truck"
52
+ ]
53
+ return cocoClassNames
54
+ className = classNames()
55
+
56
+ def colorLabels(classid):
57
+ if classid == 0: #person
58
+ color = (85, 45, 255)
59
+ elif classid == 1: #car
60
+ color = (222, 82, 175)
61
+ elif classid == 2: #Motorbike
62
+ color = (0, 204, 255)
63
+ elif classid == 3: #Bus
64
+ color = (0,149,255)
65
+ else:
66
+ color = (200, 100,0)
67
+ return tuple(color)
68
+
69
+ def draw_boxes(frame, bbox_xyxy, draw_trails, identities=None, categories=None, offset=(0,0)):
70
+ height, width, _ = frame.shape
71
+ for key in list(data_deque):
72
+ if key not in identities:
73
+ data_deque.pop(key)
74
+
75
+ for i, box in enumerate(bbox_xyxy):
76
+ x1, y1, x2, y2 = [int(i) for i in box]
77
+ x1 += offset[0]
78
+ y1 += offset[0]
79
+ x2 += offset[0]
80
+ y2 += offset[0]
81
+ #Find the center point of the bounding box
82
+ center = int((x1+x2)/2), int((y1+y2)/2)
83
+ cat = int(categories[i]) if categories is not None else 0
84
+ color = colorLabels(cat)
85
+ #color = [255,0,0]#compute_color_labels(cat)
86
+ id = int(identities[i]) if identities is not None else 0
87
+ # create new buffer for new object
88
+ if id not in data_deque:
89
+ data_deque[id] = deque(maxlen= 64)
90
+ data_deque[id].appendleft(center)
91
+ cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
92
+ name = className[cat]
93
+ label = str(id) + ":" + name
94
+ text_size = cv2.getTextSize(label, 0, fontScale=0.5, thickness=2)[0]
95
+ c2 = x1 + text_size[0], y1 - text_size[1] - 3
96
+ cv2.rectangle(frame, (x1, y1), c2, color, -1)
97
+ cv2.putText(frame, label, (x1, y1 - 2), 0, 0.5, [255, 255, 255], thickness=1, lineType=cv2.LINE_AA)
98
+ cv2.circle(frame,center, 2, (0,255,0), cv2.FILLED)
99
+ if draw_trails:
100
+ # draw trail
101
+ for i in range(1, len(data_deque[id])):
102
+ # check if on buffer value is none
103
+ if data_deque[id][i - 1] is None or data_deque[id][i] is None:
104
+ continue
105
+ # generate dynamic thickness of trails
106
+ thickness = int(np.sqrt(64 / float(i + i)) * 1.5)
107
+ # draw trails
108
+ cv2.line(frame, data_deque[id][i - 1], data_deque[id][i], color, thickness)
109
+ return frame
110
+
111
+ @smart_inference_mode()
112
+ def run_deepsort(
113
+ weights=ROOT / 'yolo.pt', # model path or triton URL
114
+ source=ROOT / 'data/images', # file/dir/URL/glob/screen/0(webcam)
115
+ data=ROOT / 'data/coco.yaml', # dataset.yaml path
116
+ imgsz=(640, 640), # inference size (height, width)
117
+ conf_thres=0.25, # confidence threshold
118
+ iou_thres=0.45, # NMS IOU threshold
119
+ max_det=1000, # maximum detections per image
120
+ device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu
121
+ view_img=False, # show results
122
+ nosave=False, # do not save images/videos
123
+ classes=None, # filter by class: --class 0, or --class 0 2 3
124
+ agnostic_nms=False, # class-agnostic NMS
125
+ augment=False, # augmented inference
126
+ visualize=False, # visualize features
127
+ update=False, # update all models
128
+ project=ROOT / 'runs/detect', # save results to project/name
129
+ name='exp', # save results to project/name
130
+ exist_ok=False, # existing project/name ok, do not increment
131
+ half=False, # use FP16 half-precision inference
132
+ dnn=False, # use OpenCV DNN for ONNX inference
133
+ vid_stride=1, # video frame-rate stride
134
+ draw_trails = False,
135
+ ):
136
+ source = str(source)
137
+ save_img = not nosave and not source.endswith('.txt') # save inference images
138
+ is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
139
+ is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
140
+ webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
141
+ screenshot = source.lower().startswith('screen')
142
+ if is_url and is_file:
143
+ source = check_file(source) # download
144
+
145
+ # Directories
146
+ save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run
147
+ save_dir.mkdir(parents=True, exist_ok=True) # make dir
148
+
149
+ # Load model
150
+ device = select_device(device)
151
+ model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)
152
+ stride, names, pt = model.stride, model.names, model.pt
153
+ imgsz = check_img_size(imgsz, s=stride) # check image size
154
+
155
+ # Dataloader
156
+ bs = 1 # batch_size
157
+ if webcam:
158
+ view_img = check_imshow(warn=True)
159
+ dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
160
+ bs = len(dataset)
161
+ elif screenshot:
162
+ dataset = LoadScreenshots(source, img_size=imgsz, stride=stride, auto=pt)
163
+ else:
164
+ dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
165
+ vid_path, vid_writer = [None] * bs, [None] * bs
166
+
167
+ # Run inference
168
+ model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz)) # warmup
169
+ seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
170
+ for path, im, im0s, vid_cap, s in dataset:
171
+ with dt[0]:
172
+ im = torch.from_numpy(im).to(model.device)
173
+ im = im.half() if model.fp16 else im.float() # uint8 to fp16/32
174
+ im /= 255 # 0 - 255 to 0.0 - 1.0
175
+ if len(im.shape) == 3:
176
+ im = im[None] # expand for batch dim
177
+
178
+ # Inference
179
+ with dt[1]:
180
+ visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
181
+ pred = model(im, augment=augment, visualize=visualize)
182
+ pred = pred[0][0]
183
+
184
+ # NMS
185
+ with dt[2]:
186
+ pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
187
+
188
+ # Second-stage classifier (optional)
189
+ # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
190
+
191
+ # Process predictions
192
+ for i, det in enumerate(pred): # per image
193
+ seen += 1
194
+ if webcam: # batch_size >= 1
195
+ p, im0, frame = path[i], im0s[i].copy(), dataset.count
196
+ s += f'{i}: '
197
+ else:
198
+ p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
199
+
200
+ p = Path(p) # to Path
201
+ save_path = str(save_dir / p.name) # im.jpg
202
+ txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt
203
+ s += '%gx%g ' % im.shape[2:] # print string
204
+ gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh
205
+ ims = im0.copy()
206
+ if len(det):
207
+ # Rescale boxes from img_size to im0 size
208
+ det[:, :4] = scale_boxes(im.shape[2:], det[:, :4], im0.shape).round()
209
+
210
+ # Print results
211
+ for c in det[:, 5].unique():
212
+ n = (det[:, 5] == c).sum() # detections per class
213
+ s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string
214
+ xywh_bboxs = []
215
+ confs = []
216
+ oids = []
217
+ outputs = []
218
+ # Write results
219
+ for *xyxy, conf, cls in reversed(det):
220
+ x1, y1, x2, y2 = xyxy
221
+ x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
222
+ #Find the Center Coordinates for each of the detected object
223
+ cx, cy = int((x1+x2)/2), int((y1+y2)/2)
224
+ #Find the Width and Height of the Boundng box
225
+ bbox_width = abs(x1-x2)
226
+ bbox_height = abs(y1-y2)
227
+ xcycwh = [cx, cy, bbox_width, bbox_height]
228
+ xywh_bboxs.append(xcycwh)
229
+ conf = math.ceil(conf*100)/100
230
+ confs.append(conf)
231
+ classNameInt = int(cls)
232
+ oids.append(classNameInt)
233
+ xywhs = torch.tensor(xywh_bboxs)
234
+ confss = torch.tensor(confs)
235
+ outputs = deepsort.update(xywhs, confss, oids, ims)
236
+ if len(outputs) > 0:
237
+ bbox_xyxy = outputs[:, :4]
238
+ identities = outputs[:, -2]
239
+ object_id = outputs[:, -1]
240
+ draw_boxes(ims, bbox_xyxy, draw_trails, identities, object_id)
241
+
242
+ # Stream results
243
+ if view_img:
244
+ if platform.system() == 'Linux' and p not in windows:
245
+ windows.append(p)
246
+ cv2.namedWindow(str(p), cv2.WINDOW_NORMAL | cv2.WINDOW_KEEPRATIO) # allow window resize (Linux)
247
+ cv2.resizeWindow(str(p), ims.shape[1], ims.shape[0])
248
+ cv2.imshow(str(p), ims)
249
+ cv2.waitKey(1) # 1 millisecond
250
+ # Save results (image with detections)
251
+ if save_img:
252
+ if vid_path[i] != save_path: # new video
253
+ vid_path[i] = save_path
254
+ if isinstance(vid_writer[i], cv2.VideoWriter):
255
+ vid_writer[i].release() # release previous video writer
256
+ if vid_cap: # video
257
+ fps = vid_cap.get(cv2.CAP_PROP_FPS)
258
+ w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
259
+ h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
260
+ else: # stream
261
+ fps, w, h = 30, ims.shape[1], ims.shape[0]
262
+ save_path = str(Path(save_path).with_suffix('.mp4')) # force *.mp4 suffix on results videos
263
+ vid_writer[i] = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))
264
+ vid_writer[i].write(ims)
265
+
266
+ # Print time (inference-only)
267
+ LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
268
+ if update:
269
+ strip_optimizer(weights[0]) # update model (to fix SourceChangeWarning)
270
+ return save_path
271
+
272
+ def parse_opt():
273
+ parser = argparse.ArgumentParser()
274
+ parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolo.pt', help='model path or triton URL')
275
+ parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
276
+ parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
277
+ parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
278
+ parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
279
+ parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
280
+ parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
281
+ parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
282
+ parser.add_argument('--view-img', action='store_true', help='show results')
283
+ parser.add_argument('--nosave', action='store_true', help='do not save images/videos')
284
+ parser.add_argument('--draw-trails', action='store_true', help='do not drawtrails')
285
+ parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --classes 0, or --classes 0 2 3')
286
+ parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
287
+ parser.add_argument('--augment', action='store_true', help='augmented inference')
288
+ parser.add_argument('--visualize', action='store_true', help='visualize features')
289
+ parser.add_argument('--update', action='store_true', help='update all models')
290
+ parser.add_argument('--project', default=ROOT / 'runs/detect', help='save results to project/name')
291
+ parser.add_argument('--name', default='exp', help='save results to project/name')
292
+ parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
293
+ parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
294
+ parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
295
+ parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
296
+ opt = parser.parse_args()
297
+ opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1 # expand
298
+ print_args(vars(opt))
299
+ return opt
300
+
301
+
302
+ def main(opt):
303
+ # check_requirements(exclude=('tensorboard', 'thop'))
304
+ run_deepsort(**vars(opt))
305
+
306
+
307
+
308
+ if __name__ == "__main__":
309
+ opt = parse_opt()
310
+ main(opt)
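run_deepsort can also be invoked directly from Python rather than through parse_opt; a minimal sketch (weights and source paths are placeholders, and importing the module already builds the Deep SORT tracker from deep_sort.yaml):

from detect_deepsort import run_deepsort

save_path = run_deepsort(
    weights="best.pt",               # placeholder YOLO checkpoint
    source="traffic.mp4",            # placeholder input video
    imgsz=(640, 640),
    conf_thres=0.25,
    iou_thres=0.45,
    device="0",                      # or 'cpu'
    draw_trails=True,
)
print("annotated video written to", save_path)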
detect_strongsort.py CHANGED
@@ -56,7 +56,7 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=3):
56
 
57
 
58
  @smart_inference_mode()
59
- def run(
60
  source='0',
61
  data = ROOT / 'data/coco.yaml', # data.yaml path
62
  yolo_weights=WEIGHTS / 'yolo.pt', # model.pt path(s),
@@ -137,14 +137,15 @@ def run(
137
  cfg.merge_from_file(config_strongsort)
138
 
139
  # Create as many strong sort instances as there are video sources
 
140
  strongsort_list = []
141
  for i in range(bs):
142
  strongsort_list.append(
143
  StrongSORT(
144
  strong_sort_weights,
145
- device,
146
  half,
147
- #max_dist=cfg.STRONGSORT.MAX_DIST,
148
  max_iou_distance=cfg.STRONGSORT.MAX_IOU_DISTANCE,
149
  max_age=cfg.STRONGSORT.MAX_AGE,
150
  n_init=cfg.STRONGSORT.N_INIT,
@@ -383,7 +384,7 @@ def parse_opt():
383
 
384
  def main(opt):
385
  # check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
386
- run(**vars(opt))
387
 
388
 
389
  if __name__ == "__main__":
 
56
 
57
 
58
  @smart_inference_mode()
59
+ def run_strongsort(
60
  source='0',
61
  data = ROOT / 'data/coco.yaml', # data.yaml path
62
  yolo_weights=WEIGHTS / 'yolo.pt', # model.pt path(s),
 
137
  cfg.merge_from_file(config_strongsort)
138
 
139
  # Create as many strong sort instances as there are video sources
140
+ gpu = '0'
141
  strongsort_list = []
142
  for i in range(bs):
143
  strongsort_list.append(
144
  StrongSORT(
145
  strong_sort_weights,
146
+ gpu,
147
  half,
148
+ max_dist=cfg.STRONGSORT.MAX_DIST,
149
  max_iou_distance=cfg.STRONGSORT.MAX_IOU_DISTANCE,
150
  max_age=cfg.STRONGSORT.MAX_AGE,
151
  n_init=cfg.STRONGSORT.N_INIT,
 
384
 
385
  def main(opt):
386
  # check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
387
+ run_strongsort(**vars(opt))
388
 
389
 
390
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -11,6 +11,7 @@ Pillow>=7.1.2
11
  psutil
12
  torchreid
13
  gdown
 
14
  PyYAML>=5.3.1
15
  requests>=2.23.0
16
  scipy>=1.4.1
 
11
  psutil
12
  torchreid
13
  gdown
14
+ easydict
15
  PyYAML>=5.3.1
16
  requests>=2.23.0
17
  scipy>=1.4.1