diff --git a/README.md b/README.md index 0be125c..da5cb28 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ Toyota Technological Institute at Chicago ## Introduction -DeepLab is a series of image semantic segmentation models, whose latest version, i.e. v3+, proves to be the state-of-art. Its major contribution is the use of atrous spatial pyramid pooling (ASPP) operation at the end of the encoder. While the model works extremely well, its open sourced code is hard to read (at least from my personal perspective). Here we reimplemented DeepLab v3, the earlier version of v3+ (which only additionally employs the decoder architecture), in a much simpler and understandable way. +DeepLab is a series of image semantic segmentation models, whose latest version, i.e. v3+, proves to be the state of the art. Its major contribution is the use of atrous spatial pyramid pooling (ASPP) operation at the end of the encoder. While the model works extremely well, its open source code is hard to read (at least from my personal perspective). Here we re-implemented DeepLab v3, the earlier version of v3+ (which only additionally employs the decoder architecture), in a much simpler and more understandable way. ## Dependencies @@ -32,7 +32,8 @@ DeepLab is a series of image semantic segmentation models, whose latest version, ├── nets ├── README.md ├── test_demo.py -├── test_any_image.py +├── test_demo_any.py +├── test_demo_cam.py ├── train.py └── utils.py ``` @@ -172,11 +173,23 @@ Image| Label | Prediction | :-------------------------:|:-------------------------:|:-------------------------: ![](data/demos/deeplab/resnet_101_voc2012/image_3.jpg) | ![](data/demos/deeplab/resnet_101_voc2012/image_3_label.png) | ![](data/demos/deeplab/resnet_101_voc2012/image_3_prediction.png) -## Running demo on your own images:
-Just put some JPG images into demo_dir and run the script test_any_image.py -Results will be written into same folder. -Make sure that proper model trained and a checkpoint is saved in models_dir -See the script for details. +## Custom Demo + +Just put some JPG-format images into `demo_dir` and run the following command in the terminal. + +```bash +$ python test_demo_any.py +``` +Results will be written into the same folder. Make sure that a proper model has been trained and a checkpoint is saved in `models_dir`. See the script for details. + +Contributed by [pinaxe1](https://github.com/leimao/DeepLab_v3/pull/7). Will modify to accept arguments and multiple image formats. + +## Camera Demo + +The script `test_demo_cam.py` does inference (semantic segmentation) on the video stream from a camera. +Make sure you have a trained model and set an existing checkpoint filename as `model_filename`. +Then run the script and watch output in cv2.namedWindow. +To stop the script press the "q" button. ## References diff --git a/test_demo_cam.py b/test_demo_cam.py new file mode 100644 index 0000000..04c1f37 --- /dev/null +++ b/test_demo_cam.py @@ -0,0 +1,46 @@ +''' +The script does inference (semantic segmentation) on the video stream from a camera. +Just run the script and watch output in cv2.namedWindow. +Make sure you have a trained model and set an existing checkpoint filename as model_filename +To stop the script press the "q" button. 
+ +Created on Sun Sep 15 19:53:37 2019 +@author: Pinaxe +''' + +from os import path as osp +import numpy as np +import cv2 + +from model import DeepLab +from utils import ( save_load_means, subtract_channel_means, label_to_color_image) + +if __name__ == '__main__': + cap = cv2.VideoCapture(0) + cv2.namedWindow('frame', cv2.WINDOW_NORMAL) + cv2.namedWindow('reslt', cv2.WINDOW_NORMAL) + model_filename = 'data/models/deeplab/resnet_101_voc2012/resnet_101_0.3685.ckpt' + + channel_means = save_load_means(means_filename='channel_means.npz',image_filenames=None, recalculate=False) + + deeplab = DeepLab('resnet_101', training=False) + deeplab.load(model_filename) + + while(True): + _, frame = cap.read() + cv2.imshow('frame', frame) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + image=frame + image_input = subtract_channel_means(image=image, channel_means=channel_means) + output = deeplab.test(inputs=[image_input], target_height=image.shape[0], target_width=image.shape[1])[0] + + img=label_to_color_image(np.argmax(output, axis=-1)) + img=img.astype(np.uint8) + cv2.imshow('reslt', img) + + + cap.release() + cv2.destroyAllWindows() + deeplab.close()