diff --git a/.gitignore b/.gitignore
index 84344cb0..abfabec8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,5 @@ hiddenlayer/
 __pycache__
 *.prototxt
 videos/
+*.env
+*.venv
\ No newline at end of file
diff --git a/README.md b/README.md
index 65244b36..9e6060c8 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ Don't be mean to star this repo if it helps your research.
 
 #### Install Requriements
 
-Create a python 3.7 environement, eg:
+Create a python 3.7 environment (3.9 also works for Mac users), e.g.:
 
     conda create -n pytorch-openpose python=3.7
     conda activate pytorch-openpose
@@ -49,7 +49,7 @@ Run:
 
 to run a demo with a feed from your webcam or run
 
-    python demo.py
+    python demo.py [-i <optional: image-file>] [-o <optional: output-image-path>]
 
 to use a image from the images folder or run 
 
diff --git a/demo.py b/demo.py
index 34ff7f47..42f55192 100644
--- a/demo.py
+++ b/demo.py
@@ -1,18 +1,29 @@
+import os
 import cv2
 import matplotlib.pyplot as plt
 import copy
 import numpy as np
 
-from src import model
+from argparse import ArgumentParser
+from tqdm import tqdm
+
 from src import util
 from src.body import Body
 from src.hand import Hand
 
+
+this_dir = os.path.dirname(__file__)
+
+parser = ArgumentParser()
+parser.add_argument("-i", "--input", type=str, default=os.path.join(this_dir, "images/demo.jpg"))
+parser.add_argument("-o", "--output", type=str)
+
+args = parser.parse_args()
+
 body_estimation = Body('model/body_pose_model.pth')
 hand_estimation = Hand('model/hand_pose_model.pth')
 
-test_image = 'images/demo.jpg'
-oriImg = cv2.imread(test_image)  # B,G,R order
+oriImg = cv2.imread(args.input)  # B,G,R order
 candidate, subset = body_estimation(oriImg)
 canvas = copy.deepcopy(oriImg)
 canvas = util.draw_bodypose(canvas, candidate, subset)
@@ -20,7 +31,7 @@
 hands_list = util.handDetect(candidate, subset, oriImg)
 
 all_hand_peaks = []
-for x, y, w, is_left in hands_list:
+for x, y, w, is_left in tqdm(hands_list):
     # cv2.rectangle(canvas, (x, y), (x+w, y+w), (0, 255, 0), 2, lineType=cv2.LINE_AA)
     # cv2.putText(canvas, 'left' if is_left else 'right', (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
 
@@ -39,6 +50,11 @@
 
 canvas = util.draw_handpose(canvas, all_hand_peaks)
 
-plt.imshow(canvas[:, :, [2, 1, 0]])
-plt.axis('off')
-plt.show()
+if args.output is None:
+    # No output path given: display the annotated image interactively.
+    plt.imshow(canvas[:, :, [2, 1, 0]])  # reorder channels B,G,R -> R,G,B for matplotlib
+    plt.axis('off')
+    plt.show()
+else:
+    # cv2.imwrite takes (filename, image); the image argument is required.
+    cv2.imwrite(args.output, canvas)
diff --git a/demo_camera.py b/demo_camera.py
index ad32930d..eb8b0682 100644
--- a/demo_camera.py
+++ b/demo_camera.py
@@ -1,10 +1,8 @@
 import cv2
-import matplotlib.pyplot as plt
 import copy
 import numpy as np
 import torch
 
-from src import model
 from src import util
 from src.body import Body
 from src.hand import Hand
@@ -12,8 +10,6 @@
 body_estimation = Body('model/body_pose_model.pth')
 hand_estimation = Hand('model/hand_pose_model.pth')
 
-print(f"Torch device: {torch.cuda.get_device_name()}")
-
 cap = cv2.VideoCapture(0)
 cap.set(3, 640)
 cap.set(4, 480)
diff --git a/demo_video.py b/demo_video.py
index d13e2394..65cd589d 100644
--- a/demo_video.py
+++ b/demo_video.py
@@ -1,7 +1,6 @@
 import copy
 import numpy as np
 import cv2
-from glob import glob
 import os
 import argparse
 import json
@@ -10,9 +9,9 @@
 # from: https://stackoverflow.com/a/61927951
 import argparse
 import subprocess
-import sys
-from pathlib import Path
+
 from typing import NamedTuple
+from tqdm import tqdm
 
 
 class FFProbeResult(NamedTuple):
@@ -35,7 +34,6 @@ def ffprobe(file_path) -> FFProbeResult:
 
 
 # openpose setup
-from src import model
 from src import util
 from src.body import Body
 from src.hand import Hand
@@ -113,6 +111,8 @@ def close(self):
 
 
 writer = None
+pbar = tqdm(total=int(cap.get(cv2.CAP_PROP_FRAME_COUNT)))
+
 while(cap.isOpened()):
     ret, frame = cap.read()
     if frame is None:
@@ -130,6 +130,7 @@ def close(self):
 
     # write the frame
     writer(posed_frame)
+    pbar.update()
 
     if cv2.waitKey(1) & 0xFF == ord('q'):
         break
diff --git a/requirements.txt b/requirements.txt
index 1a7565d3..e862cb26 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,5 @@ matplotlib
 opencv-python
 scipy
 scikit-image
-tqdm
\ No newline at end of file
+tqdm
+accelerate
\ No newline at end of file
diff --git a/src/body.py b/src/body.py
index ecf06938..ee1bdbf5 100644
--- a/src/body.py
+++ b/src/body.py
@@ -1,12 +1,11 @@
 import cv2
 import numpy as np
 import math
-import time
 from scipy.ndimage.filters import gaussian_filter
 import matplotlib.pyplot as plt
-import matplotlib
 import torch
-from torchvision import transforms
+
+from accelerate import Accelerator
 
 from src import util
 from src.model import bodypose_model
@@ -16,6 +15,11 @@ def __init__(self, model_path):
         self.model = bodypose_model()
         if torch.cuda.is_available():
             self.model = self.model.cuda()
+            self.device = torch.device("cuda")
+        else:
+            self.device = Accelerator().device
+            self.model = self.model.to(self.device)
+
         model_dict = util.transfer(self.model, torch.load(model_path))
         self.model.load_state_dict(model_dict)
         self.model.eval()
@@ -39,9 +43,7 @@ def __call__(self, oriImg):
             im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
             im = np.ascontiguousarray(im)
 
-            data = torch.from_numpy(im).float()
-            if torch.cuda.is_available():
-                data = data.cuda()
+            data = torch.from_numpy(im).float().to(self.device)
             # data = data.permute([2, 0, 1]).unsqueeze(0).float()
             with torch.no_grad():
                 Mconv7_stage6_L1, Mconv7_stage6_L2 = self.model(data)
diff --git a/src/hand.py b/src/hand.py
index 808aa13a..790651d1 100644
--- a/src/hand.py
+++ b/src/hand.py
@@ -1,13 +1,10 @@
 import cv2
-import json
 import numpy as np
-import math
-import time
 from scipy.ndimage.filters import gaussian_filter
-import matplotlib.pyplot as plt
-import matplotlib
 import torch
+
 from skimage.measure import label
+from accelerate import Accelerator
 
 from src.model import handpose_model
 from src import util
@@ -17,6 +14,11 @@ def __init__(self, model_path):
         self.model = handpose_model()
         if torch.cuda.is_available():
             self.model = self.model.cuda()
+            self.device = torch.device("cuda")
+        else:
+            self.device = Accelerator().device
+            self.model = self.model.to(self.device)
+
         model_dict = util.transfer(self.model, torch.load(model_path))
         self.model.load_state_dict(model_dict)
         self.model.eval()
@@ -40,8 +42,7 @@ def __call__(self, oriImg):
             im = np.ascontiguousarray(im)
 
             data = torch.from_numpy(im).float()
-            if torch.cuda.is_available():
-                data = data.cuda()
+            data = data.to(self.device)
             # data = data.permute([2, 0, 1]).unsqueeze(0).float()
             with torch.no_grad():
                 output = self.model(data).cpu().numpy()
diff --git a/src/hand_model_outputsize.py b/src/hand_model_outputsize.py
index 57dd0705..b8e4bbeb 100644
--- a/src/hand_model_outputsize.py
+++ b/src/hand_model_outputsize.py
@@ -2,6 +2,8 @@
 from tqdm import tqdm
 import json
 
+from accelerate import Accelerator
+
 from src.model import handpose_model
 
 model = handpose_model()
@@ -11,6 +13,11 @@
     data = torch.randn(1, 3, i, i)
     if torch.cuda.is_available():
         data = data.cuda()
+    else:
+        data = data.to(Accelerator().device)
+    # Keep the model on the same device as the input; Module.to() works in place.
+    model.to(data.device)
+
     size[i] = model(data).size(2)
 
 with open('hand_model_output_size.json') as f: