suragnair · vochicong · Mar 27, 2019 · Mar 27, 2019 · Apr 1, 2019 · Apr 2, 2019
diff --git a/Coach.py b/Coach.py
@@ -76,15 +76,15 @@ def learn(self):
             # examples of the iteration
             if not self.skipFirstSelfPlay or i>1:
                 iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)
-    
+
                 eps_time = AverageMeter()
                 bar = Bar('Self Play', max=self.args.numEps)
                 end = time.time()
-    
+
                 for eps in range(self.args.numEps):
                     self.mcts = MCTS(self.game, self.nnet, self.args)   # reset search tree
                     iterationTrainExamples += self.executeEpisode()
-    
+
                     # bookkeeping + plot progress
                     eps_time.update(time.time() - end)
                     end = time.time()
@@ -93,16 +93,16 @@ def learn(self):
                     bar.next()
                 bar.finish()
 
-                # save the iteration examples to the history 
+                # save the iteration examples to the history
                 self.trainExamplesHistory.append(iterationTrainExamples)
-                
+
             if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                 print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                 self.trainExamplesHistory.pop(0)
             # backup history to a file
-            # NB! the examples were collected using the model from the previous iteration, so (i-1)  
+            # NB! the examples were collected using the model from the previous iteration, so (i-1)
             self.saveTrainExamples(i-1)
-            
+
             # shuffle examples before training
             trainExamples = []
             for e in self.trainExamplesHistory:
@@ -113,7 +113,7 @@ def learn(self):
             self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
             self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
             pmcts = MCTS(self.game, self.pnet, self.args)
-            
+
             self.nnet.train(trainExamples)
             nmcts = MCTS(self.game, self.nnet, self.args)
 
@@ -129,7 +129,7 @@ def learn(self):
             else:
                 print('ACCEPTING NEW MODEL')
                 self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
-                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')                
+                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
 
     def getCheckpointFile(self, iteration):
         return 'checkpoint_' + str(iteration) + '.pth.tar'
@@ -139,20 +139,21 @@ def saveTrainExamples(self, iteration):
         if not os.path.exists(folder):
             os.makedirs(folder)
         filename = os.path.join(folder, self.getCheckpointFile(iteration)+".examples")
+        print("Save training examples to file {}".format(filename))
         with open(filename, "wb+") as f:
             Pickler(f).dump(self.trainExamplesHistory)
         f.closed
 
     def loadTrainExamples(self):
         modelFile = os.path.join(self.args.load_folder_file[0], self.args.load_folder_file[1])
         examplesFile = modelFile+".examples"
+        print("Load trainExamples from file {}".format(examplesFile))
         if not os.path.isfile(examplesFile):
             print(examplesFile)
             r = input("File with trainExamples not found. Continue? [y|n]")
             if r != "y":
                 sys.exit()
         else:
-            print("File with trainExamples found. Read it.")
             with open(examplesFile, "rb") as f:
                 self.trainExamplesHistory = Unpickler(f).load()
             f.closed

diff --git a/README.md b/README.md
@@ -2,9 +2,9 @@
 
 A simplified, highly flexible, commented and (hopefully) easy to understand implementation of self-play based reinforcement learning based on the AlphaGo Zero paper (Silver et al). It is designed to be easy to adopt for any two-player turn-based adversarial game and any deep learning framework of your choice. A sample implementation has been provided for the game of Othello in PyTorch, Keras, TensorFlow and Chainer. An accompanying tutorial can be found [here](http://web.stanford.edu/~surag/posts/alphazero.html). We also have implementations for GoBang and TicTacToe.
 
-To use a game of your choice, subclass the classes in ```Game.py``` and ```NeuralNet.py``` and implement their functions. Example implementations for Othello can be found in ```othello/OthelloGame.py``` and ```othello/{pytorch,keras,tensorflow,chainer}/NNet.py```. 
+To use a game of your choice, subclass the classes in ```Game.py``` and ```NeuralNet.py``` and implement their functions. Example implementations for Othello can be found in ```othello/OthelloGame.py``` and ```othello/{pytorch,keras,tensorflow,chainer}/NNet.py```.
 
-```Coach.py``` contains the core training loop and ```MCTS.py``` performs the Monte Carlo Tree Search. The parameters for the self-play can be specified in ```main.py```. Additional neural network parameters are in ```othello/{pytorch,keras,tensorflow,chainer}/NNet.py``` (cuda flag, batch size, epochs, learning rate etc.). 
+```Coach.py``` contains the core training loop and ```MCTS.py``` performs the Monte Carlo Tree Search. The parameters for the self-play can be specified in ```main.py```. Additional neural network parameters are in ```othello/{pytorch,keras,tensorflow,chainer}/NNet.py``` (cuda flag, batch size, epochs, learning rate etc.).
 
 To start training a model for Othello:
 ```bash
@@ -13,14 +13,19 @@ python main.py
 Choose your framework and game in ```main.py```.
 
 ### Docker Installation
-For easy environment setup, we can use [nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Once you have nvidia-docker set up, we can then simply run:
+For easy environment setup, we can use [nvidia-docker](https://github.com/NVIDIA/nvidia-docker)
+and [docker-compose](https://docs.docker.com/compose/compose-file/compose-file-v2/).
+Once you have them set up, simply run:
 ```
-./setup_env.sh
+docker-compose up
 ```
-to set up a (default: pyTorch) Jupyter docker container. We can now open a new terminal and enter:
+to start a (default: PyTorch) Jupyter Docker container,
+available at http://localhost:8888 (set `NOTEBOOK_PORT` to use a different host port).
+We can also open a new terminal and enter:
 ```
-docker exec -ti pytorch_notebook python main.py
+docker-compose exec pytorch_notebook bash
 ```
+to get a Bash prompt inside the container.
 
 ### Experiments
 We trained a PyTorch model for 6x6 Othello (~80 iterations, 100 episodes per iteration and 25 MCTS simulations per turn). This took about 3 days on an NVIDIA Tesla K80. The pretrained model (PyTorch) can be found in ```pretrained_models/othello/pytorch/```. You can play a game against it using ```pit.py```. Below is the performance of the model against a random and a greedy baseline with the number of iterations.
@@ -33,7 +38,7 @@ While the current code is fairly functional, we could benefit from the following
 * Game logic files for more games that follow the specifications in ```Game.py```, along with their neural networks
 * Neural networks in other frameworks
 * Pre-trained models for different game configurations
-* An asynchronous version of the code- parallel processes for self-play, neural net training and model comparison. 
+* An asynchronous version of the code- parallel processes for self-play, neural net training and model comparison.
 * Asynchronous MCTS as described in the paper
 
 ### Contributors and Credits
@@ -44,4 +49,3 @@ While the current code is fairly functional, we could benefit from the following
 * [Jernej Habjan](https://github.com/JernejHabjan) contributed RTS game.
 
 Thanks to [pytorch-classification](https://github.com/bearpaw/pytorch-classification) and [progress](https://github.com/verigak/progress).
-
diff --git a/docker-compose.yml b/docker-compose.yml
@@ -0,0 +1,19 @@
+version: '2.3'
+services:
+  pytorch_notebook:
+    image: pytorch:0.4.0-py3-gpu
+    runtime: nvidia
+    build:
+      context: docker
+      dockerfile: Dockerfile.pytorch
+      shm_size: '8G'
+    volumes:
+      - .:/workspace
+      - ./docker/jupyter_notebook_config.py:/root/.jupyter/jupyter_notebook_config.py
+    ports:
+      - ${NOTEBOOK_PORT:-8888}:8888
+      - ${VISDOM_PORT:-8097}:8097
+    environment:
+      - JUPYTER_ENABLE_LAB=1
+      - NOTEBOOK_PORT=
+      - VISDOM_PORT=${VISDOM_PORT:-8097}
diff --git a/docker/Dockerfile.pytorch b/docker/Dockerfile.pytorch
@@ -24,18 +24,19 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 RUN curl -o ~/miniconda.sh -O  https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh  && \
 chmod +x ~/miniconda.sh && \
 ~/miniconda.sh -b -p /opt/conda && \
-rm ~/miniconda.sh && \
-/opt/conda/bin/conda install numpy pyyaml scipy cython jupyter ipython mkl mkl-include && \
-/opt/conda/bin/conda install -c soumith magma-cuda90 && \
-/opt/conda/bin/conda install pytorch=0.4.0 -c pytorch && \
-/opt/conda/bin/conda clean -ya
-
+rm ~/miniconda.sh
 ENV PATH /opt/conda/bin:$PATH
+RUN conda install numpy pyyaml scipy cython jupyter ipython mkl mkl-include && \
+conda install -c soumith magma-cuda90 && \
+conda install pytorch=0.4.0 cuda90 -c pytorch && \
+conda clean -ya
+# Alternative without a pinned PyTorch version: conda install pytorch torchvision cuda90 -c pytorch
+
 
 # This must be done before pip so that requirements.txt is available.
 WORKDIR /opt/pytorch
 # pip install custom module listed in requirements.txt
-COPY ./docker/requirements.txt .
+COPY requirements.txt .
 RUN pip install -U pip && pip install -r requirements.txt
 
 WORKDIR /workspace
@@ -47,12 +48,12 @@ EXPOSE 8888
 EXPOSE 8097
 
 # Set up our notebook config.
-COPY ./docker/jupyter_notebook_config.py /root/.jupyter/
+COPY jupyter_notebook_config.py /root/.jupyter/
 
 # Jupyter has issues with being run directly:
 #   https://github.com/ipython/ipython/issues/7062
 # We just add a little wrapper script.
-COPY ./docker/run_jupyter.sh /
+COPY run_jupyter.sh /
 RUN chmod +x /run_jupyter.sh
 
 CMD ["/run_jupyter.sh", "--allow-root"]
diff --git a/docker/jupyter_notebook_config.py b/docker/jupyter_notebook_config.py
@@ -19,13 +19,4 @@
 c.NotebookApp.ip = '0.0.0.0'
 c.NotebookApp.port = int(os.getenv('PORT', 8888))
 c.NotebookApp.open_browser = False
-
-# sets a password if PASSWORD is set in the environment
-if 'PASSWORD' in os.environ:
-  password = os.environ['PASSWORD']
-  if password:
-    c.NotebookApp.password = passwd(password)
-  else:
-    c.NotebookApp.password = ''
-    c.NotebookApp.token = ''
-  del os.environ['PASSWORD']
+c.NotebookApp.token = os.getenv('TOKEN', u'')
diff --git a/main.py b/main.py
@@ -29,6 +29,5 @@
 
     c = Coach(g, nnet, args)
     if args.load_model:
-        print("Load trainExamples from file")
         c.loadTrainExamples()
     c.learn()
diff --git a/othello/pytorch/NNet.py b/othello/pytorch/NNet.py
@@ -132,10 +132,8 @@ def loss_v(self, targets, outputs):
     def save_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
         filepath = os.path.join(folder, filename)
         if not os.path.exists(folder):
-            print("Checkpoint Directory does not exist! Making directory {}".format(folder))
             os.mkdir(folder)
-        else:
-            print("Checkpoint Directory exists! ")
+        print("Save checkpoint to file {}".format(filepath))
         torch.save({
             'state_dict' : self.nnet.state_dict(),
         }, filepath)

diff --git a/setup_env.sh b/setup_env.sh