diff --git a/Coach.py b/Coach.py
index e37c027f3..3a64cf80f 100644
--- a/Coach.py
+++ b/Coach.py
@@ -76,15 +76,15 @@ def learn(self):
             # examples of the iteration
             if not self.skipFirstSelfPlay or i>1:
                 iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)
-    
+
                 eps_time = AverageMeter()
                 bar = Bar('Self Play', max=self.args.numEps)
                 end = time.time()
-    
+
                 for eps in range(self.args.numEps):
                     self.mcts = MCTS(self.game, self.nnet, self.args)   # reset search tree
                     iterationTrainExamples += self.executeEpisode()
-    
+
                     # bookkeeping + plot progress
                     eps_time.update(time.time() - end)
                     end = time.time()
@@ -93,16 +93,16 @@ def learn(self):
                     bar.next()
                 bar.finish()
 
-            # save the iteration examples to the history 
+            # save the iteration examples to the history
             self.trainExamplesHistory.append(iterationTrainExamples)
-            
+
            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                 print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                 self.trainExamplesHistory.pop(0)
             # backup history to a file
-            # NB! the examples were collected using the model from the previous iteration, so (i-1) 
+            # NB! the examples were collected using the model from the previous iteration, so (i-1)
             self.saveTrainExamples(i-1)
-            
+
             # shuffle examples before training
             trainExamples = []
             for e in self.trainExamplesHistory:
@@ -113,7 +113,7 @@ def learn(self):
             self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
             self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
             pmcts = MCTS(self.game, self.pnet, self.args)
-            
+
             self.nnet.train(trainExamples)
             nmcts = MCTS(self.game, self.nnet, self.args)
 
@@ -129,7 +129,7 @@ def learn(self):
             else:
                 print('ACCEPTING NEW MODEL')
                 self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
-                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar') 
+                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
 
     def getCheckpointFile(self, iteration):
         return 'checkpoint_' + str(iteration) + '.pth.tar'
@@ -139,6 +139,7 @@ def saveTrainExamples(self, iteration):
         if not os.path.exists(folder):
             os.makedirs(folder)
         filename = os.path.join(folder, self.getCheckpointFile(iteration)+".examples")
+        print("Save training examples to file {}".format(filename))
         with open(filename, "wb+") as f:
             Pickler(f).dump(self.trainExamplesHistory)
         f.closed
@@ -146,13 +147,13 @@
     def loadTrainExamples(self):
         modelFile = os.path.join(self.args.load_folder_file[0], self.args.load_folder_file[1])
         examplesFile = modelFile+".examples"
+        print("Load trainExamples from file {}".format(examplesFile))
         if not os.path.isfile(examplesFile):
             print(examplesFile)
             r = input("File with trainExamples not found. Continue? [y|n]")
             if r != "y":
                 sys.exit()
         else:
-            print("File with trainExamples found. Read it.")
             with open(examplesFile, "rb") as f:
                 self.trainExamplesHistory = Unpickler(f).load()
             f.closed
diff --git a/README.md b/README.md
index df227b74d..b3831a278 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,9 @@
 A simplified, highly flexible, commented and (hopefully) easy to understand implementation of self-play based reinforcement learning based on the AlphaGo Zero paper (Silver et al). It is designed to be easy to adopt for any two-player turn-based adversarial game and any deep learning framework of your choice. A sample implementation has been provided for the game of Othello in PyTorch, Keras, TensorFlow and Chainer. An accompanying tutorial can be found [here](http://web.stanford.edu/~surag/posts/alphazero.html). We also have implementations for GoBang and TicTacToe.
 
-To use a game of your choice, subclass the classes in ```Game.py``` and ```NeuralNet.py``` and implement their functions. Example implementations for Othello can be found in ```othello/OthelloGame.py``` and ```othello/{pytorch,keras,tensorflow,chainer}/NNet.py```. 
+To use a game of your choice, subclass the classes in ```Game.py``` and ```NeuralNet.py``` and implement their functions. Example implementations for Othello can be found in ```othello/OthelloGame.py``` and ```othello/{pytorch,keras,tensorflow,chainer}/NNet.py```.
 
-```Coach.py``` contains the core training loop and ```MCTS.py``` performs the Monte Carlo Tree Search. The parameters for the self-play can be specified in ```main.py```. Additional neural network parameters are in ```othello/{pytorch,keras,tensorflow,chainer}/NNet.py``` (cuda flag, batch size, epochs, learning rate etc.). 
+```Coach.py``` contains the core training loop and ```MCTS.py``` performs the Monte Carlo Tree Search. The parameters for the self-play can be specified in ```main.py```. Additional neural network parameters are in ```othello/{pytorch,keras,tensorflow,chainer}/NNet.py``` (cuda flag, batch size, epochs, learning rate etc.).
 
 To start training a model for Othello:
 ```bash
@@ -13,14 +13,19 @@ python main.py
 ```
 Choose your framework and game in ```main.py```.
 ### Docker Installation
-For easy environment setup, we can use [nvidia-docker](https://github.com/NVIDIA/nvidia-docker). Once you have nvidia-docker set up, we can then simply run:
+For easy environment setup, we can use [nvidia-docker](https://github.com/NVIDIA/nvidia-docker)
+and [docker-compose](https://docs.docker.com/compose/compose-file/compose-file-v2/).
+Once you have them set up, simply run:
 ```
-./setup_env.sh
+docker-compose up
 ```
-to set up a (default: pyTorch) Jupyter docker container. We can now open a new terminal and enter:
+to start a (default: pyTorch) Jupyter docker container
+available at http://localhost:8888.
+We can also open a new terminal and enter:
 ```
-docker exec -ti pytorch_notebook python main.py
+docker-compose exec pytorch_notebook bash
 ```
+to get a Bash prompt inside the container.
 
 ### Experiments
 We trained a PyTorch model for 6x6 Othello (~80 iterations, 100 episodes per iteration and 25 MCTS simulations per turn). This took about 3 days on an NVIDIA Tesla K80. The pretrained model (PyTorch) can be found in ```pretrained_models/othello/pytorch/```. You can play a game against it using ```pit.py```. Below is the performance of the model against a random and a greedy baseline with the number of iterations.
@@ -33,7 +38,7 @@ While the current code is fairly functional, we could benefit from the following:
 * Game logic files for more games that follow the specifications in ```Game.py```, along with their neural networks
 * Neural networks in other frameworks
 * Pre-trained models for different game configurations
-* An asynchronous version of the code- parallel processes for self-play, neural net training and model comparison. 
+* An asynchronous version of the code- parallel processes for self-play, neural net training and model comparison.
 * Asynchronous MCTS as described in the paper
 
 ### Contributors and Credits
@@ -44,4 +49,3 @@ While the current code is fairly functional, we could benefit from the following
 * [Jernej Habjan](https://github.com/JernejHabjan) contributed RTS game.
 
 Thanks to [pytorch-classification](https://github.com/bearpaw/pytorch-classification) and [progress](https://github.com/verigak/progress).
-
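The workflow described in the README hunk above maps onto the `docker-compose.yml` added below: the service is named `pytorch_notebook`, and the host-side ports come from `NOTEBOOK_PORT`/`VISDOM_PORT` with defaults of 8888/8097. A minimal session might look like the sketch below; the port values are only examples, and `python main.py` is the same entry point as in the non-Docker instructions.

```bash
# Start the Jupyter container; override the published host ports if 8888/8097 are taken.
NOTEBOOK_PORT=9999 VISDOM_PORT=9097 docker-compose up

# In a second terminal: run training, or open a shell, inside the running service.
docker-compose exec pytorch_notebook python main.py
docker-compose exec pytorch_notebook bash
```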
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 000000000..d48dde563
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,19 @@
+version: '2.3'
+services:
+  pytorch_notebook:
+    image: pytorch:0.4.0-py3-gpu
+    runtime: nvidia
+    build:
+      context: docker
+      dockerfile: Dockerfile.pytorch
+    shm_size: '8G'
+    volumes:
+      - .:/workspace
+      - ./docker/jupyter_notebook_config.py:/root/.jupyter/jupyter_notebook_config.py
+    ports:
+      - ${NOTEBOOK_PORT:-8888}:8888
+      - ${VISDOM_PORT:-8097}:8097
+    environment:
+      - JUPYTER_ENABLE_LAB=1
+      - NOTEBOOK_PORT=
+      - VISDOM_PORT=${VISDOM_PORT:-8097}
diff --git a/docker/Dockerfile.pytorch b/docker/Dockerfile.pytorch
index e75d5849e..1099a44a3 100644
--- a/docker/Dockerfile.pytorch
+++ b/docker/Dockerfile.pytorch
@@ -24,18 +24,19 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 RUN curl -o ~/miniconda.sh -O https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
     chmod +x ~/miniconda.sh && \
     ~/miniconda.sh -b -p /opt/conda && \
-rm ~/miniconda.sh && \
-/opt/conda/bin/conda install numpy pyyaml scipy cython jupyter ipython mkl mkl-include && \
-/opt/conda/bin/conda install -c soumith magma-cuda90 && \
-/opt/conda/bin/conda install pytorch=0.4.0 -c pytorch && \
-/opt/conda/bin/conda clean -ya
-
+rm ~/miniconda.sh
 ENV PATH /opt/conda/bin:$PATH
+RUN conda install numpy pyyaml scipy cython jupyter ipython mkl mkl-include && \
+conda install -c soumith magma-cuda90 && \
+conda install pytorch=0.4.0 cuda90 -c pytorch && \
+conda clean -ya
+# conda install pytorch torchvision cuda90 -c pytorch &&\
+
 # This must be done before pip so that requirements.txt is available.
 WORKDIR /opt/pytorch
 
 # pip install custom module listed in requirements.txt
-COPY ./docker/requirements.txt .
+COPY requirements.txt .
 RUN pip install -U pip && pip install -r requirements.txt
 
 WORKDIR /workspace
@@ -47,12 +48,12 @@ EXPOSE 8888
 EXPOSE 8097
 
 # Set up our notebook config.
-COPY ./docker/jupyter_notebook_config.py /root/.jupyter/
+COPY jupyter_notebook_config.py /root/.jupyter/
 
 # Jupyter has issues with being run directly:
 # https://github.com/ipython/ipython/issues/7062
 # We just add a little wrapper script.
-COPY ./docker/run_jupyter.sh /
+COPY run_jupyter.sh /
 RUN chmod +x /run_jupyter.sh
 
 CMD ["/run_jupyter.sh", "--allow-root"]
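The `COPY` sources in the Dockerfile drop their `./docker/` prefix because, with the compose file above, the build context is the `docker/` directory (`build: context: docker`), so `requirements.txt`, `jupyter_notebook_config.py` and `run_jupyter.sh` are resolved relative to that directory. After editing the Dockerfile or `requirements.txt`, the image can be rebuilt through compose rather than with a raw `docker build`; a minimal invocation, assuming the service name from `docker-compose.yml`, is:

```bash
# Rebuild the pytorch:0.4.0-py3-gpu image defined by the compose service.
docker-compose build pytorch_notebook
```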
diff --git a/docker/jupyter_notebook_config.py b/docker/jupyter_notebook_config.py
index c8f58371a..ebe11a65c 100644
--- a/docker/jupyter_notebook_config.py
+++ b/docker/jupyter_notebook_config.py
@@ -19,13 +19,4 @@
 c.NotebookApp.ip = '0.0.0.0'
 c.NotebookApp.port = int(os.getenv('PORT', 8888))
 c.NotebookApp.open_browser = False
-
-# sets a password if PASSWORD is set in the environment
-if 'PASSWORD' in os.environ:
-  password = os.environ['PASSWORD']
-  if password:
-    c.NotebookApp.password = passwd(password)
-  else:
-    c.NotebookApp.password = ''
-    c.NotebookApp.token = ''
-  del os.environ['PASSWORD']
+c.NotebookApp.token = os.getenv('TOKEN', u'')
diff --git a/main.py b/main.py
index b5d1cfe2b..89986e4f5 100644
--- a/main.py
+++ b/main.py
@@ -29,6 +29,5 @@
 
     c = Coach(g, nnet, args)
     if args.load_model:
-        print("Load trainExamples from file")
         c.loadTrainExamples()
     c.learn()
diff --git a/othello/pytorch/NNet.py b/othello/pytorch/NNet.py
index 858f33738..5145c3fce 100644
--- a/othello/pytorch/NNet.py
+++ b/othello/pytorch/NNet.py
@@ -132,10 +132,8 @@ def loss_v(self, targets, outputs):
     def save_checkpoint(self, folder='checkpoint', filename='checkpoint.pth.tar'):
         filepath = os.path.join(folder, filename)
         if not os.path.exists(folder):
-            print("Checkpoint Directory does not exist! Making directory {}".format(folder))
             os.mkdir(folder)
-        else:
-            print("Checkpoint Directory exists! ")
+        print("Save checkpoint to file {}".format(filepath))
         torch.save({
             'state_dict' : self.nnet.state_dict(),
         }, filepath)
diff --git a/setup_env.sh b/setup_env.sh
deleted file mode 100755
index 4e3b6ccb9..000000000
--- a/setup_env.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-DL_ENV=${1:"pytorch"}
-NOTEBOOK_PORT=${2:-8888}
-VISDOM_PORT=${3:-8097}
-PYTORCH_IMAGE=pytorch:0.4.0-py3-gpu
-
-if [ ${DL_ENV}=="pytorch" ]; then
-    if [[ ! $(docker images -q ${PYTORCH_IMAGE}) ]]; then
-        docker build . -t ${PYTORCH_IMAGE} -f ./docker/Dockerfile.pytorch
-    fi
-    # this should run a pytorch notebook container
-    docker run --runtime=nvidia --shm-size 8G -v `pwd`:/workspace -p ${NOTEBOOK_PORT}:8888 -p ${VISDOM_PORT}:8097 --name pytorch_notebook ${PYTORCH_IMAGE}
-    docker exec pytorch_notebook jupyter notebook list
-else
-    exit 1
-fi
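The rewritten `jupyter_notebook_config.py` reads the notebook token from a `TOKEN` environment variable and falls back to an empty token, so with `docker-compose.yml` as committed (which does not forward `TOKEN`) the notebook starts without authentication. If a token is wanted, one possible approach, not part of this diff, is to pass the variable explicitly to a one-off container:

```bash
# Illustrative only: run the service with an explicit Jupyter token.
# --service-ports publishes the ports declared in docker-compose.yml.
docker-compose run --service-ports -e TOKEN=change-me pytorch_notebook
```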