diff --git a/README.md b/README.md index 7445a5d..da951f8 100644 --- a/README.md +++ b/README.md @@ -2,16 +2,19 @@ About ===== Implementation of [Neural Programmer-Interpreters](http://arxiv.org/abs/1511.06279) with Keras. +Modified to use Python2 instead of Python3 and with example Jupyter notebooks (in the notebooks directory). How to Demo =========== [Demo Movie](https://youtu.be/s7PuBqwI2YA) +[View the Notebook](notebooks/TrainModel.ipynb) + requirement ----------- -* Python3 +* Python2 setup ----- diff --git a/notebooks/TrainModel.ipynb b/notebooks/TrainModel.ipynb new file mode 100644 index 0000000..0ade0ae --- /dev/null +++ b/notebooks/TrainModel.ipynb @@ -0,0 +1,947 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train the Model\n", + "The notebook is a copy of the training_model.py script designed for interactive use. Use the notebook after you have already creating training data using the following instructions. To run them inside jupyter add an extra ../ to the beginning.\n", + "\n", + "### create training dataset\n", + "```\n", + "sh src/run_create_addition_data.sh\n", + "```\n", + "\n", + "### create training dataset with showing steps on terminal\n", + "```\n", + "DEBUG=1 sh src/run_create_addition_data.sh\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using Theano backend.\n" + ] + } + ], + "source": [ + "# coding: utf-8\n", + "import sys\n", + "\n", + "from __future__ import with_statement\n", + "from __future__ import absolute_import\n", + "import os\n", + "import pickle\n", + "sys.path.append('../src') # for the NPI tools\n", + "from npi.add.config import FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH\n", + "from npi.add.lib import AdditionEnv, AdditionProgramSet, AdditionTeacher, create_char_map, create_questions, run_npi\n", + "from npi.add.model import AdditionNPIModel\n", + "from npi.core import ResultLogger, RuntimeSystem\n", + "from npi.terminal_core import TerminalNPIRunner, Terminal\n", + "from io import open\n", + "from glob import glob\n", + "from IPython.display import SVG" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "train_datasets = glob('../data/*.pkl')\n", + "assert len(train_datasets) > 0\n", + "model_path = '../data/addition.model'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "DEBUG_MODE = os.environ.get('DEBUG')\n", + "system = RuntimeSystem()\n", + "program_set = AdditionProgramSet()\n", + "with open(train_datasets[0], 'rb') as f:\n", + " steps_list = pickle.load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "npi_model = AdditionNPIModel(system, model_path, program_set)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "G\n", + "\n", + "\n", + "4599299216\n", + "\n", + "f_enc_convert (Sequential)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "[(1, 44), (1, 30)]\n", + "\n", + "(1, 1, 128)\n", + "\n", + "\n", + "5033807632\n", + "\n", + "merge_2 (Merge)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "[(1, 1, 128), (1, 1, 5)]\n", + "\n", + "(1, 1, 133)\n", + "\n", + "\n", + "4599299216->5033807632\n", + "\n", + "\n", + "\n", + "\n", + "4625154448\n", + "\n", + "program_embedding (Sequential)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 1)\n", + "\n", + "(1, 1, 5)\n", + "\n", + "\n", + "4625154448->5033807632\n", + "\n", + "\n", + "\n", + "\n", + "5009279184\n", + "\n", + "lstm_1 (LSTM)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 1, 133)\n", + "\n", + "(1, 256)\n", + "\n", + "\n", + "5033807632->5009279184\n", + "\n", + "\n", + "\n", + "\n", + "5074307728\n", + "\n", + "relu_lstm_1 (Activation)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 256)\n", + "\n", + "\n", + "5009279184->5074307728\n", + "\n", + "\n", + "\n", + "\n", + "4508958928\n", + "\n", + "repeatvector_2 (RepeatVector)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 1, 256)\n", + "\n", + "\n", + "5074307728->4508958928\n", + "\n", + "\n", + "\n", + "\n", + "4738189776\n", + "\n", + "lstm_2 (LSTM)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 1, 256)\n", + "\n", + "(1, 256)\n", + "\n", + "\n", + "4508958928->4738189776\n", + "\n", + "\n", + "\n", + "\n", + "4599983056\n", + "\n", + "relu_lstm_2 (Activation)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 256)\n", + "\n", + "\n", + "4738189776->4599983056\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "SVG('f_lstm.svg')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "G\n", + "\n", + "\n", + "4599299216\n", + "\n", + "f_enc_convert (Sequential)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "[(1, 44), (1, 30)]\n", + "\n", + "(1, 1, 128)\n", + "\n", + "\n", + "5033807632\n", + "\n", + "merge_2 (Merge)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "[(1, 1, 128), (1, 1, 5)]\n", + "\n", + "(1, 1, 133)\n", + "\n", + "\n", + "4599299216->5033807632\n", + "\n", + "\n", + "\n", + "\n", + "4625154448\n", + "\n", + "program_embedding (Sequential)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 1)\n", + "\n", + "(1, 1, 5)\n", + "\n", + "\n", + "4625154448->5033807632\n", + "\n", + "\n", + "\n", + "\n", + "5009279184\n", + "\n", + "lstm_1 (LSTM)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 1, 133)\n", + "\n", + "(1, 256)\n", + "\n", + "\n", + "5033807632->5009279184\n", + "\n", + "\n", + "\n", + "\n", + "5074307728\n", + "\n", + "relu_lstm_1 (Activation)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 256)\n", + "\n", + "\n", + "5009279184->5074307728\n", + "\n", + "\n", + "\n", + "\n", + "4508958928\n", + "\n", + "repeatvector_2 (RepeatVector)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 1, 256)\n", + "\n", + "\n", + "5074307728->4508958928\n", + "\n", + "\n", + "\n", + "\n", + "4738189776\n", + "\n", + "lstm_2 (LSTM)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 1, 256)\n", + "\n", + "(1, 256)\n", + "\n", + "\n", + "4508958928->4738189776\n", + "\n", + "\n", + "\n", + "\n", + "4599983056\n", + "\n", + "relu_lstm_2 (Activation)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 256)\n", + "\n", + "\n", + "4738189776->4599983056\n", + "\n", + "\n", + "\n", + "\n", + "4495123344\n", + "\n", + "dense_2 (Dense)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 5)\n", + "\n", + "\n", + "4599983056->4495123344\n", + "\n", + "\n", + "\n", + "\n", + "4743679824\n", + "\n", + "dense_3 (Dense)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 5)\n", + "\n", + "(1, 10)\n", + "\n", + "\n", + "4495123344->4743679824\n", + "\n", + "\n", + "\n", + "\n", + "5072433936\n", + "\n", + "softmax_prog (Activation)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 10)\n", + "\n", + "(1, 10)\n", + "\n", + "\n", + "4743679824->5072433936\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "SVG('f_prog.svg')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "G\n", + "\n", + "\n", + "4599299216\n", + "\n", + "f_enc_convert (Sequential)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "[(1, 44), (1, 30)]\n", + "\n", + "(1, 1, 128)\n", + "\n", + "\n", + "5033807632\n", + "\n", + "merge_2 (Merge)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "[(1, 1, 128), (1, 1, 5)]\n", + "\n", + "(1, 1, 133)\n", + "\n", + "\n", + "4599299216->5033807632\n", + "\n", + "\n", + "\n", + "\n", + "4625154448\n", + "\n", + "program_embedding (Sequential)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 1)\n", + "\n", + "(1, 1, 5)\n", + "\n", + "\n", + "4625154448->5033807632\n", + "\n", + "\n", + "\n", + "\n", + "5009279184\n", + "\n", + "lstm_1 (LSTM)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 1, 133)\n", + "\n", + "(1, 256)\n", + "\n", + "\n", + "5033807632->5009279184\n", + "\n", + "\n", + "\n", + "\n", + "5074307728\n", + "\n", + "relu_lstm_1 (Activation)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 256)\n", + "\n", + "\n", + "5009279184->5074307728\n", + "\n", + "\n", + "\n", + "\n", + "4508958928\n", + "\n", + "repeatvector_2 (RepeatVector)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 1, 256)\n", + "\n", + "\n", + "5074307728->4508958928\n", + "\n", + "\n", + "\n", + "\n", + "4738189776\n", + "\n", + "lstm_2 (LSTM)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 1, 256)\n", + "\n", + "(1, 256)\n", + "\n", + "\n", + "4508958928->4738189776\n", + "\n", + "\n", + "\n", + "\n", + "4599983056\n", + "\n", + "relu_lstm_2 (Activation)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 256)\n", + "\n", + "\n", + "4738189776->4599983056\n", + "\n", + "\n", + "\n", + "\n", + "4495123344\n", + "\n", + "dense_2 (Dense)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 5)\n", + "\n", + "\n", + "4599983056->4495123344\n", + "\n", + "\n", + "\n", + "\n", + "4968395088\n", + "\n", + "dense_1 (Dense)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 1)\n", + "\n", + "\n", + "4599983056->4968395088\n", + "\n", + "\n", + "\n", + "\n", + "4602434768\n", + "\n", + "dense_4 (Dense)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 10)\n", + "\n", + "\n", + "4599983056->4602434768\n", + "\n", + "\n", + "\n", + "\n", + "4971146064\n", + "\n", + "dense_5 (Dense)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 10)\n", + "\n", + "\n", + "4599983056->4971146064\n", + "\n", + "\n", + "\n", + "\n", + "4765728400\n", + "\n", + "dense_6 (Dense)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 256)\n", + "\n", + "(1, 10)\n", + "\n", + "\n", + "4599983056->4765728400\n", + "\n", + "\n", + "\n", + "\n", + "4743679824\n", + "\n", + "dense_3 (Dense)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 5)\n", + "\n", + "(1, 10)\n", + "\n", + "\n", + "4495123344->4743679824\n", + "\n", + "\n", + "\n", + "\n", + "4448914256\n", + "\n", + "sigmoid_end (Activation)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 1)\n", + "\n", + "(1, 1)\n", + "\n", + "\n", + "4968395088->4448914256\n", + "\n", + "\n", + "\n", + "\n", + "5072433936\n", + "\n", + "softmax_prog (Activation)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 10)\n", + "\n", + "(1, 10)\n", + "\n", + "\n", + "4743679824->5072433936\n", + "\n", + "\n", + "\n", + "\n", + "4971145232\n", + "\n", + "softmax_arg1 (Activation)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 10)\n", + "\n", + "(1, 10)\n", + "\n", + "\n", + "4602434768->4971145232\n", + "\n", + "\n", + "\n", + "\n", + "4823026000\n", + "\n", + "softmax_arg2 (Activation)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 10)\n", + "\n", + "(1, 10)\n", + "\n", + "\n", + "4971146064->4823026000\n", + "\n", + "\n", + "\n", + "\n", + "4744310288\n", + "\n", + "softmax_arg3 (Activation)\n", + "\n", + "input:\n", + "\n", + "output:\n", + "\n", + "(1, 10)\n", + "\n", + "(1, 10)\n", + "\n", + "\n", + "4765728400->4744310288\n", + "\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "SVG('model.svg')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fitting\n", + "The actual code to fit the model with the training data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training f_enc\n", + "ep 0: loss=1.03819\n", + "ep 1: loss=0.242629\n", + "ep 2: loss=0.094752\n", + "ep 3: loss=0.0467588\n", + "ep 4: loss=0.0263074\n", + "ep 5: loss=0.0160311\n", + "ep 6: loss=0.0102503\n", + "ep 7: loss=0.00677445\n", + "ep 8: loss=0.00456079\n", + "ep 9: loss=0.00311668\n", + "ep 10: loss=0.00214863\n", + "ep 11: loss=0.00149294\n", + "ep 12: loss=0.00103956\n", + "ep 13: loss=0.000728405\n", + "ep 14: loss=0.000511663\n", + "ep 15: loss=0.000359279\n", + "ep 16: loss=0.000252763\n", + "ep 17: loss=0.000177881\n", + "ep 18: loss=0.000125569\n", + "ep 19: loss=8.86349e-05\n", + "ep 20: loss=6.25864e-05\n", + "ep 21: loss=4.4173e-05\n", + "ep 22: loss=3.12042e-05\n", + "ep 23: loss=2.20769e-05\n", + "ep 24: loss=1.56068e-05\n", + "ep 25: loss=1.10763e-05\n", + "ep 26: loss=7.82133e-06\n", + "ep 27: loss=5.57107e-06\n", + "ep 28: loss=3.95074e-06\n", + "ep 29: loss=2.81476e-06\n", + "ep 30: loss=2.01948e-06\n", + "ep 31: loss=1.45326e-06\n", + "ep 32: loss=1.0526e-06\n", + "ep 33: loss=7.7144e-07\n", + "Re-Compile Model lr=0.0001 aw=1.0\n", + "training questions of a<100 and b<100" + ] + } + ], + "source": [ + "npi_model.fit(steps_list)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/requirements.txt b/requirements.txt index 5f3ff60..3620361 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ -h5py==2.6.0 -Keras==1.0.2 -numpy==1.11.0 -pydot-ng==1.0.0 -pyparsing==2.1.1 -PyYAML==3.11 -scipy==0.17.0 -six==1.10.0 -Theano==0.8.2 +h5py>=2.6.0 +Keras>=1.0.2 +numpy>=1.11.0 +pydot-ng>=1.0.0 +pyparsing>=2.1.1 +PyYAML>=3.11 +scipy>=0.17.0 +six>=1.10.0 +Theano>=0.8.2 diff --git a/src/npi/add/create_training_data.py b/src/npi/add/create_training_data.py index aeab650..08d7143 100644 --- a/src/npi/add/create_training_data.py +++ b/src/npi/add/create_training_data.py @@ -1,4 +1,6 @@ # coding: utf-8 +from __future__ import with_statement +from __future__ import absolute_import import os import curses import pickle @@ -8,9 +10,10 @@ from npi.add.lib import AdditionEnv, AdditionProgramSet, AdditionTeacher, create_char_map, create_questions, run_npi from npi.core import ResultLogger from npi.terminal_core import TerminalNPIRunner, Terminal +from io import open -def main(stdscr, filename: str, num: int, result_logger: ResultLogger): +def main(stdscr, filename, num, result_logger): terminal = Terminal(stdscr, create_char_map()) terminal.init_window(FIELD_WIDTH, FIELD_ROW) program_set = AdditionProgramSet() @@ -45,4 +48,4 @@ def main(stdscr, filename: str, num: int, result_logger: ResultLogger): num_data = int(sys.argv[2]) if len(sys.argv) > 2 else 1000 log_filename = sys.argv[3] if len(sys.argv) > 3 else 'result.log' curses.wrapper(main, output_filename, num_data, ResultLogger(log_filename)) - print("create %d training data" % num_data) + print "create %d training data" % num_data diff --git a/src/npi/add/lib.py b/src/npi/add/lib.py index 5aa21de..5f083fe 100644 --- a/src/npi/add/lib.py +++ b/src/npi/add/lib.py @@ -1,4 +1,5 @@ # coding: utf-8 +from __future__ import absolute_import from random import random import numpy as np @@ -9,7 +10,7 @@ __author__ = 'k_morishita' -class AdditionEnv: +class AdditionEnv(object): """ Environment of Addition """ @@ -23,9 +24,9 @@ def reset(self): self.screen.fill(0) self.pointers = [self.screen.width-1] * self.screen.height # rightmost - def get_observation(self) -> np.ndarray: + def get_observation(self): value = [] - for row in range(len(self.pointers)): + for row in xrange(len(self.pointers)): value.append(self.to_one_hot(self.screen[row, self.pointers[row]])) return np.array(value) # shape of FIELD_ROW * FIELD_DEPTH @@ -70,7 +71,7 @@ class MovePtrProgram(Program): TO_LEFT = 0 TO_RIGHT = 1 - def do(self, env: AdditionEnv, args: IntegerArguments): + def do(self, env, args): ptr_kind = args.decode_at(0) left_or_right = args.decode_at(1) env.move_pointer(ptr_kind, left_or_right) @@ -81,13 +82,13 @@ class WriteProgram(Program): WRITE_TO_CARRY = 0 WRITE_TO_OUTPUT = 1 - def do(self, env: AdditionEnv, args: IntegerArguments): + def do(self, env, args): row = 2 if args.decode_at(0) == self.WRITE_TO_CARRY else 3 digit = args.decode_at(1) env.write(row, digit+1) -class AdditionProgramSet: +class AdditionProgramSet(object): NOP = Program('NOP') MOVE_PTR = MovePtrProgram('MOVE_PTR', 4, 2) # PTR_KIND(4), LEFT_OR_RIGHT(2) WRITE = WriteProgram('WRITE', 2, 10) # CARRY_OR_OUT(2), DIGITS(10) @@ -109,17 +110,17 @@ def __init__(self): self.register(self.LSHIFT) self.register(self.RSHIFT) - def register(self, pg: Program): + def register(self, pg): pg.program_id = self.program_id self.map[pg.program_id] = pg self.program_id += 1 - def get(self, i: int): + def get(self, i): return self.map.get(i) class AdditionTeacher(NPIStep): - def __init__(self, program_set: AdditionProgramSet): + def __init__(self, program_set): self.pg_set = program_set self.step_queue = None self.step_queue_stack = [] @@ -141,7 +142,7 @@ def register_subprogram(self, pg, method): self.sub_program[pg.program_id] = method @staticmethod - def decode_params(env_observation: np.ndarray, arguments: IntegerArguments): + def decode_params(env_observation, arguments): return env_observation.argmax(axis=1), arguments.decode_all() def enter_function(self): @@ -151,7 +152,7 @@ def enter_function(self): def exit_function(self): self.step_queue = self.step_queue_stack.pop() - def step(self, env_observation: np.ndarray, pg: Program, arguments: IntegerArguments) -> StepOutput: + def step(self, env_observation, pg, arguments): if not self.step_queue: self.step_queue = self.sub_program[pg.program_id](env_observation, arguments) if self.step_queue: @@ -162,17 +163,17 @@ def step(self, env_observation: np.ndarray, pg: Program, arguments: IntegerArgum return ret @staticmethod - def convert_for_step_return(step_values: tuple) -> StepOutput: + def convert_for_step_return(step_values): if len(step_values) == 2: return StepOutput(PG_CONTINUE, step_values[0], IntegerArguments(step_values[1])) else: return StepOutput(step_values[0], step_values[1], IntegerArguments(step_values[2])) @staticmethod - def pg_primitive(env_observation: np.ndarray, arguments: IntegerArguments): + def pg_primitive(env_observation, arguments): return None - def pg_add(self, env_observation: np.ndarray, arguments: IntegerArguments): + def pg_add(self, env_observation, arguments): ret = [] (in1, in2, carry, output), (a1, a2, a3) = self.decode_params(env_observation, arguments) if in1 == 0 and in2 == 0 and carry == 0: @@ -181,7 +182,7 @@ def pg_add(self, env_observation: np.ndarray, arguments: IntegerArguments): ret.append((self.pg_set.LSHIFT, None)) return ret - def pg_add1(self, env_observation: np.ndarray, arguments: IntegerArguments): + def pg_add1(self, env_observation, arguments): ret = [] p = self.pg_set (in1, in2, carry, output), (a1, a2, a3) = self.decode_params(env_observation, arguments) @@ -200,7 +201,7 @@ def sum_ch_list(ch_list): ret += ch - 1 return ret - def pg_carry(self, env_observation: np.ndarray, arguments: IntegerArguments): + def pg_carry(self, env_observation, arguments): ret = [] p = self.pg_set ret.append((p.MOVE_PTR, (p.MOVE_PTR.PTR_CARRY, p.MOVE_PTR.TO_LEFT))) @@ -208,7 +209,7 @@ def pg_carry(self, env_observation: np.ndarray, arguments: IntegerArguments): ret.append((PG_RETURN, p.MOVE_PTR, (p.MOVE_PTR.PTR_CARRY, p.MOVE_PTR.TO_RIGHT))) return ret - def pg_lshift(self, env_observation: np.ndarray, arguments: IntegerArguments): + def pg_lshift(self, env_observation, arguments): ret = [] p = self.pg_set ret.append((p.MOVE_PTR, (p.MOVE_PTR.PTR_IN1, p.MOVE_PTR.TO_LEFT))) @@ -217,7 +218,7 @@ def pg_lshift(self, env_observation: np.ndarray, arguments: IntegerArguments): ret.append((PG_RETURN, p.MOVE_PTR, (p.MOVE_PTR.PTR_OUT, p.MOVE_PTR.TO_LEFT))) return ret - def pg_rshift(self, env_observation: np.ndarray, arguments: IntegerArguments): + def pg_rshift(self, env_observation, arguments): ret = [] p = self.pg_set ret.append((p.MOVE_PTR, (p.MOVE_PTR.PTR_IN1, p.MOVE_PTR.TO_RIGHT))) @@ -228,21 +229,21 @@ def pg_rshift(self, env_observation: np.ndarray, arguments: IntegerArguments): def create_char_map(): - char_map = dict((i+1, "%s" % i) for i in range(10)) + char_map = dict((i+1, "%s" % i) for i in xrange(10)) char_map[0] = ' ' return char_map def create_questions(num=100, max_number=10000): questions = [] - for in1 in range(10): - for in2 in range(10): + for in1 in xrange(10): + for in2 in xrange(10): questions.append(dict(in1=in1, in2=in2)) - for _ in range(100): + for _ in xrange(100): questions.append(dict(in1=int(random() * 100), in2=int(random() * 100))) - for _ in range(100): + for _ in xrange(100): questions.append(dict(in1=int(random() * 1000), in2=int(random() * 1000))) questions += [ @@ -255,7 +256,7 @@ def create_questions(num=100, max_number=10000): def create_random_questions(num=100, max_number=10000): questions = [] - for _ in range(num): + for _ in xrange(num): questions.append(dict(in1=int(random() * max_number), in2=int(random() * max_number))) return questions diff --git a/src/npi/add/model.py b/src/npi/add/model.py index 42a8e4c..7cb6dc3 100644 --- a/src/npi/add/model.py +++ b/src/npi/add/model.py @@ -1,5 +1,7 @@ #!/usr/bin/env python # coding: utf-8 +from __future__ import division +from __future__ import absolute_import import os from collections import Counter from copy import copy @@ -14,7 +16,8 @@ from keras.models import Sequential, model_from_yaml from keras.optimizers import Adam from keras.regularizers import l1, l2 -from keras.utils.visualize_util import plot +from keras.utils.visualize_util import model_to_dot + from npi.add.config import FIELD_ROW, FIELD_DEPTH, PROGRAM_VEC_SIZE, PROGRAM_KEY_VEC_SIZE, FIELD_WIDTH from npi.add.lib import AdditionProgramSet, AdditionEnv, run_npi, create_questions, AdditionTeacher, \ @@ -22,15 +25,21 @@ from npi.core import NPIStep, Program, IntegerArguments, StepOutput, RuntimeSystem, PG_RETURN, StepInOut, StepInput, \ to_one_hot_array from npi.terminal_core import TerminalNPIRunner +from itertools import izip __author__ = 'k_morishita' +def plot(in_model, to_file, **kwargs): + """ + A SVG-based version of the keras version + """ + return model_to_dot(in_model, **kwargs).write_svg(to_file) class AdditionNPIModel(NPIStep): model = None f_enc = None - def __init__(self, system: RuntimeSystem, model_path: str=None, program_set: AdditionProgramSet=None): + def __init__(self, system, model_path=None, program_set=None): self.system = system self.model_path = model_path self.program_set = program_set @@ -66,7 +75,7 @@ def build(self): f_lstm.add(RepeatVector(1)) f_lstm.add(LSTM(256, return_sequences=False, stateful=True, W_regularizer=l2(0.0000001))) f_lstm.add(Activation('relu', name='relu_lstm_2')) - # plot(f_lstm, to_file='f_lstm.png', show_shapes=True) + plot(f_lstm, to_file='f_lstm.svg', show_shapes=True) f_end = Sequential(name='f_end') f_end.add(f_lstm) @@ -78,10 +87,10 @@ def build(self): f_prog.add(Dense(PROGRAM_KEY_VEC_SIZE, activation="relu")) f_prog.add(Dense(PROGRAM_VEC_SIZE, W_regularizer=l2(0.0001))) f_prog.add(Activation('softmax', name='softmax_prog')) - # plot(f_prog, to_file='f_prog.png', show_shapes=True) + plot(f_prog, to_file='f_prog.svg', show_shapes=True) f_args = [] - for ai in range(1, IntegerArguments.max_arg_num+1): + for ai in xrange(1, IntegerArguments.max_arg_num+1): f_arg = Sequential(name='f_arg%s' % ai) f_arg.add(f_lstm) f_arg.add(Dense(IntegerArguments.depth, W_regularizer=l2(0.0001))) @@ -93,7 +102,7 @@ def build(self): [f_end.output, f_prog.output] + [fa.output for fa in f_args], name="npi") self.compile_model() - plot(self.model, to_file='model.png', show_shapes=True) + plot(self.model, to_file='model.svg', show_shapes=True) def reset(self): super(AdditionNPIModel, self).reset() @@ -149,17 +158,17 @@ def filter_question(condition_func): # print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) q_type = "training questions of a<100 and b<100" - print(q_type) + print q_type pr = 0.8 all_ok = self.fit_to_subset(filter_question(lambda a, b: a < 100 and b < 100), pass_rate=pr) - print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) + print "%s is pass_rate >= %s: %s" % (q_type, pr, all_ok) while True: if self.test_and_learn([10, 100, 1000]): break q_type = "training questions of ALL" - print(q_type) + print q_type q_num = 100 skip_correct = False @@ -168,10 +177,10 @@ def filter_question(condition_func): np.random.shuffle(questions) questions = questions[:q_num] all_ok = self.fit_to_subset(questions, pass_rate=pr, skip_correct=skip_correct) - print("%s is pass_rate >= %s: %s" % (q_type, pr, all_ok)) + print "%s is pass_rate >= %s: %s" % (q_type, pr, all_ok) def fit_to_subset(self, steps_list, pass_rate=1.0, skip_correct=False): - for i in range(10): + for i in xrange(10): all_ok = self.do_learn(steps_list, 100, pass_rate=pass_rate, skip_correct=skip_correct) if all_ok: return True @@ -179,10 +188,10 @@ def fit_to_subset(self, steps_list, pass_rate=1.0, skip_correct=False): def test_and_learn(self, num_questions): for num in num_questions: - print("test all type of %d questions" % num) + print "test all type of %d questions" % num cc, wc, wrong_questions = self.test_to_subset(create_random_questions(num)) acc_rate = cc/(cc+wc) - print("Accuracy %s(OK=%d, NG=%d)" % (acc_rate, cc, wc)) + print "Accuracy %s(OK=%d, NG=%d)" % (acc_rate, cc, wc) if wc > 0: self.fit_to_subset(wrong_questions, pass_rate=1.0, skip_correct=False) return False @@ -207,7 +216,7 @@ def test_to_subset(self, questions): @staticmethod def dict_to_str(d): - return str(tuple([(k, d[k]) for k in sorted(d)])) + return unicode(tuple([(k, d[k]) for k in sorted(d)])) def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False): addition_env = AdditionEnv(FIELD_ROW, FIELD_WIDTH, FIELD_DEPTH) @@ -216,7 +225,7 @@ def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False): correct_count = Counter() no_change_count = 0 last_loss = 1000 - for ep in range(1, epoch+1): + for ep in xrange(1, epoch+1): correct_new = wrong_new = 0 losses = [] ok_rate = [] @@ -228,7 +237,7 @@ def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False): if correct_count[question_key] == 0: correct_new += 1 correct_count[question_key] += 1 - print("GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" % (ep, idx, self.dict_to_str(question), correct_count[question_key])) + print "GOOD!: ep=%2d idx=%3d :%s CorrectCount=%s" % (ep, idx, self.dict_to_str(question), correct_count[question_key]) ok_rate.append(1) cc = correct_count[question_key] if skip_correct or int(math.sqrt(cc)) ** 2 != cc: @@ -236,7 +245,7 @@ def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False): else: ok_rate.append(0) if correct_count[question_key] > 0: - print("Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0" % (ep, idx, self.dict_to_str(question), correct_count[question_key])) + print "Degraded: ep=%2d idx=%3d :%s CorrectCount=%s -> 0" % (ep, idx, self.dict_to_str(question), correct_count[question_key]) correct_count[question_key] = 0 wrong_new += 1 @@ -252,18 +261,17 @@ def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False): self.reset() - for i, (x, y, w) in enumerate(zip(xs, ys, ws)): + for i, (x, y, w) in enumerate(izip(xs, ys, ws)): loss = self.model.train_on_batch(x, y, sample_weight=w) if not np.isfinite(loss): - print("Loss is not finite!, Last Input=%s" % ([i, (x, y, w)])) + print "Loss is not finite!, Last Input=%s" % ([i, (x, y, w)]) self.print_weights(last_weights, detail=True) raise RuntimeError("Loss is not finite!") losses.append(loss) last_weights = self.model.get_weights() if losses: cur_loss = np.average(losses) - print("ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)" % - (ep, np.average(ok_rate)*100, correct_new, wrong_new, cur_loss, len(steps_list))) + print "ep=%2d: ok_rate=%.2f%% (+%s -%s): ave loss %s (%s samples)" % (ep, np.average(ok_rate)*100, correct_new, wrong_new, cur_loss, len(steps_list)) # self.print_weights() if correct_new + wrong_new == 0: no_change_count += 1 @@ -271,21 +279,21 @@ def do_learn(self, steps_list, epoch, pass_rate=1.0, skip_correct=False): no_change_count = 0 if math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5: - print("math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:") + print "math.fabs(1 - cur_loss/last_loss) < 0.001 and no_change_count > 5:" return False last_loss = cur_loss - print("=" * 80) + print "=" * 80 self.save() if np.average(ok_rate) >= pass_rate: return True return False def update_learning_rate(self, learning_rate, arg_weight=1.): - print("Re-Compile Model lr=%s aw=%s" % (learning_rate, arg_weight)) + print "Re-Compile Model lr=%s aw=%s" % (learning_rate, arg_weight) self.compile_model(learning_rate, arg_weight=arg_weight) def train_f_enc(self, steps_list, epoch=50): - print("training f_enc") + print "training f_enc" f_add0 = Sequential(name='f_add0') f_add0.add(self.f_enc) f_add0.add(Dense(FIELD_DEPTH)) @@ -299,7 +307,7 @@ def train_f_enc(self, steps_list, epoch=50): env_model = Model(self.f_enc.inputs, [f_add0.output, f_add1.output], name="env_model") env_model.compile(optimizer='adam', loss=['categorical_crossentropy']*2) - for ep in range(epoch): + for ep in xrange(epoch): losses = [] for idx, steps_dict in enumerate(steps_list): prev = None @@ -319,7 +327,7 @@ def train_f_enc(self, steps_list, epoch=50): y = [yy.reshape((self.batch_size, -1)) for yy in [y0, y1]] loss = env_model.train_on_batch(x, y) losses.append(loss) - print("ep %3d: loss=%s" % (ep, np.average(losses))) + print "ep %3d: loss=%s" % (ep, np.average(losses)) if np.average(losses) < 1e-06: break @@ -334,12 +342,12 @@ def question_test(self, addition_env, npi_runner, question): pass return False - def convert_input(self, p_in: StepInput): + def convert_input(self, p_in): x_pg = np.array((p_in.program.program_id,)) x = [xx.reshape((self.batch_size, -1)) for xx in (p_in.env, p_in.arguments.values, x_pg)] return x - def convert_output(self, p_out: StepOutput): + def convert_output(self, p_out): y = [np.array((p_out.r,))] weights = [[1.]] if p_out.program: @@ -359,7 +367,7 @@ def convert_output(self, p_out: StepOutput): weights = [np.array(w) for w in weights] return [yy.reshape((self.batch_size, -1)) for yy in y], weights - def step(self, env_observation: np.ndarray, pg: Program, arguments: IntegerArguments) -> StepOutput: + def step(self, env_observation, pg, arguments): x = self.convert_input(StepInput(env_observation, pg, arguments)) results = self.model.predict(x, batch_size=1) # if batch_size==1, returns single row @@ -379,10 +387,10 @@ def load_weights(self): def print_weights(self, weights=None, detail=False): weights = weights or self.model.get_weights() for w in weights: - print("w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w))) + print "w%s: sum(w)=%s, ave(w)=%s" % (w.shape, np.sum(w), np.average(w)) if detail: for w in weights: - print("%s: %s" % (w.shape, w)) + print "%s: %s" % (w.shape, w) @staticmethod def size_of_env_observation(): diff --git a/src/npi/add/test_model.py b/src/npi/add/test_model.py index c456c4c..5dd6c27 100644 --- a/src/npi/add/test_model.py +++ b/src/npi/add/test_model.py @@ -1,4 +1,6 @@ # coding: utf-8 +from __future__ import division +from __future__ import absolute_import import curses import os import pickle @@ -10,7 +12,7 @@ from npi.terminal_core import TerminalNPIRunner, Terminal -def main(stdscr, model_path: str, num: int, result_logger: ResultLogger): +def main(stdscr, model_path, num, result_logger): terminal = Terminal(stdscr, create_char_map()) terminal.init_window(FIELD_WIDTH, FIELD_ROW) program_set = AdditionProgramSet() @@ -43,4 +45,4 @@ def main(stdscr, model_path: str, num: int, result_logger: ResultLogger): num_data = int(sys.argv[2]) if len(sys.argv) > 2 else 1000 log_filename = sys.argv[3] if len(sys.argv) > 3 else 'result.log' cc, wc = curses.wrapper(main, model_path_, num_data, ResultLogger(log_filename)) - print("Accuracy %s(OK=%d, NG=%d)" % (cc/(cc+wc), cc, wc)) + print "Accuracy %s(OK=%d, NG=%d)" % (cc/(cc+wc), cc, wc) diff --git a/src/npi/add/training_model.py b/src/npi/add/training_model.py index 6fdf8cb..f89f2e5 100644 --- a/src/npi/add/training_model.py +++ b/src/npi/add/training_model.py @@ -1,4 +1,6 @@ # coding: utf-8 +from __future__ import with_statement +from __future__ import absolute_import import os import pickle @@ -7,9 +9,10 @@ from npi.add.model import AdditionNPIModel from npi.core import ResultLogger, RuntimeSystem from npi.terminal_core import TerminalNPIRunner, Terminal +from io import open -def main(filename: str, model_path: str): +def main(filename, model_path): system = RuntimeSystem() program_set = AdditionProgramSet() diff --git a/src/npi/core.py b/src/npi/core.py index 3d63a13..f06026b 100644 --- a/src/npi/core.py +++ b/src/npi/core.py @@ -1,9 +1,12 @@ # coding: utf-8 +from __future__ import with_statement +from __future__ import absolute_import import json from copy import copy import numpy as np +from io import open MAX_ARG_NUM = 3 ARG_DEPTH = 10 # 0~9 digit. one-hot. @@ -12,12 +15,12 @@ PG_RETURN = 1 -class IntegerArguments: +class IntegerArguments(object): depth = ARG_DEPTH max_arg_num = MAX_ARG_NUM size_of_arguments = depth * max_arg_num - def __init__(self, args: list=None, values: np.ndarray=None): + def __init__(self, args=None, values=None): if values is not None: self.values = values.reshape((self.max_arg_num, self.depth)) else: @@ -33,12 +36,12 @@ def copy(self): return obj def decode_all(self): - return [self.decode_at(i) for i in range(len(self.values))] + return [self.decode_at(i) for i in xrange(len(self.values))] - def decode_at(self, index: int) -> int: + def decode_at(self, index): return self.values[index].argmax() - def update_to(self, index: int, integer: int): + def update_to(self, index, integer): self.values[index] = 0 self.values[index, int(np.clip(integer, 0, self.depth-1))] = 1 @@ -46,7 +49,7 @@ def __str__(self): return "" % self.decode_all() -class Program: +class Program(object): output_to_env = False def __init__(self, name, *args): @@ -54,31 +57,31 @@ def __init__(self, name, *args): self.args = args self.program_id = None - def description_with_args(self, args: IntegerArguments) -> str: + def description_with_args(self, args): int_args = args.decode_all() - return "%s(%s)" % (self.name, ", ".join([str(x) for x in int_args])) + return "%s(%s)" % (self.name, ", ".join([unicode(x) for x in int_args])) def to_one_hot(self, size, dtype=np.float): ret = np.zeros((size,), dtype=dtype) ret[self.program_id] = 1 return ret - def do(self, env, args: IntegerArguments): + def do(self, env, args): raise NotImplementedError() def __str__(self): return "" % self.name -class StepInput: - def __init__(self, env: np.ndarray, program: Program, arguments: IntegerArguments): +class StepInput(object): + def __init__(self, env, program, arguments): self.env = env self.program = program self.arguments = arguments -class StepOutput: - def __init__(self, r: float, program: Program=None, arguments: IntegerArguments=None): +class StepOutput(object): + def __init__(self, r, program=None, arguments=None): self.r = r self.program = program self.arguments = arguments @@ -87,23 +90,24 @@ def __str__(self): return "" % (self.r, self.program, self.arguments) -class StepInOut: - def __init__(self, input: StepInput, output: StepOutput): +class StepInOut(object): + def __init__(self, input, output): self.input = input self.output = output -class ResultLogger: +class ResultLogger(object): def __init__(self, filename): self.filename = filename def write(self, obj): with open(self.filename, "a") as f: - json.dump(obj, f) - f.write("\n") + #json.dump(obj, f) + #f.write('\n') + pass -class NPIStep: +class NPIStep(object): def reset(self): pass @@ -113,11 +117,11 @@ def enter_function(self): def exit_function(self): pass - def step(self, env_observation: np.ndarray, pg: Program, arguments: IntegerArguments) -> StepOutput: + def step(self, env_observation, pg, arguments): raise NotImplementedError() -class RuntimeSystem: +class RuntimeSystem(object): def __init__(self, terminal=None): self.terminal = terminal @@ -125,7 +129,7 @@ def logging(self, message): if self.terminal: self.terminal.add_log(message) else: - print(message) + print message def to_one_hot_array(idx, size, dtype=np.int8): diff --git a/src/npi/terminal_core.py b/src/npi/terminal_core.py index 670fdbf..c5c22f9 100644 --- a/src/npi/terminal_core.py +++ b/src/npi/terminal_core.py @@ -1,5 +1,6 @@ #!/usr/bin/env python # coding: utf-8 +from __future__ import absolute_import import curses import numpy as np @@ -8,7 +9,7 @@ __author__ = 'k_morishita' -class Screen: +class Screen(object): data = None def __init__(self, height, width): @@ -32,7 +33,7 @@ def __getitem__(self, item): return self.data[item] -class Terminal: +class Terminal(object): W_TOP = 1 W_LEFT = 1 LOG_WINDOW_HEIGHT = 10 @@ -45,9 +46,9 @@ class Terminal: log_window = None def __init__(self, stdscr, char_map=None): - print(type(stdscr)) + print type(stdscr) self.stdscr = stdscr - self.char_map = char_map or dict((ch, chr(ch)) for ch in range(128)) + self.char_map = char_map or dict((ch, unichr(ch)) for ch in xrange(128)) self.log_list = [] def init_window(self, width, height): @@ -69,7 +70,7 @@ def wait_for_key(self): self.stdscr.getch() def update_main_screen(self, screen): - for y in range(screen.height): + for y in xrange(screen.height): line = "".join([self.char_map[ch] for ch in screen[y]]) self.ignore_error_add_str(self.main_window, y, 0, line) @@ -87,11 +88,11 @@ def update_info_screen(self, info_list): self.info_window.refresh() def add_log(self, line): - self.log_list.insert(0, str(line)[:self.LOG_WINDOW_WIDTH]) + self.log_list.insert(0, unicode(line)[:self.LOG_WINDOW_WIDTH]) self.log_list = self.log_list[:self.LOG_WINDOW_HEIGHT-1] self.log_window.clear() for i, line in enumerate(self.log_list): - line = str(line) + " " * (self.LOG_WINDOW_WIDTH - len(str(line))) + line = unicode(line) + " " * (self.LOG_WINDOW_WIDTH - len(unicode(line))) self.log_window.addstr(i, 0, line) self.log_window.refresh() @@ -111,8 +112,8 @@ def show_env_to_terminal(terminal, env): terminal.refresh_main_window() -class TerminalNPIRunner: - def __init__(self, terminal: Terminal, model: NPIStep=None, recording=True, max_depth=10, max_step=1000): +class TerminalNPIRunner(object): + def __init__(self, terminal, model=None, recording=True, max_depth=10, max_step=1000): self.terminal = terminal self.model = model self.steps = 0 @@ -132,7 +133,7 @@ def display_env(self, env, force=False): if (self.verbose or force) and self.terminal: show_env_to_terminal(self.terminal, env) - def display_information(self, program: Program, arguments: IntegerArguments, result: StepOutput, depth: int): + def display_information(self, program, arguments, result, depth): if self.verbose and self.terminal: information = [ "Step %2d Depth: %2d" % (self.steps, depth), @@ -144,7 +145,7 @@ def display_information(self, program: Program, arguments: IntegerArguments, res self.terminal.update_info_screen(information) self.wait() - def npi_program_interface(self, env, program: Program, arguments: IntegerArguments, depth=0): + def npi_program_interface(self, env, program, arguments, depth=0): if self.max_depth < depth or self.max_step < self.steps: raise StopIteration()