From 95aac48daa1dec35264030ce9ecfdef03c74cd5c Mon Sep 17 00:00:00 2001 From: Jacob Edelman Date: Sun, 31 May 2015 11:07:36 -0400 Subject: [PATCH 01/12] Changed the CharSplitLMMinibatchLoader to words Changed the CharSplitLMMinibatchLoader to words where words are continuous strings of letters or any non-letter symbol. --- ...der.lua => WordSplitLMMinibatchLoader.lua} | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) rename util/{CharSplitLMMinibatchLoader.lua => WordSplitLMMinibatchLoader.lua} (86%) diff --git a/util/CharSplitLMMinibatchLoader.lua b/util/WordSplitLMMinibatchLoader.lua similarity index 86% rename from util/CharSplitLMMinibatchLoader.lua rename to util/WordSplitLMMinibatchLoader.lua index 1fafe398..e382fe92 100644 --- a/util/CharSplitLMMinibatchLoader.lua +++ b/util/WordSplitLMMinibatchLoader.lua @@ -95,9 +95,18 @@ function CharSplitLMMinibatchLoader.text_to_tensor(in_textfile, out_vocabfile, o print('creating vocabulary mapping...') -- record all characters to a set local unordered = {} - for char in rawdata:gmatch'.' do - if not unordered[char] then unordered[char] = true end + local length = 0 + for char1,char2 in rawdata:gmatch'(%a*)(.?)' do + if char1 ~= "" then + if not unordered[char1] then unordered[cha1] = true end + length = length + 1 + end + if char2 ~= "" then + if not unordered[char2] then unordered[char2] = true end + length = length + 1 + end end + -- sort into a table (i.e. keys become 1..N) local ordered = {} for char in pairs(unordered) do ordered[#ordered + 1] = char end @@ -109,9 +118,17 @@ function CharSplitLMMinibatchLoader.text_to_tensor(in_textfile, out_vocabfile, o end -- construct a tensor with all the data print('putting data into tensor...') - local data = torch.ByteTensor(#rawdata) -- store it into 1D first, then rearrange - for i=1, #rawdata do - data[i] = vocab_mapping[rawdata:sub(i, i)] -- lua has no string indexing using [] + local data = torch.ByteTensor(length) -- store it into 1D first, then rearrange + local i = 0 + for char1,char2 in rawdata:gmatch'(%a*)(.?)' do + if char1 ~= "" then + data[i] = char1 + i = i + 1 + end + if char2 ~= "" then + data[i] = char2 + i= i + 1 + end end -- save output preprocessed files From 44f4e4f520c3970d6570624f2667011ebc4e49d2 Mon Sep 17 00:00:00 2001 From: Jacob Edelman Date: Sun, 31 May 2015 11:12:09 -0400 Subject: [PATCH 02/12] Update train.lua --- train.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.lua b/train.lua index 3136acb6..1d013d3d 100644 --- a/train.lua +++ b/train.lua @@ -21,7 +21,7 @@ require 'lfs' require 'util.OneHot' require 'util.misc' -local CharSplitLMMinibatchLoader = require 'util.CharSplitLMMinibatchLoader' +local CharSplitLMMinibatchLoader = require 'util.WordSplitLMMinibatchLoader' local model_utils = require 'util.model_utils' local LSTM = require 'model.LSTM' From d6e05b95f809a81a827d9ec3eca5d658e18f20d4 Mon Sep 17 00:00:00 2001 From: Jacob Edelman Date: Sun, 31 May 2015 11:14:05 -0400 Subject: [PATCH 03/12] Update WordSplitLMMinibatchLoader.lua --- util/WordSplitLMMinibatchLoader.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/WordSplitLMMinibatchLoader.lua b/util/WordSplitLMMinibatchLoader.lua index e382fe92..7d9b30ca 100644 --- a/util/WordSplitLMMinibatchLoader.lua +++ b/util/WordSplitLMMinibatchLoader.lua @@ -97,11 +97,11 @@ function CharSplitLMMinibatchLoader.text_to_tensor(in_textfile, out_vocabfile, o local unordered = {} local length = 0 for char1,char2 in rawdata:gmatch'(%a*)(.?)' do - 
if char1 ~= "" then + if char1 ~= nil then if not unordered[char1] then unordered[cha1] = true end length = length + 1 end - if char2 ~= "" then + if char2 ~= nil then if not unordered[char2] then unordered[char2] = true end length = length + 1 end From 2f55b6e46aa34a2f50b07594fda983340f34e5cb Mon Sep 17 00:00:00 2001 From: Jacob Edelman Date: Sun, 31 May 2015 11:14:46 -0400 Subject: [PATCH 04/12] Update WordSplitLMMinibatchLoader.lua --- util/WordSplitLMMinibatchLoader.lua | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/util/WordSplitLMMinibatchLoader.lua b/util/WordSplitLMMinibatchLoader.lua index 7d9b30ca..fda85be0 100644 --- a/util/WordSplitLMMinibatchLoader.lua +++ b/util/WordSplitLMMinibatchLoader.lua @@ -97,11 +97,13 @@ function CharSplitLMMinibatchLoader.text_to_tensor(in_textfile, out_vocabfile, o local unordered = {} local length = 0 for char1,char2 in rawdata:gmatch'(%a*)(.?)' do - if char1 ~= nil then + if char1 ~= "" then + print(char1) if not unordered[char1] then unordered[cha1] = true end length = length + 1 end - if char2 ~= nil then + if char2 ~= "" then + print(char2) if not unordered[char2] then unordered[char2] = true end length = length + 1 end From b391315dec2cc7a5ec9db7057efca86b9a616445 Mon Sep 17 00:00:00 2001 From: Jacob Edelman Date: Sun, 31 May 2015 11:15:55 -0400 Subject: [PATCH 05/12] Update WordSplitLMMinibatchLoader.lua --- util/WordSplitLMMinibatchLoader.lua | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/util/WordSplitLMMinibatchLoader.lua b/util/WordSplitLMMinibatchLoader.lua index fda85be0..927a5143 100644 --- a/util/WordSplitLMMinibatchLoader.lua +++ b/util/WordSplitLMMinibatchLoader.lua @@ -98,12 +98,10 @@ function CharSplitLMMinibatchLoader.text_to_tensor(in_textfile, out_vocabfile, o local length = 0 for char1,char2 in rawdata:gmatch'(%a*)(.?)' do if char1 ~= "" then - print(char1) - if not unordered[char1] then unordered[cha1] = true end + if not unordered[char1] then unordered[char1] = true end length = length + 1 end if char2 ~= "" then - print(char2) if not unordered[char2] then unordered[char2] = true end length = length + 1 end From 30d02206745e81c9e828aa4e0222345fd58dab77 Mon Sep 17 00:00:00 2001 From: JacobEdelman Date: Mon, 1 Jun 2015 08:19:41 -0400 Subject: [PATCH 06/12] Made work but has ram overload. 
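
A minimal standalone sketch of the tokenization this series introduces: a "word" is either a run of letters or a single non-letter symbol (spaces included), which is exactly what the (%a*)(.?) pattern above yields. The tokenize and build_vocab helpers are illustrative only, not part of the patch:

    -- Split raw text into runs of letters and single non-letter symbols,
    -- then build a token -> index vocabulary, mirroring text_to_tensor.
    local function tokenize(rawdata)
        local tokens = {}
        for word, symbol in rawdata:gmatch('(%a*)(.?)') do
            if word ~= "" then tokens[#tokens + 1] = word end
            if symbol ~= "" then tokens[#tokens + 1] = symbol end
        end
        return tokens
    end

    local function build_vocab(tokens)
        local unordered = {}
        for _, tok in ipairs(tokens) do unordered[tok] = true end
        local ordered = {}
        for tok in pairs(unordered) do ordered[#ordered + 1] = tok end
        table.sort(ordered)
        local vocab_mapping = {}
        for i, tok in ipairs(ordered) do vocab_mapping[tok] = i end
        return vocab_mapping
    end

    local tokens = tokenize("the cat sat.")
    -- tokens == {"the", " ", "cat", " ", "sat", "."}
    local vocab  = build_vocab(tokens)
    -- vocab  == { [" "] = 1, ["."] = 2, cat = 3, sat = 4, the = 5 }

A word-level vocabulary usually has far more than 255 entries, so the indices no longer fit in the 8-bit torch.ByteTensor used for characters; a later patch in this series switches the data tensor to a plain torch.Tensor (one double per token instead of one byte), which is one likely source of the RAM pressure these commit messages mention.
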
--- util/WordSplitLMMinibatchLoader.lua | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/util/WordSplitLMMinibatchLoader.lua b/util/WordSplitLMMinibatchLoader.lua index 927a5143..4d3097a7 100644 --- a/util/WordSplitLMMinibatchLoader.lua +++ b/util/WordSplitLMMinibatchLoader.lua @@ -29,14 +29,14 @@ function CharSplitLMMinibatchLoader.create(data_dir, batch_size, seq_length, spl local len = data:size(1) if len % (batch_size * seq_length) ~= 0 then print('cutting off end of data so that the batches/sequences divide evenly') - data = data:sub(1, batch_size * seq_length + data = data:sub(1, batch_size * seq_length * math.floor(len / (batch_size * seq_length))) end -- count vocab self.vocab_size = 0 - for _ in pairs(self.vocab_mapping) do - self.vocab_size = self.vocab_size + 1 + for _ in pairs(self.vocab_mapping) do + self.vocab_size = self.vocab_size + 1 end -- self.batches is a table of tensors @@ -119,14 +119,14 @@ function CharSplitLMMinibatchLoader.text_to_tensor(in_textfile, out_vocabfile, o -- construct a tensor with all the data print('putting data into tensor...') local data = torch.ByteTensor(length) -- store it into 1D first, then rearrange - local i = 0 + local i = 1 for char1,char2 in rawdata:gmatch'(%a*)(.?)' do if char1 ~= "" then - data[i] = char1 + data[i] = vocab_mapping[char1] i = i + 1 end if char2 ~= "" then - data[i] = char2 + data[i] = vocab_mapping[char2] i= i + 1 end end @@ -139,4 +139,3 @@ function CharSplitLMMinibatchLoader.text_to_tensor(in_textfile, out_vocabfile, o end return CharSplitLMMinibatchLoader - From 587674a55370d040446226c33f7aaf6ac8ecf3fc Mon Sep 17 00:00:00 2001 From: JacobEdelman Date: Mon, 1 Jun 2015 13:11:34 -0400 Subject: [PATCH 07/12] Made work fully but has ram overload. --- Readme.md | 4 +- train.lua | 34 +++++--- util/CharSplitLMMinibatchLoader.lua | 124 ++++++++++++++++++++++++++++ util/WordSplitLMMinibatchLoader.lua | 3 +- util/model_utils.lua | 2 +- 5 files changed, 151 insertions(+), 16 deletions(-) create mode 100644 util/CharSplitLMMinibatchLoader.lua diff --git a/Readme.md b/Readme.md index 2c6a3d2b..272c02bd 100644 --- a/Readme.md +++ b/Readme.md @@ -1,7 +1,7 @@ # char-rnn -This code implements **multi-layer Recurrent Neural Network** (RNN, LSTM, and GRU) for training/sampling from character-level language models. The input is a single text file and the model learns to predict the next character in the sequence. +This code implements **multi-layer Recurrent Neural Network** (RNN, LSTM, and GRU) for training/sampling from character-level language models. The input is a single text file and the model learns to predict the next character in the sequence. The context of this code base is described in detail in my [blog post](http://karpathy.github.io/2015/05/21/rnn-effectiveness/). @@ -16,7 +16,7 @@ This code is written in Lua and requires [Torch](http://torch.ch/). Additionally, you need to install the `nngraph` and `optim` packages using [LuaRocks](https://luarocks.org/) ```bash -$ luarocks install nngraph +$ luarocks install nngraph $ luarocks install optim ``` diff --git a/train.lua b/train.lua index 1d013d3d..e3e93f8b 100644 --- a/train.lua +++ b/train.lua @@ -3,11 +3,11 @@ This file trains a character-level multi-layer RNN on text data -Code is based on implementation in +Code is based on implementation in https://github.com/oxford-cs-ml-2015/practical6 but modified to have multi-layer support, GPU support, as well as many other common model/optimization bells and whistles. 
-The practical6 code is in turn based on +The practical6 code is in turn based on https://github.com/wojciechz/learning_to_execute which is turn based on other stuff in Torch, etc... (long lineage) @@ -21,7 +21,7 @@ require 'lfs' require 'util.OneHot' require 'util.misc' -local CharSplitLMMinibatchLoader = require 'util.WordSplitLMMinibatchLoader' + local model_utils = require 'util.model_utils' local LSTM = require 'model.LSTM' @@ -35,6 +35,7 @@ cmd:option('-data_dir','data/tinyshakespeare','data directory. Should contain th -- model params cmd:option('-rnn_size', 100, 'size of LSTM internal state') cmd:option('-num_layers', 2, 'number of layers in the LSTM') +cmd:option('-words', false, 'whether the model operates on words (as opposed to chars)') cmd:option('-model', 'lstm', 'for now only lstm is supported. keep fixed') -- optimization cmd:option('-learning_rate',2e-3,'learning rate') @@ -59,10 +60,17 @@ cmd:text() -- parse input params opt = cmd:parse(arg) + +if opt.words then + local SplitLMMinibatchLoader = require 'util.CharSplitLMMinibatchLoader' +else + local SplitLMMinibatchLoader = require 'util.WordSplitLMMinibatchLoader' +end + torch.manualSeed(opt.seed) -- train / val / test split for data, in fractions local test_frac = math.max(0, 1 - opt.train_frac - opt.val_frac) -local split_sizes = {opt.train_frac, opt.val_frac, test_frac} +local split_sizes = {opt.train_frac, opt.val_frac, test_frac} if opt.gpuid >= 0 then print('using CUDA on GPU ' .. opt.gpuid .. '...') @@ -71,7 +79,7 @@ if opt.gpuid >= 0 then cutorch.setDevice(opt.gpuid + 1) -- note +1 to make it 0 indexed! sigh lua end -- create the data loader class -local loader = CharSplitLMMinibatchLoader.create(opt.data_dir, opt.batch_size, opt.seq_length, split_sizes) +local loader = SplitLMMinibatchLoader.create(opt.data_dir, opt.batch_size, opt.seq_length, split_sizes) local vocab_size = loader.vocab_size -- the number of distinct characters print('vocab size: ' .. vocab_size) -- make sure output directory exists @@ -121,7 +129,7 @@ function eval_split(split_index, max_batches) loader:reset_batch_pointer(split_index) -- move batch iteration pointer for this split to front local loss = 0 local rnn_state = {[0] = init_state} - + for i = 1,n do -- iterate over batches in the split -- fetch a batch local x, y = loader:next_batch(split_index) @@ -193,10 +201,10 @@ function feval(x) local dlst = clones.rnn[t]:backward({embeddings[t], unpack(rnn_state[t-1])}, drnn_statet_passin) drnn_state[t-1] = {} for k,v in pairs(dlst) do - if k == 1 then + if k == 1 then dembeddings[t] = v else - -- note we do k-1 because first item is dembeddings, and then follow the + -- note we do k-1 because first item is dembeddings, and then follow the -- derivatives of the state, starting at index 2. I know... 
drnn_state[t-1][k-1] = v end @@ -251,9 +259,13 @@ for i = 1, iterations do end if i % opt.print_every == 0 then - print(string.format("%d/%d (epoch %.3f), train_bpc = %6.8f, grad/param norm = %6.4e, time/batch = %.2fs", i, iterations, epoch, train_bpc, grad_params:norm() / params:norm(), time)) + if op.words then + print(string.format("%d/%d (epoch %.3f), train_bpc = %6.8f, grad/param norm = %6.4e, time/batch = %.2fs", i, iterations, epoch, train_bpc, grad_params:norm() / params:norm(), time)) + else + print(string.format("%d/%d (epoch %.3f), train_bpw = %6.8f, grad/param norm = %6.4e, time/batch = %.2fs", i, iterations, epoch, train_bpc, grad_params:norm() / params:norm(), time)) + end end - + if i % 10 == 0 then collectgarbage() end -- handle early stopping if things are going really bad @@ -263,5 +275,3 @@ for i = 1, iterations do break -- halt end end - - diff --git a/util/CharSplitLMMinibatchLoader.lua b/util/CharSplitLMMinibatchLoader.lua new file mode 100644 index 00000000..3bc2795f --- /dev/null +++ b/util/CharSplitLMMinibatchLoader.lua @@ -0,0 +1,124 @@ + +-- Modified from https://github.com/oxford-cs-ml-2015/practical6 +-- the modification included support for train/val/test splits + +local CharSplitLMMinibatchLoader = {} +CharSplitLMMinibatchLoader.__index = CharSplitLMMinibatchLoader + +function CharSplitLMMinibatchLoader.create(data_dir, batch_size, seq_length, split_fractions) + -- split_fractions is e.g. {0.9, 0.05, 0.05} + + local self = {} + setmetatable(self, CharSplitLMMinibatchLoader) + + local input_file = path.join(data_dir, 'input.txt') + local vocab_file = path.join(data_dir, 'vocab.t7') + local tensor_file = path.join(data_dir, 'data.t7') + + -- construct a tensor with all the data + if not (path.exists(vocab_file) or path.exists(tensor_file)) then + print('one-time setup: preprocessing input text file ' .. input_file .. '...') + CharSplitLMMinibatchLoader.text_to_tensor(input_file, vocab_file, tensor_file) + end + + print('loading data files...') + local data = torch.load(tensor_file) + self.vocab_mapping = torch.load(vocab_file) + + -- cut off the end so that it divides evenly + local len = data:size(1) + if len % (batch_size * seq_length) ~= 0 then + print('cutting off end of data so that the batches/sequences divide evenly') + data = data:sub(1, batch_size * seq_length + * math.floor(len / (batch_size * seq_length))) + end + + -- count vocab + self.vocab_size = 0 + for _ in pairs(self.vocab_mapping) do + self.vocab_size = self.vocab_size + 1 + end + + -- self.batches is a table of tensors + print('reshaping tensor...') + self.batch_size = batch_size + self.seq_length = seq_length + + local ydata = data:clone() + ydata:sub(1,-2):copy(data:sub(2,-1)) + ydata[-1] = data[1] + self.x_batches = data:view(batch_size, -1):split(seq_length, 2) -- #rows = #batches + self.nbatches = #self.x_batches + self.y_batches = ydata:view(batch_size, -1):split(seq_length, 2) -- #rows = #batches + assert(#self.x_batches == #self.y_batches) + + self.ntrain = math.floor(self.nbatches * split_fractions[1]) + self.nval = math.floor(self.nbatches * split_fractions[2]) + self.ntest = self.nbatches - self.nval - self.ntrain -- the rest goes to test (to ensure this adds up exactly) + + self.split_sizes = {self.ntrain, self.nval, self.ntest} + self.batch_ix = {0,0,0} + + print(string.format('data load done. 
Number of batches in train: %d, val: %d, test: %d', self.ntrain, self.nval, self.ntest)) + collectgarbage() + return self +end + +function CharSplitLMMinibatchLoader:reset_batch_pointer(split_index, batch_index) + batch_index = batch_index or 0 + self.batch_ix[split_index] = batch_index +end + +function CharSplitLMMinibatchLoader:next_batch(split_index) + -- split_index is integer: 1 = train, 2 = val, 3 = test + self.batch_ix[split_index] = self.batch_ix[split_index] + 1 + if self.batch_ix[split_index] > self.split_sizes[split_index] then + self.batch_ix[split_index] = 1 -- cycle around to beginning + end + -- pull out the correct next batch + local ix = self.batch_ix[split_index] + if split_index == 2 then ix = ix + self.ntrain end -- offset by train set size + if split_index == 3 then ix = ix + self.ntrain + self.nval end -- offset by train + test + return self.x_batches[ix], self.y_batches[ix] +end + +-- *** STATIC method *** +function CharSplitLMMinibatchLoader.text_to_tensor(in_textfile, out_vocabfile, out_tensorfile) + local timer = torch.Timer() + + print('loading text file...') + local f = torch.DiskFile(in_textfile) + local rawdata = f:readString('*a') -- NOTE: this reads the whole file at once + f:close() + + -- create vocabulary if it doesn't exist yet + print('creating vocabulary mapping...') + -- record all characters to a set + local unordered = {} + for char in rawdata:gmatch'.' do + if not unordered[char] then unordered[char] = true end + end + -- sort into a table (i.e. keys become 1..N) + local ordered = {} + for char in pairs(unordered) do ordered[#ordered + 1] = char end + table.sort(ordered) + -- invert `ordered` to create the char->int mapping + local vocab_mapping = {} + for i, char in ipairs(ordered) do + vocab_mapping[char] = i + end + -- construct a tensor with all the data + print('putting data into tensor...') + local data = torch.ByteTensor(#rawdata) -- store it into 1D first, then rearrange + for i=1, #rawdata do + data[i] = vocab_mapping[rawdata:sub(i, i)] -- lua has no string indexing using [] + end + + -- save output preprocessed files + print('saving ' .. out_vocabfile) + torch.save(out_vocabfile, vocab_mapping) + print('saving ' .. 
out_tensorfile) + torch.save(out_tensorfile, data) +end + +return CharSplitLMMinibatchLoader diff --git a/util/WordSplitLMMinibatchLoader.lua b/util/WordSplitLMMinibatchLoader.lua index 4d3097a7..9701f5f0 100644 --- a/util/WordSplitLMMinibatchLoader.lua +++ b/util/WordSplitLMMinibatchLoader.lua @@ -1,6 +1,7 @@ -- Modified from https://github.com/oxford-cs-ml-2015/practical6 -- the modification included support for train/val/test splits +-- Further modified from CharSplitLMMiniBatchLoader to seperate by word by Jacob Edelman local CharSplitLMMinibatchLoader = {} CharSplitLMMinibatchLoader.__index = CharSplitLMMinibatchLoader @@ -118,7 +119,7 @@ function CharSplitLMMinibatchLoader.text_to_tensor(in_textfile, out_vocabfile, o end -- construct a tensor with all the data print('putting data into tensor...') - local data = torch.ByteTensor(length) -- store it into 1D first, then rearrange + local data = torch.Tensor(length) -- store it into 1D first, then rearrange local i = 1 for char1,char2 in rawdata:gmatch'(%a*)(.?)' do if char1 ~= "" then diff --git a/util/model_utils.lua b/util/model_utils.lua index 2edbed16..99422630 100644 --- a/util/model_utils.lua +++ b/util/model_utils.lua @@ -1,7 +1,7 @@ -- adapted from https://github.com/wojciechz/learning_to_execute -- utilities for combining/flattening parameters in a model --- the code in this script is more general than it needs to be, which is +-- the code in this script is more general than it needs to be, which is -- why it is kind of a large require 'torch' From bf7b56acd4a8b6a30c7961a44bb968337ac4df14 Mon Sep 17 00:00:00 2001 From: JacobEdelman Date: Mon, 1 Jun 2015 13:15:15 -0400 Subject: [PATCH 08/12] Fix commands. --- train.lua | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/train.lua b/train.lua index e3e93f8b..461d6ab0 100644 --- a/train.lua +++ b/train.lua @@ -60,11 +60,11 @@ cmd:text() -- parse input params opt = cmd:parse(arg) - +local SplitLMMinibatchLoader if opt.words then - local SplitLMMinibatchLoader = require 'util.CharSplitLMMinibatchLoader' + SplitLMMinibatchLoader = require 'util.WordSplitLMMinibatchLoader' else - local SplitLMMinibatchLoader = require 'util.WordSplitLMMinibatchLoader' + SplitLMMinibatchLoader = require 'util.CharSplitLMMinibatchLoader' end torch.manualSeed(opt.seed) @@ -259,10 +259,10 @@ for i = 1, iterations do end if i % opt.print_every == 0 then - if op.words then - print(string.format("%d/%d (epoch %.3f), train_bpc = %6.8f, grad/param norm = %6.4e, time/batch = %.2fs", i, iterations, epoch, train_bpc, grad_params:norm() / params:norm(), time)) - else + if opt.words then print(string.format("%d/%d (epoch %.3f), train_bpw = %6.8f, grad/param norm = %6.4e, time/batch = %.2fs", i, iterations, epoch, train_bpc, grad_params:norm() / params:norm(), time)) + else + print(string.format("%d/%d (epoch %.3f), train_bpc = %6.8f, grad/param norm = %6.4e, time/batch = %.2fs", i, iterations, epoch, train_bpc, grad_params:norm() / params:norm(), time)) end end From 477b19a1cbb0b130cd1a12f32d3d50803a731403 Mon Sep 17 00:00:00 2001 From: JacobEdelman Date: Wed, 3 Jun 2015 09:39:59 -0400 Subject: [PATCH 09/12] Tried making more RAM friendly, is now broken. 
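
The "broken" part is most likely the classifier: the change below sizes the output of the final nn.Linear to embeded_size instead of vocab_size, so nn.ClassNLLCriterion can be handed target indices larger than the number of predicted classes whenever the word vocabulary exceeds the embedding width. The embedding idea itself is the standard remedy for wide one-hot inputs; a minimal sketch of the trade-off, run from the repo root, with sizes that are illustrative assumptions rather than values from the patch:

    require 'torch'
    require 'nn'
    require 'util.Embedding'          -- lookup-table module added by this patch

    local vocab_size     = 10000      -- distinct word tokens (assumed)
    local embedding_size = 100        -- dense vector width (assumed)

    local embed = Embedding(vocab_size, embedding_size)
    embed.weight:uniform(-0.08, 0.08) -- the weight table is created uninitialised

    local ids  = torch.Tensor{42, 7, 1993}  -- a mini-batch of word indices
    local vecs = embed:forward(ids)         -- 3 x 100 dense matrix
    print(vecs:size())

    -- The OneHot module used for char models would produce a 3 x 10000
    -- matrix here instead; nn.LookupTable(vocab_size, embedding_size) is
    -- the stock nn analogue of the Embedding class defined in this patch.
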
--- train.lua | 24 ++++++++++++++++---- util/Embedding.lua | 53 ++++++++++++++++++++++++++++++++++++++++++++ util/model_utils.lua | 1 - 3 files changed, 73 insertions(+), 5 deletions(-) create mode 100644 util/Embedding.lua diff --git a/train.lua b/train.lua index 8b09e98c..de2cabdf 100644 --- a/train.lua +++ b/train.lua @@ -20,6 +20,7 @@ require 'optim' require 'lfs' require 'util.OneHot' +require 'util.Embedding' require 'util.misc' local model_utils = require 'util.model_utils' @@ -87,9 +88,19 @@ if not path.exists(opt.checkpoint_dir) then lfs.mkdir(opt.checkpoint_dir) end -- define the model: prototypes for one timestep, then clone them in time protos = {} -protos.embed = OneHot(vocab_size) +local embeded_size = 100 +local input_size, embeded_size +if opt.words then + print('using an embedding transform for input...') + embeded_size = 100 + protos.embed = Embedding(vocab_size, embeded_size) +else + print('using one-hot for input...') + embeded_size = vocab_size + protos.embed = OneHot(vocab_size) +end print('creating an LSTM with ' .. opt.num_layers .. ' layers') -protos.rnn = LSTM.lstm(vocab_size, opt.rnn_size, opt.num_layers, opt.dropout) +protos.rnn = LSTM.lstm(embeded_size, opt.rnn_size, opt.num_layers, opt.dropout) -- the initial state of the cell/hidden states init_state = {} for L=1,opt.num_layers do @@ -100,7 +111,7 @@ for L=1,opt.num_layers do end state_predict_index = #init_state -- index of blob to make prediction from -- classifier on top -protos.softmax = nn.Sequential():add(nn.Linear(opt.rnn_size, vocab_size)):add(nn.LogSoftMax()) +protos.softmax = nn.Sequential():add(nn.Linear(opt.rnn_size, embeded_size)):add(nn.LogSoftMax()) -- training criterion (negative log likelihood) protos.criterion = nn.ClassNLLCriterion() @@ -182,7 +193,10 @@ function feval(x) rnn_state[t] = clones.rnn[t]:forward{embeddings[t], unpack(rnn_state[t-1])} -- the following line is needed because nngraph tries to be clever if type(rnn_state[t]) ~= 'table' then rnn_state[t] = {rnn_state[t]} end + predictions[t] = clones.softmax[t]:forward(rnn_state[t][state_predict_index]) + + -- predictions should be 200 me thinks loss = loss + clones.criterion[t]:forward(predictions[t], y[{{}, t}]) end loss = loss / opt.seq_length @@ -227,16 +241,18 @@ local optim_state = {learningRate = opt.learning_rate, alpha = opt.decay_rate} local iterations = opt.max_epochs * loader.ntrain local iterations_per_epoch = loader.ntrain local loss0 = nil + for i = 1, iterations do + local epoch = i / loader.ntrain local timer = torch.Timer() + local _, loss = optim.rmsprop(feval, params, optim_state) local time = timer:time().real local train_loss = loss[1] -- the loss is inside a list, pop it train_losses[i] = train_loss - -- every now and then or on last iteration if i % opt.eval_val_every == 0 or i == iterations then -- evaluate loss on validation data diff --git a/util/Embedding.lua b/util/Embedding.lua new file mode 100644 index 00000000..a3c96663 --- /dev/null +++ b/util/Embedding.lua @@ -0,0 +1,53 @@ +--[[ + Copyright 2014 Google Inc. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +]]-- + +local Embedding, parent = torch.class('Embedding', 'nn.Module') + +function Embedding:__init(inputSize, outputSize) + parent.__init(self) + self.outputSize = outputSize + self.weight = torch.Tensor(inputSize, outputSize) + self.gradWeight = torch.Tensor(inputSize, outputSize) +end + +function Embedding:updateOutput(input) + self.output:resize(input:size(1), self.outputSize) + for i = 1, input:size(1) do + self.output[i]:copy(self.weight[input[i]]) + end + return self.output +end + +function Embedding:updateGradInput(input, gradOutput) + if self.gradInput then + self.gradInput:resize(input:size()) + return self.gradInput + end +end + +function Embedding:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + if scale == 0 then + self.gradWeight:zero() + end + for i = 1, input:size(1) do + local word = input[i] + self.gradWeight[word]:add(gradOutput[i]) + end +end + +-- we do not need to accumulate parameters when sharing +Embedding.sharedAccUpdateGradParameters = Embedding.accUpdateGradParameters diff --git a/util/model_utils.lua b/util/model_utils.lua index 99422630..923bdd28 100644 --- a/util/model_utils.lua +++ b/util/model_utils.lua @@ -120,7 +120,6 @@ function model_utils.clone_many_times(net, T) params = {} end end - local paramsNoGrad if net.parametersNoGrad then paramsNoGrad = net:parametersNoGrad() From e4bf6514236a65de9d2643ba6c07fb1381ba8695 Mon Sep 17 00:00:00 2001 From: JacobEdelman Date: Wed, 3 Jun 2015 17:07:26 -0400 Subject: [PATCH 10/12] Fixed and updated to match size of rnn to encoding size. --- model/LSTM.lua | 23 ++++++++++++++--------- train.lua | 11 +++++++++++ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/model/LSTM.lua b/model/LSTM.lua index aa9dd682..92865ede 100644 --- a/model/LSTM.lua +++ b/model/LSTM.lua @@ -1,7 +1,7 @@ local LSTM = {} -function LSTM.lstm(input_size, rnn_size, n, dropout) - dropout = dropout or 0 +function LSTM.lstm(input_size, rnn_size, n, dropout, words) + dropout = dropout or 0 -- there will be 2*n+1 inputs local inputs = {} @@ -18,11 +18,17 @@ function LSTM.lstm(input_size, rnn_size, n, dropout) local prev_h = inputs[L*2+1] local prev_c = inputs[L*2] -- the input to this layer - if L == 1 then - x = OneHot(input_size)(inputs[1]) - input_size_L = input_size - else - x = outputs[(L-1)*2] + if L == 1 then + if not words then + x = OneHot(input_size)(inputs[1]) + input_size_L = input_size + else + x = Embedding(input_size, rnn_size)(inputs[1]) + input_size_L = rnn_size + end + + else + x = outputs[(L-1)*2] input_size_L = rnn_size end -- evaluate the input sums at once for efficiency @@ -47,7 +53,7 @@ function LSTM.lstm(input_size, rnn_size, n, dropout) local next_h = nn.CMulTable()({out_gate, nn.Tanh()(next_c)}) -- add dropout to output, if desired if dropout > 0 then next_h = nn.Dropout(dropout)(next_h) end - + table.insert(outputs, next_c) table.insert(outputs, next_h) end @@ -61,4 +67,3 @@ function LSTM.lstm(input_size, rnn_size, n, dropout) end return LSTM - diff --git a/train.lua b/train.lua index 44f7ca8c..0d4a9960 100644 --- a/train.lua +++ b/train.lua @@ -99,8 +99,16 @@ protos = {} -- embeded_size = vocab_size -- protos.embed = OneHot(vocab_size) -- end +if not opt.words then + print('using one-hot for input...') + protos.embed = OneHot(vocab_size) +else + print('using an embedding transform of size', opt.rnn_size) + protos.embed = Embedding(vocab_size, opt.rnn_size) +end print('creating an LSTM with 
' .. opt.num_layers .. ' layers') protos.rnn = LSTM.lstm(vocab_size, opt.rnn_size, opt.num_layers, opt.dropout, opt.words) + -- the initial state of the cell/hidden states init_state = {} for L=1,opt.num_layers do @@ -155,9 +163,11 @@ function eval_split(split_index, max_batches) clones.rnn[t]:evaluate() -- for dropout proper functioning local lst = clones.rnn[t]:forward{x[{{}, t}], unpack(rnn_state[t-1])} rnn_state[t] = {} + for i=1,#init_state do table.insert(rnn_state[t], lst[i]) end prediction = lst[#lst] loss = loss + clones.criterion[t]:forward(prediction, y[{{}, t}]) + end -- carry over lstm state rnn_state[0] = rnn_state[#rnn_state] @@ -169,6 +179,7 @@ function eval_split(split_index, max_batches) end -- do fwd/bwd and return loss, grad_params + local init_state_global = clone_list(init_state) function feval(x) if x ~= params then From 53ae17592e608d2f27959856cbab399905c41a10 Mon Sep 17 00:00:00 2001 From: JacobEdelman Date: Wed, 3 Jun 2015 18:43:13 -0400 Subject: [PATCH 11/12] Made sampling work by adding in Embedding require. --- sample.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sample.lua b/sample.lua index 1cb12866..fb3978c3 100644 --- a/sample.lua +++ b/sample.lua @@ -3,7 +3,7 @@ This file samples characters from a trained model -Code is based on implementation in +Code is based on implementation in https://github.com/oxford-cs-ml-2015/practical6 ]]-- @@ -14,6 +14,7 @@ require 'nngraph' require 'optim' require 'lfs' +require 'util.Embedding' require 'util.OneHot' require 'util.misc' @@ -111,4 +112,3 @@ for i=1, opt.length do io.write(ivocab[prev_char[1]]) end io.write('\n') io.flush() - From 6a3aa3b3b4e21695063afc624b5195793eacd7c5 Mon Sep 17 00:00:00 2001 From: JacobEdelman Date: Wed, 3 Jun 2015 20:42:13 -0400 Subject: [PATCH 12/12] Reformatting some printing. --- train.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.lua b/train.lua index 0d4a9960..d07c96db 100644 --- a/train.lua +++ b/train.lua @@ -103,7 +103,7 @@ if not opt.words then print('using one-hot for input...') protos.embed = OneHot(vocab_size) else - print('using an embedding transform of size', opt.rnn_size) + print('using an embedding transform of size ' .. opt.rnn_size) protos.embed = Embedding(vocab_size, opt.rnn_size) end print('creating an LSTM with ' .. opt.num_layers .. ' layers')
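
Taken together, the series leaves train.lua choosing both the loader and the input transform from the -words flag. A condensed restatement of that wiring, simplified from the train.lua hunks above (opt, split_sizes and protos are set up earlier in that file):

    local SplitLMMinibatchLoader
    if opt.words then
        SplitLMMinibatchLoader = require 'util.WordSplitLMMinibatchLoader'
    else
        SplitLMMinibatchLoader = require 'util.CharSplitLMMinibatchLoader'
    end
    local loader = SplitLMMinibatchLoader.create(opt.data_dir, opt.batch_size,
                                                 opt.seq_length, split_sizes)

    if opt.words then
        -- learned word vectors, sized to match the RNN input
        protos.embed = Embedding(loader.vocab_size, opt.rnn_size)
    else
        -- one-hot character vectors
        protos.embed = OneHot(loader.vocab_size)
    end
    -- LSTM.lstm also receives opt.words so the graph can embed word
    -- indices internally (patch 10).
    protos.rnn = LSTM.lstm(loader.vocab_size, opt.rnn_size, opt.num_layers,
                           opt.dropout, opt.words)

Sampling then only needs the extra require 'util.Embedding' added in patch 11: ivocab maps indices back to whole words and to the single non-letter symbols (spaces included) that the loader treats as tokens, so the existing printing loop in sample.lua emits readable text unchanged.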