This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

fix stale grad, rename base s2s class (#17)
* fix stale grad, remove base s2s class

* model presets, serialize vocab, load logic, rename blocks

* fix lint

* update per comments

* fix lint

* update
szha authored and sxjscience committed Mar 24, 2018
1 parent b73f157 commit 6875333
Showing 10 changed files with 407 additions and 126 deletions.
23 changes: 10 additions & 13 deletions example/gluon/word_language_model.py
@@ -22,7 +22,7 @@
import mxnet as mx
from mxnet import gluon, autograd
from mxnet.gluon import data, text
-from mxnet.gluon.model_zoo.text.lm import RNNModel, AWDLSTM
+from mxnet.gluon.model_zoo.text.lm import SimpleRNN, AWDRNN

parser = argparse.ArgumentParser(description='MXNet Autograd RNN/LSTM Language Model on Wikitext-2.')
parser.add_argument('--model', type=str, default='lstm',
@@ -47,11 +47,9 @@
help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--dropout_h', type=float, default=0.3,
help='dropout applied to hidden layer (0 = no dropout)')
-parser.add_argument('--dropout_i', type=float, default=0.4,
+parser.add_argument('--dropout_i', type=float, default=0.65,
                    help='dropout applied to input layer (0 = no dropout)')
-parser.add_argument('--dropout_e', type=float, default=0.1,
-                    help='dropout applied to embedding layer (0 = no dropout)')
-parser.add_argument('--weight_dropout', type=float, default=0.65,
+parser.add_argument('--weight_dropout', type=float, default=0.5,
help='weight dropout applied to h2h weight matrix (0 = no weight dropout)')
parser.add_argument('--tied', action='store_true',
help='tie the word embedding and softmax weights')
@@ -123,12 +121,11 @@ def index_tokens(data, label):
ntokens = len(vocab)

if args.weight_dropout:
-    model = AWDLSTM(args.model, vocab, args.emsize, args.nhid, args.nlayers,
-                    args.dropout, args.dropout_h, args.dropout_i, args.dropout_e, args.weight_dropout,
-                    args.tied)
+    model = AWDRNN(args.model, len(vocab), args.emsize, args.nhid, args.nlayers,
+                   args.tied, args.dropout, args.weight_dropout, args.dropout_h, args.dropout_i)
else:
-    model = RNNModel(args.model, vocab, args.emsize, args.nhid,
-                     args.nlayers, args.dropout, args.tied)
+    model = SimpleRNN(args.model, len(vocab), args.emsize, args.nhid, args.nlayers,
+                      args.tied, args.dropout)

model.initialize(mx.init.Xavier(), ctx=context)

@@ -147,15 +144,15 @@ def index_tokens(data, label):

def detach(hidden):
if isinstance(hidden, (tuple, list)):
-        hidden = [i.detach() for i in hidden]
+        hidden = [detach(i) for i in hidden]
else:
hidden = hidden.detach()
return hidden

def eval(data_source):
total_L = 0.0
ntotal = 0
-    hidden = model.begin_state(func=mx.nd.zeros, batch_size=args.batch_size, ctx=context[0])
+    hidden = model.begin_state(args.batch_size, func=mx.nd.zeros, ctx=context[0])
for i, (data, target) in enumerate(data_source):
data = data.as_in_context(context[0]).T
target= target.as_in_context(context[0]).T
@@ -172,7 +169,7 @@ def train():
for epoch in range(args.epochs):
total_L = 0.0
start_epoch_time = time.time()
-        hiddens = [model.begin_state(func=mx.nd.zeros, batch_size=args.batch_size, ctx=ctx) for ctx in context]
+        hiddens = [model.begin_state(args.batch_size, func=mx.nd.zeros, ctx=ctx) for ctx in context]
for i, (data, target) in enumerate(train_data):
start_batch_time = time.time()
data = data.T
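Note on the "fix stale grad" part of this commit: the recurrent state handed back by the model is a list that may contain further lists or (h, c) pairs (LSTM states), so detaching only the first level with i.detach() leaves the inner arrays attached to the previous batch's graph and keeps stale gradients alive. A minimal sketch of the recursive version, with a small check that is not part of the committed script:

    import mxnet as mx

    def detach(hidden):
        # recurse through nested lists/tuples so every NDArray is cut from the
        # previous iteration's computation graph (the "stale grad" issue)
        if isinstance(hidden, (tuple, list)):
            return [detach(h) for h in hidden]
        return hidden.detach()

    # LSTM-style nested state: one [h, c] pair per layer
    state = [[mx.nd.zeros((1, 8, 200)), mx.nd.zeros((1, 8, 200))]]
    state = detach(state)   # every inner NDArray is detached, not just the outer list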
5 changes: 1 addition & 4 deletions python/mxnet/gluon/data/text/sentiment.py
@@ -21,11 +21,8 @@

__all__ = ['IMDB']

-import glob
-import io
import json
import os
-import tarfile

from ..dataset import SimpleDataset
from ...utils import download, check_sha1, _get_repo_file_url
@@ -67,7 +64,7 @@ def _get_data(self):
path = os.path.join(root, data_file_name)
if not os.path.exists(path) or not check_sha1(path, data_hash):
download(_get_repo_file_url('gluon/dataset/imdb', data_file_name),
-                                 path=root, sha1_hash=data_hash)
+                     path=root, sha1_hash=data_hash)


def _read_data(self):
53 changes: 53 additions & 0 deletions python/mxnet/gluon/data/text/utils.py
@@ -20,6 +20,10 @@

"""Utility functions."""

+import os

+from ...text import Vocabulary

def flatten_samples(samples):
"""Flatten list of list of tokens into a single flattened list of tokens.
@@ -75,3 +79,52 @@ def collate_pad_length(num_items, seq_len, overlap=0):
step = seq_len-overlap
span = num_items-seq_len
return (span // step + 1) * step - span
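For reference, a quick worked check of the padding formula above (an illustration, not part of the committed diff): with num_items tokens and windows of seq_len that advance by seq_len - overlap, the function returns how many padding items make the last window end exactly at the stream boundary.

    def collate_pad_length(num_items, seq_len, overlap=0):
        step = seq_len - overlap
        span = num_items - seq_len
        return (span // step + 1) * step - span

    # 10 tokens, non-overlapping windows of 4: step = 4, span = 6,
    # (6 // 4 + 1) * 4 - 6 = 2 pad tokens, giving 12 tokens = 3 full windows
    assert collate_pad_length(10, 4) == 2
    # with overlap = 1 the windows start every 3 tokens: step = 3, span = 6,
    # (6 // 3 + 1) * 3 - 6 = 3 pad tokens (windows start at 0, 3, 6, 9)
    assert collate_pad_length(10, 4, overlap=1) == 3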

_vocab_sha1 = {}

def _load_pretrained_vocab(name, root=os.path.join('~', '.mxnet', 'models')):
"""Load the accompanying vocabulary object for pretrained model.
Parameters
----------
name : str
Name of the model.
root : str, default '~/.mxnet/models'
Location for keeping the model parameters.
Returns
-------
file_path
Path to the requested vocabulary object file of pretrained model.
"""
file_name = '{name}-{short_hash}'.format(name=name,
short_hash=short_hash(name))
root = os.path.expanduser(root)
file_path = os.path.join(root, file_name+'.vocab')
sha1_hash = _vocab_sha1[name]
if os.path.exists(file_path):
if check_sha1(file_path, sha1_hash):
return file_path
else:
print('Detected mismatch in the content of model vocab file. Downloading again.')
else:
print('Vocab file is not found. Downloading.')

if not os.path.exists(root):
os.makedirs(root)

zip_file_path = os.path.join(root, file_name+'.zip')
repo_url = os.environ.get('MXNET_GLUON_REPO', apache_repo_url)
if repo_url[-1] != '/':
repo_url = repo_url + '/'
download(_url_format.format(repo_url=repo_url, file_name=file_name),
path=zip_file_path,
overwrite=True)
with zipfile.ZipFile(zip_file_path) as zf:
zf.extractall(root)
os.remove(zip_file_path)

if check_sha1(file_path, sha1_hash):
return Vocabulary.json_deserialize(open(file_path, "rb").read())
else:
raise ValueError('Downloaded file has different hash. Please try again.')
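A hedged sketch of the deserialization step this helper ends with, written as a standalone snippet. The absolute import path for Vocabulary is inferred from the relative `from ...text import Vocabulary` above and the file location python/mxnet/gluon/data/text/utils.py, so treat it as an assumption:

    from mxnet.gluon.text import Vocabulary  # assumed absolute form of `...text`

    def read_vocab(file_path):
        # mirrors the last step of _load_pretrained_vocab: the .vocab file holds a
        # json payload that Vocabulary.json_deserialize turns back into an object;
        # a with-block avoids the dangling file handle left by open(...).read()
        with open(file_path, 'rb') as f:
            return Vocabulary.json_deserialize(f.read())

    # hypothetical call once the '<name>-<short_hash>.vocab' file under
    # ~/.mxnet/models has been downloaded and sha1-checked:
    # vocab = read_vocab(file_path)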
39 changes: 29 additions & 10 deletions python/mxnet/gluon/model_zoo/text/__init__.py
@@ -22,22 +22,30 @@
This module contains definitions for the following model architectures:
- `AWD`_
-You can construct a model with random weights by calling its constructor:
+You can construct a model with random weights by calling its constructor. Because NLP models
+are tied to vocabularies, you can either specify a dataset name to load and use the vocabulary
+of that dataset:
.. code::
    from mxnet.gluon.model_zoo import text
-    # TODO
-    awd = text.awd_variant()
+    awd, vocab = text.awd_lstm_lm_1150(dataset_name='wikitext-2')
+or directly specify a vocabulary object:
+.. code::
+    from mxnet.gluon.model_zoo import text
+    awd, vocab = text.awd_lstm_lm_1150(None, vocab=custom_vocab)
We provide pre-trained models for all the listed models.
These models can be constructed by passing ``pretrained=True``:
.. code::
    from mxnet.gluon.model_zoo import text
-    # TODO
-    awd = text.awd_variant(pretrained=True)
+    awd, vocab = text.awd_lstm_lm_1150(dataset_name='wikitext-2',
+                                       pretrained=True)
.. _AWD: https://arxiv.org/abs/1404.5997
"""
@@ -46,17 +54,26 @@

from . import lm

+from .lm import standard_lstm_lm_650, standard_lstm_lm_1500, awd_lstm_lm_1150

def get_model(name, **kwargs):
"""Returns a pre-defined model by name
"""Returns a pre-defined model by name.
Parameters
----------
name : str
Name of the model.
-    pretrained : bool
+    dataset_name : str or None, default None
+        The dataset name on which the pretrained model is trained.
+        Options are 'wikitext-2'. If specified, then the returned vocabulary is extracted from
+        the training set of the dataset.
+        If None, then vocab is required, for specifying embedding weight size, and is directly
+        returned.
+    vocab : gluon.text.Vocabulary or None, default None
+        Vocabulary object to be used with the language model.
+        Required when dataset_name is not specified.
+    pretrained : bool, default False
Whether to load the pretrained weights for model.
-    classes : int
-        Number of classes for the output layer.
ctx : Context, default CPU
The context in which to load the pretrained weights.
root : str, default '~/.mxnet/models'
@@ -67,7 +84,9 @@ def get_model(name, **kwargs):
HybridBlock
The model.
"""
-    #models = {'awd_variant': awd_variant}
+    models = {'standard_lstm_lm_650': standard_lstm_lm_650,
+              'standard_lstm_lm_1500': standard_lstm_lm_1500,
+              'awd_lstm_lm_1150': awd_lstm_lm_1150}
name = name.lower()
if name not in models:
raise ValueError(
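The presets registered above can be reached either through get_model or by calling them directly. A small usage sketch based on the docstrings in this file; custom_vocab is a placeholder for a vocabulary you built yourself, and the (model, vocab) return pair follows the module docstring rather than anything verified against this exact revision:

    from mxnet.gluon.model_zoo import text

    # pretrained AWD LSTM together with the wikitext-2 vocabulary it was trained on
    awd, vocab = text.awd_lstm_lm_1150(dataset_name='wikitext-2', pretrained=True)

    # the same preset resolved by name through get_model
    awd, vocab = text.get_model('awd_lstm_lm_1150', dataset_name='wikitext-2',
                                pretrained=True)

    # random weights with a user-supplied vocabulary (no dataset lookup)
    # awd, vocab = text.awd_lstm_lm_1150(None, vocab=custom_vocab)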
68 changes: 22 additions & 46 deletions python/mxnet/gluon/model_zoo/text/base.py
@@ -15,28 +15,18 @@
# specific language governing permissions and limitations
# under the License.
"""Building blocks and utility for models."""
+__all__ = ['StatefulBlock', 'get_rnn_layer', 'get_rnn_cell',
+           'RNNCellLayer', 'apply_weight_drop', 'WeightDropParameter']

-from ... import Block, HybridBlock, Parameter, contrib, nn, rnn
+from ... import Block, HybridBlock, Parameter, contrib, rnn
from .... import nd


-class _TextSeq2SeqModel(Block):
-    def __init__(self, src_vocab, tgt_vocab, **kwargs):
-        super(_TextSeq2SeqModel, self).__init__(**kwargs)
-        self._src_vocab = src_vocab
-        self._tgt_vocab = tgt_vocab
+class StatefulBlock(Block):
+    def __init__(self, **kwargs):
+        super(StatefulBlock, self).__init__(**kwargs)

    def begin_state(self, *args, **kwargs):
-        return self.encoder.begin_state(*args, **kwargs)
-
-    def forward(self, inputs, begin_state=None): # pylint: disable=arguments-differ
-        embedded_inputs = self.embedding(inputs)
-        if not begin_state:
-            begin_state = self.begin_state()
-        encoded, state = self.encoder(embedded_inputs, begin_state)
-        out = self.decoder(encoded)
-        return out, state
+        raise NotImplementedError()

def apply_weight_drop(block, local_param_name, rate, axes=(),
weight_dropout_mode='training'):
@@ -94,21 +84,21 @@ def _find_param(block, full_param_name, local_param_name):

return param_dict_results, reg_dict_results

-def get_rnn_cell(mode, num_layers, num_embed, num_hidden,
+def get_rnn_cell(mode, num_layers, input_size, hidden_size,
dropout, weight_dropout,
var_drop_in, var_drop_state, var_drop_out):
"""create rnn cell given specs"""
rnn_cell = rnn.SequentialRNNCell()
with rnn_cell.name_scope():
for i in range(num_layers):
if mode == 'rnn_relu':
-                cell = rnn.RNNCell(num_hidden, 'relu', input_size=num_embed)
+                cell = rnn.RNNCell(hidden_size, 'relu', input_size=input_size)
            elif mode == 'rnn_tanh':
-                cell = rnn.RNNCell(num_hidden, 'tanh', input_size=num_embed)
+                cell = rnn.RNNCell(hidden_size, 'tanh', input_size=input_size)
            elif mode == 'lstm':
-                cell = rnn.LSTMCell(num_hidden, input_size=num_embed)
+                cell = rnn.LSTMCell(hidden_size, input_size=input_size)
            elif mode == 'gru':
-                cell = rnn.GRUCell(num_hidden, input_size=num_embed)
+                cell = rnn.GRUCell(hidden_size, input_size=input_size)
if var_drop_in + var_drop_state + var_drop_out != 0:
cell = contrib.rnn.VariationalDropoutCell(cell,
var_drop_in,
Expand All @@ -125,20 +115,20 @@ def get_rnn_cell(mode, num_layers, num_embed, num_hidden,
return rnn_cell


-def get_rnn_layer(mode, num_layers, num_embed, num_hidden, dropout, weight_dropout):
+def get_rnn_layer(mode, num_layers, input_size, hidden_size, dropout, weight_dropout):
"""create rnn layer given specs"""
if mode == 'rnn_relu':
-        block = rnn.RNN(num_hidden, 'relu', num_layers, dropout=dropout,
-                        input_size=num_embed)
+        block = rnn.RNN(hidden_size, 'relu', num_layers, dropout=dropout,
+                        input_size=input_size)
    elif mode == 'rnn_tanh':
-        block = rnn.RNN(num_hidden, num_layers, dropout=dropout,
-                        input_size=num_embed)
+        block = rnn.RNN(hidden_size, num_layers, dropout=dropout,
+                        input_size=input_size)
    elif mode == 'lstm':
-        block = rnn.LSTM(num_hidden, num_layers, dropout=dropout,
-                         input_size=num_embed)
+        block = rnn.LSTM(hidden_size, num_layers, dropout=dropout,
+                         input_size=input_size)
    elif mode == 'gru':
-        block = rnn.GRU(num_hidden, num_layers, dropout=dropout,
-                        input_size=num_embed)
+        block = rnn.GRU(hidden_size, num_layers, dropout=dropout,
+                        input_size=input_size)
if weight_dropout:
apply_weight_drop(block, 'h2h_weight', rate=weight_dropout)

@@ -148,7 +138,7 @@ def get_rnn_layer(mode, num_layers, num_embed, num_hidden, dropout, weight_dropout):
class RNNCellLayer(Block):
"""A block that takes an rnn cell and makes it act like rnn layer."""
def __init__(self, rnn_cell, layout='TNC', **kwargs):
-        super(RNNCellBlock, self).__init__(**kwargs)
+        super(RNNCellLayer, self).__init__(**kwargs)
self.cell = rnn_cell
assert layout == 'TNC' or layout == 'NTC', \
"Invalid layout %s; must be one of ['TNC' or 'NTC']"%layout
@@ -177,20 +167,6 @@ def forward(self, inputs, states=None): # pylint: disable=arguments-differ
return outputs
return outputs, states

-class ExtendedSequential(nn.Sequential):
-    def forward(self, *x): # pylint: disable=arguments-differ
-        for block in self._children:
-            x = block(*x)
-        return x
-
-class TransformerBlock(Block):
-    def __init__(self, *blocks, **kwargs):
-        super(TransformerBlock, self).__init__(**kwargs)
-        self._blocks = blocks
-
-    def forward(self, *inputs):
-        return [block(data) if block else data for block, data in zip(self._blocks, inputs)]


class WeightDropParameter(Parameter):
"""A Container holding parameters (weights) of Blocks and performs dropout.
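get_rnn_layer above builds a plain Gluon recurrent layer and, when weight_dropout is non-zero, routes its h2h_weight parameters through apply_weight_drop, i.e. DropConnect on the hidden-to-hidden matrices in the AWD-LSTM style. A rough usage sketch, assuming the module path mxnet.gluon.model_zoo.text.base matches the file shown above; the shapes and hyper-parameters are illustrative only:

    import mxnet as mx
    from mxnet import autograd
    from mxnet.gluon.model_zoo.text.base import get_rnn_layer

    # 2-layer LSTM, 400-d inputs, 1150 hidden units, weight dropout 0.5 on every
    # h2h weight matrix; dropout=0 disables the usual inter-layer dropout
    layer = get_rnn_layer('lstm', 2, 400, 1150, dropout=0, weight_dropout=0.5)
    layer.initialize()

    inputs = mx.nd.random.uniform(shape=(35, 8, 400))   # (time, batch, input) in TNC layout
    states = layer.begin_state(batch_size=8)
    with autograd.record():   # weight_dropout_mode='training' means dropout applies in train mode
        outputs, states = layer(inputs, states)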