This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Add constant parameter #9893

Merged (4 commits, Feb 28, 2018)
1 change: 1 addition & 0 deletions docs/api/python/gluon/gluon.md
@@ -34,6 +34,7 @@ in Python and then deploy with symbolic graph in C++ and Scala.
:nosignatures:

Parameter
Constant
ParameterDict
```

84 changes: 82 additions & 2 deletions python/mxnet/gluon/parameter.py
@@ -18,8 +18,9 @@
# coding: utf-8
# pylint: disable=
"""Neural network parameter."""
-__all__ = ['DeferredInitializationError', 'Parameter', 'ParameterDict',
-           'tensor_types']
+__all__ = ['DeferredInitializationError', 'Parameter', 'Constant',
+           'ParameterDict', 'tensor_types']


from collections import OrderedDict
import warnings
@@ -459,6 +460,46 @@ def cast(self, dtype):
            autograd.mark_variables(self._data, self._grad, self.grad_req)


class Constant(Parameter):
    """A constant parameter for holding immutable tensors.
    `Constant`s are ignored by `autograd` and `Trainer`, thus their values
    will not change during training. But you can still update their values
    manually with the `set_data` method.

    `Constant`s can be created with either::

        const = mx.gluon.Constant('const', [[1,2],[3,4]])

    or::

        class Block(gluon.Block):
            def __init__(self, **kwargs):
                super(Block, self).__init__(**kwargs)
                self.const = self.params.get_constant('const', [[1,2],[3,4]])

    Parameters
    ----------
    name : str
        Name of the parameter.
    value : array-like
        Initial value for the constant.
    """
    def __init__(self, name, value):
        if not isinstance(value, ndarray.NDArray):
            value = ndarray.array(value)
        self.value = value

        class Init(initializer.Initializer):
            def _init_weight(self, _, arr):
                value.copyto(arr)
        init_name = 'Constant_{}_{}'.format(name, id(self))
        initializer.alias(init_name)(Init)

        super(Constant, self).__init__(
            name, grad_req='null', shape=value.shape, dtype=value.dtype,
            init=init_name)
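Illustrative usage of the class above (not part of the patch; the name and value are arbitrary, and it assumes the class is exported as ``mx.gluon.Constant`` via the ``__all__`` change)::

    import mxnet as mx

    # grad_req is 'null', so autograd and the Trainer leave the value untouched
    const = mx.gluon.Constant('bias_const', [[1, 2], [3, 4]])
    const.initialize()
    print(const.data())              # [[1. 2.] [3. 4.]]

    # the value can still be replaced manually
    const.set_data(mx.nd.zeros((2, 2)))
    print(const.data())              # [[0. 0.] [0. 0.]]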


class ParameterDict(object):
"""A dictionary managing a set of parameters.

@@ -548,6 +589,45 @@ def get(self, name, **kwargs):
                    setattr(param, k, v)
        return param

    def get_constant(self, name, value=None):
Review thread on get_constant:

@szha (Member), Feb 27, 2018: When developing for a language model with weight drop, we took the approach of adding a new type of Parameter. While using the new WeightDropParameter, we found it quite difficult as a user to properly change the type of a parameter in the ParameterDict. Since dropout-on-parameter is a common use case too, should we be open to adding a getter function for such a parameter as well? Or should we consider a different pattern?

Reply (Member): I think I've met some similar problems implementing weight normalization, which also transforms the parameters.

Reply (Member): So the pattern here is that we need parameter transformation, and it can be done by overriding the data() (and maybe grad()) method. This currently requires inheriting the Parameter class, and there is no existing method for casting the parameter type. So in this case, what's needed is a way to cast the parameter. One alternative is to provide a place to plug in transformation functions in data() and grad(). (A sketch of this pattern follows the get_constant method below.)

"""Retrieves a :py:class:`Constant` with name ``self.prefix+name``. If not found,
:py:func:`get` will first try to retrieve it from "shared" dict. If still not
found, :py:func:`get` will create a new :py:class:`Constant` with key-word
arguments and insert it to self.

Constants
----------
name : str
Name of the desired Constant. It will be prepended with this dictionary's
prefix.
value : array-like
Initial value of constant.

Returns
-------
Constant
The created or retrieved :py:class:`Constant`.
"""
        name = self.prefix + name
        param = self._get_impl(name)
        if param is None:
            if value is None:
                raise KeyError("No constant named {}. Please specify value "
                               "if you want to create a new constant.".format(name))
            param = Constant(name, value)
            self._params[name] = param
        elif value is not None:
            assert isinstance(param, Constant), \
                "Parameter {} already exists but it is not a constant.".format(name)
            if isinstance(value, ndarray.NDArray):
                value = value.asnumpy()
            assert param.shape == value.shape and \
                (param.value.asnumpy() == value).all(), \
                "Constant {} already exists but its value doesn't match " \
                "the new value.".format(name)
        return param
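As referenced in the review thread above, a rough sketch of the parameter-transformation pattern under discussion: a Parameter subclass that overrides data() to apply a function on read. The class name and the transform are hypothetical, not part of this patch or of Gluon's API::

    from mxnet.gluon import Parameter

    class TransformedParameter(Parameter):
        # Hypothetical subclass: applies `transform` to the stored weight on
        # every read, in the spirit of weight-drop / weight-norm parameters.
        def __init__(self, name, transform, **kwargs):
            super(TransformedParameter, self).__init__(name, **kwargs)
            self._transform = transform

        def data(self, ctx=None):
            # Transform the raw weight returned by Parameter.data(); the raw
            # weight is still what a Trainer would update.
            return self._transform(super(TransformedParameter, self).data(ctx))

    halve = lambda w: w * 0.5   # stand-in transform; a weight drop would apply dropout here
    weight = TransformedParameter('weight', halve, shape=(4, 4))
    weight.initialize()
    print(weight.data())        # uniformly initialized weights, scaled by 0.5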

    def update(self, other):
        """Copies all Parameters in ``other`` to self."""
        for k, v in other.items():
3 changes: 2 additions & 1 deletion python/mxnet/registry.py
@@ -69,7 +69,8 @@ def register(klass, name=None):
        assert issubclass(klass, base_class), \
            "Can only register subclass of %s"%base_class.__name__
        if name is None:
-           name = klass.__name__.lower()
+           name = klass.__name__
+       name = name.lower()
        if name in registry:
            warnings.warn(
                "\033[91mNew %s %s.%s registered with name %s is"
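With this change a name passed to register() explicitly is lowercased as well, not only a name derived from the class. Presumably this is what lets the per-instance 'Constant_<name>_<id>' initializer aliases registered in Constant.__init__ above resolve again later, since string lookups through the registry appear to be case-insensitive.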
28 changes: 28 additions & 0 deletions tests/python/unittest/test_gluon.py
@@ -51,6 +51,34 @@ def test_paramdict():
    params.load('test.params', mx.cpu())


@with_seed()
def test_constant():
    class Test(gluon.HybridBlock):
        def __init__(self, **kwargs):
            super(Test, self).__init__(**kwargs)
            self.value = np.asarray([[1,2], [3,4]])
            self.const = self.params.get_constant('const', self.value)

        def hybrid_forward(self, F, x, const):
            return x + const
Review thread on hybrid_forward:

Comment (Member): Should it be x + self.const?

Reply (Member): const is the parameter, and block handles it.
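For context on the exchange above: HybridBlock passes the data of each parameter registered through self.params into hybrid_forward as a keyword argument, so const here is already the constant's NDArray (or Symbol once hybridized), while self.const is the Constant object itself rather than its value.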


    test = Test()
    test.initialize()
    trainer = gluon.Trainer(test.collect_params(), 'sgd',
                            {'learning_rate': 1.0, 'momentum': 0.5})

    with mx.autograd.record():
        x = mx.nd.ones((2,2))
        x.attach_grad()
        y = test(x)
        y.backward()

    trainer.step(1)

    assert (test.const.data().asnumpy() == test.value).all()
    assert (x.grad.asnumpy() == 1).all()


@with_seed()
def test_parameter_sharing():
    class Net(gluon.Block):