From e568f1da5083e06cd7d13caa57d0109e399c07dd Mon Sep 17 00:00:00 2001
From: zhanghaichao
Date: Sat, 17 Sep 2016 06:00:41 -0700
Subject: [PATCH] split dotmul_projection and dotmul_operator

---
 python/paddle/trainer/config_parser.py        | 14 ++++++++++++--
 .../paddle/trainer_config_helpers/layers.py   | 87 +++++++++++++++---------
 .../tests/layers_test_config.py               |  5 ++++-
 3 files changed, 64 insertions(+), 42 deletions(-)

diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index f2f67f9bd66a4..fdab43b621770 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2445,21 +2445,31 @@ def __init__(
                 input_layer = self.get_input_layer(input_index)
                 operator_conf.input_sizes.append(input_layer.size)
                 operator_input_index.append(input_index)
             if self.config.size == 0:
                 size = operator.calc_output_size(operator_conf.input_sizes)
                 if size != 0:
                     self.set_layer_size(size)
-
+            else:
+                size = operator.calc_output_size(operator_conf.input_sizes)
+                if size != 0:
+                    config_assert(size == self.config.size,
+                                  "different inputs have different size: %s vs. %s" %
+                                  (size, self.config.size))
         for input_index in xrange(len(self.inputs)):
             input_layer = self.get_input_layer(input_index)
             input = self.inputs[input_index]
             if input_index not in operator_input_index:
                 config_assert(isinstance(input, Projection),
                     "input should be projection or operation")
             if self.config.size == 0 and isinstance(input, Projection):
                 size = input.calc_output_size(input_layer)
                 if size != 0:
                     self.set_layer_size(size)
-
+            elif isinstance(input, Projection):
+                sz = input.calc_output_size(input_layer)
+                if sz != 0:
+                    config_assert(sz == self.config.size,
+                                  "different inputs have different size: %s vs. %s" %
+                                  (sz, self.config.size))
         config_assert(size != 0, "size is not set")
         for input_index in xrange(len(self.inputs)):
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index bda0b4f5d60e8..fab7e6e091863 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -28,7 +28,7 @@
 import copy
 
 __all__ = ["full_matrix_projection", "AggregateLevel", "ExpandLevel",
-           "identity_projection", "dotmul_projection",
+           "identity_projection", "dotmul_projection", "dotmul_operator",
            "table_projection", "mixed_layer", "data_layer",
            "embedding_layer", "fc_layer", "grumemory", "pooling_layer",
            "lstmemory", "last_seq", "first_seq",
@@ -389,7 +389,7 @@ def identity_projection(input, offset=None):
 @wrap_param_attr_default()
 def dotmul_projection(input, param_attr=None, scale=1):
     """
-    1. DotMulProjection if input is a layer.
+    DotMulProjection takes a layer as its input.
     It performs element-wise multiplication with weight.
 
     .. math::
@@ -403,48 +403,54 @@ def dotmul_projection(input, param_attr=None, scale=1):
     .. code-block:: python
 
        proj = dotmul_projection(input=layer)
 
-    2. DotMulOperator if input is a list or tuple.
-       It takes two inputs, performs element-wise multiplication:
-
-    .. math::
-       out.row[i] += scale * (in1.row[i] .* in2.row[i])
-
-    where :math:`.*` means element-wise multiplication, and
-    scale is a config scalar, its default value is one.
-
-    The example usage is:
-
-    .. code-block:: python
-
-       op = dotmul_projection(input=[layer1, layer2],
-                              scale=2.0)
-
     :param input: Input layer.
-    :type input: LayerOutput|list|tuple
+    :type input: LayerOutput
     :param param_attr: Parameter config, None if use default.
     :type param_attr: ParameterAttribute
     :param scale: config scalar, default value is one.
     :type scale: float
-    :return: A DotMulProjection or DotMulOperator Object.
-    :rtype: DotMulProjection or DotMulOperator
+    :return: A DotMulProjection Object.
+    :rtype: DotMulProjection
     """
-    if isinstance(input, LayerOutput):
-        proj = DotMulProjection(input_layer_name=input.name,
-                                size=input.size,
-                                **param_attr.attr)
-        proj.origin = input
-        proj.origin.projection = "dot_mul"
-        return proj
-    else:
-        assert isinstance(input, list) or isinstance(input, tuple)
-        assert len(input) == 2
-        assert param_attr is None
-        op = DotMulOperator(input_layer_name=[x.name for x in input],
-                            scale=scale)
-        op.origin = input
-        op.origin.operator = "dot_mul"
-        return op
+    proj = DotMulProjection(input_layer_name=input.name,
+                            size=input.size,
+                            **param_attr.attr)
+    proj.origin = input
+    return proj
+
+
+def dotmul_operator(x, y, scale=1):
+    """
+    DotMulOperator takes two inputs and performs element-wise multiplication:
+
+    .. math::
+       out.row[i] += scale * (x.row[i] .* y.row[i])
+
+    where :math:`.*` means element-wise multiplication, and
+    scale is a config scalar, its default value is one.
+
+    The example usage is:
+
+    .. code-block:: python
+
+       op = dotmul_operator(x, y, scale=1)
+
+    :param x: Input layer x.
+    :type x: LayerOutput
+    :param y: Input layer y.
+    :type y: LayerOutput
+    :param scale: config scalar, default value is one.
+    :type scale: float
+    :return: A DotMulOperator Object.
+    :rtype: DotMulOperator
+    """
+    assert isinstance(x, LayerOutput)
+    assert isinstance(y, LayerOutput)
+    op = DotMulOperator(input_layer_names=[x.name, y.name],
+                        scale=scale)
+    op.origin = [x, y]
+    return op
 
 @wrap_bias_attr_default(['padding_attr'])
 def context_projection(input, context_len, context_start=None,
@@ -539,7 +545,10 @@ def __add__(self, other):
         if not self.finalized:
             assert isinstance(other, Projection) or isinstance(other, Operator)
             self.inputs.append(other)
-            self.parents.append(other.origin)
+            if isinstance(other, Projection):
+                self.parents.append(other.origin)
+            else:
+                self.parents.extend(other.origin)
             return self
         else:
             raise MixedLayerType.AddToSealedMixedLayerException()
@@ -565,7 +574,7 @@ def __exit__(self, *args, **kwargs):
 @wrap_act_default(act=LinearActivation())
 @wrap_bias_attr_default(has_bias=False)
 @layer_support(ERROR_CLIPPING, DROPOUT)
-def mixed_layer(size, input=None, name=None, act=None, bias_attr=False,
+def mixed_layer(size=0, input=None, name=None, act=None, bias_attr=False,
                 layer_attr=None):
     """
     Mixed Layer. A mixed layer will add all inputs together, then activate.
diff --git a/python/paddle/trainer_config_helpers/tests/layers_test_config.py b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
index 39c85c788eeca..27b22ecb701c5 100644
--- a/python/paddle/trainer_config_helpers/tests/layers_test_config.py
+++ b/python/paddle/trainer_config_helpers/tests/layers_test_config.py
@@ -38,8 +38,11 @@
 
 outputs(classification_cost(out, data_layer(name="label", size=num_classes)))
 
+dotmul = mixed_layer(input=[dotmul_operator(x=x1, y=y1),
+                            dotmul_projection(input=y1)])
+
 # for ctc
-tmp = fc_layer(input=x1,
+tmp = fc_layer(input=[x1, dotmul],
                size=num_classes + 1,
                act=SoftmaxActivation())
 ctc = ctc_layer(input=tmp,
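
Usage sketch (illustrative only, not applied by this patch): how the two
helpers are expected to be combined in a trainer config after the split.
The layer names and sizes below are made up; dotmul_operator takes exactly
two layers of equal width, dotmul_projection keeps its single-layer form,
and mixed_layer can now infer its size when none is given.

    from paddle.trainer_config_helpers import *

    # two inputs of identical width; element-wise multiply needs equal sizes
    a = data_layer(name='a', size=128)
    b = data_layer(name='b', size=128)

    # size is deliberately omitted: with the new size=0 default the mixed
    # layer takes its width from the projection/operator inputs
    m = mixed_layer(input=[dotmul_operator(x=a, y=b),
                           dotmul_projection(input=b)])

    outputs(m)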