Generalize scale factor to be a tensor (#467)
* use python idiom instead of if x in scale args

* let scale arg take tensors

* add test for scale with tensor
dustinvtran authored Feb 19, 2017
1 parent a399d00 commit 501f244
Showing 6 changed files with 62 additions and 104 deletions.
8 changes: 5 additions & 3 deletions edward/inferences/inference.py
@@ -266,9 +266,11 @@ def initialize(self, n_iter=1000, n_print=None, n_minibatch=None, scale=None,
subsampling details, see ``tf.train.slice_input_producer`` and
``tf.train.batch``.
scale : dict of RandomVariable to tf.Tensor, optional
      A scalar value to scale computation for any random variable that
      it is bound to. For example, this is useful for scaling
      computations with respect to local latent variables.
      A tensor to scale computation for any random variable that it is
      bound to. Its shape must be broadcastable; it is multiplied
      element-wise with the random variable's log-probability. For
      example, this is useful for mini-batch scaling when inferring
      global variables, or for applying masks to a random variable.
logdir : str, optional
Directory where event file will be written. For details,
see ``tf.summary.FileWriter``. Default is to write nothing.
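
To make the generalized argument concrete, the following is a minimal sketch (not part of this commit) of passing a tensor-valued ``scale`` to ``initialize`` with the Edward API as of this commit; the model, the ``mask`` placeholder, and the N / M correction are illustrative assumptions.

# Illustrative sketch only: a tensor-valued `scale` that combines the usual
# N / M minibatch correction with a per-data-point mask.
import edward as ed
import tensorflow as tf
from edward.models import Normal

N, M = 10000, 100                                   # full data size, minibatch size
mu = Normal(mu=0.0, sigma=1.0)                      # global latent variable
x = Normal(mu=tf.ones(M) * mu, sigma=tf.ones(M))    # likelihood over one minibatch

qmu = Normal(mu=tf.Variable(0.0), sigma=tf.nn.softplus(tf.Variable(0.0)))
x_ph = tf.placeholder(tf.float32, [M])
mask = tf.placeholder(tf.float32, [M])              # 1.0 = observed, 0.0 = missing

inference = ed.KLqp({mu: qmu}, data={x: x_ph})
# Before this commit `scale` had to be a scalar such as float(N) / M; it can
# now be any tensor that broadcasts against x's element-wise log-probability.
inference.initialize(scale={x: float(N) / M * mask})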
98 changes: 28 additions & 70 deletions edward/inferences/klqp.py
@@ -357,11 +357,8 @@ def build_reparam_loss_and_gradients(inference, var_list):
# Copy q(z) to obtain new set of posterior samples.
qz_copy = copy(qz, scope=scope)
z_sample[z] = qz_copy.value()
z_log_prob = tf.reduce_sum(qz_copy.log_prob(z_sample[z]))
if z in inference.scale:
z_log_prob *= inference.scale[z]

q_log_prob[s] += z_log_prob
q_log_prob[s] += tf.reduce_sum(
inference.scale.get(z, 1.0) * qz_copy.log_prob(z_sample[z]))

if inference.model_wrapper is None:
# Form dictionary in order to replace conditioning on prior or
@@ -377,20 +374,14 @@ def build_reparam_loss_and_gradients(inference, var_list):

for z in six.iterkeys(inference.latent_vars):
z_copy = copy(z, dict_swap, scope=scope)
z_log_prob = tf.reduce_sum(z_copy.log_prob(dict_swap[z]))
if z in inference.scale:
z_log_prob *= inference.scale[z]

p_log_prob[s] += z_log_prob
p_log_prob[s] += tf.reduce_sum(
inference.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))

for x in six.iterkeys(inference.data):
if isinstance(x, RandomVariable):
x_copy = copy(x, dict_swap, scope=scope)
x_log_prob = tf.reduce_sum(x_copy.log_prob(dict_swap[x]))
if x in inference.scale:
x_log_prob *= inference.scale[x]

p_log_prob[s] += x_log_prob
p_log_prob[s] += tf.reduce_sum(
inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))
else:
x = inference.data
p_log_prob[s] = inference.model_wrapper.log_prob(x, z_sample)
@@ -450,11 +441,8 @@ def build_reparam_kl_loss_and_gradients(inference, var_list):
for x in six.iterkeys(inference.data):
if isinstance(x, RandomVariable):
x_copy = copy(x, dict_swap, scope=scope)
x_log_lik = tf.reduce_sum(x_copy.log_prob(dict_swap[x]))
if x in inference.scale:
x_log_lik *= inference.scale[x]

p_log_lik[s] += x_log_lik
p_log_lik[s] += tf.reduce_sum(
inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))
else:
x = inference.data
p_log_lik[s] = inference.model_wrapper.log_lik(x, z_sample)
@@ -519,20 +507,14 @@ def build_reparam_entropy_loss_and_gradients(inference, var_list):

for z in six.iterkeys(inference.latent_vars):
z_copy = copy(z, dict_swap, scope=scope)
z_log_prob = tf.reduce_sum(z_copy.log_prob(dict_swap[z]))
if z in inference.scale:
z_log_prob *= inference.scale[z]

p_log_prob[s] += z_log_prob
p_log_prob[s] += tf.reduce_sum(
inference.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))

for x in six.iterkeys(inference.data):
if isinstance(x, RandomVariable):
x_copy = copy(x, dict_swap, scope=scope)
x_log_prob = tf.reduce_sum(x_copy.log_prob(dict_swap[x]))
if x in inference.scale:
x_log_prob *= inference.scale[x]

p_log_prob[s] += x_log_prob
p_log_prob[s] += tf.reduce_sum(
inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))
else:
x = inference.data
p_log_prob[s] = inference.model_wrapper.log_prob(x, z_sample)
@@ -568,12 +550,9 @@ def build_score_loss_and_gradients(inference, var_list):
# Copy q(z) to obtain new set of posterior samples.
qz_copy = copy(qz, scope=scope)
z_sample[z] = qz_copy.value()
      z_log_prob = tf.reduce_sum(
          qz_copy.log_prob(tf.stop_gradient(z_sample[z])))
      if z in inference.scale:
        z_log_prob *= inference.scale[z]

      q_log_prob[s] += z_log_prob
      q_log_prob[s] += tf.reduce_sum(
          inference.scale.get(z, 1.0) *
          qz_copy.log_prob(tf.stop_gradient(z_sample[z])))

if inference.model_wrapper is None:
# Form dictionary in order to replace conditioning on prior or
@@ -589,20 +568,14 @@ def build_score_loss_and_gradients(inference, var_list):

for z in six.iterkeys(inference.latent_vars):
z_copy = copy(z, dict_swap, scope=scope)
z_log_prob = tf.reduce_sum(z_copy.log_prob(dict_swap[z]))
if z in inference.scale:
z_log_prob *= inference.scale[z]

p_log_prob[s] += z_log_prob
p_log_prob[s] += tf.reduce_sum(
inference.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))

for x in six.iterkeys(inference.data):
if isinstance(x, RandomVariable):
x_copy = copy(x, dict_swap, scope=scope)
x_log_prob = tf.reduce_sum(x_copy.log_prob(dict_swap[x]))
if x in inference.scale:
x_log_prob *= inference.scale[x]

p_log_prob[s] += x_log_prob
p_log_prob[s] += tf.reduce_sum(
inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))
else:
x = inference.data
p_log_prob[s] = inference.model_wrapper.log_prob(x, z_sample)
@@ -644,12 +617,9 @@ def build_score_kl_loss_and_gradients(inference, var_list):
# Copy q(z) to obtain new set of posterior samples.
qz_copy = copy(qz, scope=scope)
z_sample[z] = qz_copy.value()
      z_log_prob = tf.reduce_sum(
          qz_copy.log_prob(tf.stop_gradient(z_sample[z])))
      if z in inference.scale:
        z_log_prob *= inference.scale[z]

      q_log_prob[s] += z_log_prob
      q_log_prob[s] += tf.reduce_sum(
          inference.scale.get(z, 1.0) *
          qz_copy.log_prob(tf.stop_gradient(z_sample[z])))

if inference.model_wrapper is None:
# Form dictionary in order to replace conditioning on prior or
@@ -666,11 +636,8 @@ def build_score_kl_loss_and_gradients(inference, var_list):
for x in six.iterkeys(inference.data):
if isinstance(x, RandomVariable):
x_copy = copy(x, dict_swap, scope=scope)
x_log_lik = tf.reduce_sum(x_copy.log_prob(dict_swap[x]))
if x in inference.scale:
x_log_lik *= inference.scale[x]

p_log_lik[s] += x_log_lik
p_log_lik[s] += tf.reduce_sum(
inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))
else:
x = inference.data
p_log_lik[s] = inference.model_wrapper.log_lik(x, z_sample)
@@ -716,12 +683,9 @@ def build_score_entropy_loss_and_gradients(inference, var_list):
# Copy q(z) to obtain new set of posterior samples.
qz_copy = copy(qz, scope=scope)
z_sample[z] = qz_copy.value()
      z_log_prob = tf.reduce_sum(
          qz_copy.log_prob(tf.stop_gradient(z_sample[z])))
      if z in inference.scale:
        z_log_prob *= inference.scale[z]

      q_log_prob[s] += z_log_prob
      q_log_prob[s] += tf.reduce_sum(
          inference.scale.get(z, 1.0) *
          qz_copy.log_prob(tf.stop_gradient(z_sample[z])))

if inference.model_wrapper is None:
# Form dictionary in order to replace conditioning on prior or
@@ -737,20 +701,14 @@ def build_score_entropy_loss_and_gradients(inference, var_list):

for z in six.iterkeys(inference.latent_vars):
z_copy = copy(z, dict_swap, scope=scope)
z_log_prob = tf.reduce_sum(z_copy.log_prob(dict_swap[z]))
if z in inference.scale:
z_log_prob *= inference.scale[z]

p_log_prob[s] += z_log_prob
p_log_prob[s] += tf.reduce_sum(
inference.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))

for x in six.iterkeys(inference.data):
if isinstance(x, RandomVariable):
x_copy = copy(x, dict_swap, scope=scope)
x_log_prob = tf.reduce_sum(x_copy.log_prob(dict_swap[x]))
if x in inference.scale:
x_log_prob *= inference.scale[x]

p_log_prob[s] += x_log_prob
p_log_prob[s] += tf.reduce_sum(
inference.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))
else:
x = inference.data
p_log_prob[s] = inference.model_wrapper.log_prob(x, z_sample)
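
Throughout klqp.py the refactor is the same: the ``if z in inference.scale`` branch becomes ``dict.get`` with a default of 1.0, and the scale now multiplies the element-wise log-probabilities before ``tf.reduce_sum``. A self-contained sketch of that pattern (the variable names and the mask are illustrative, not from the commit):

# Standalone sketch of the refactored pattern in TensorFlow 1.x style.
# `scale.get(z, 1.0)` falls back to 1.0 for unscaled variables, and a
# tensor-valued scale broadcasts against the element-wise log-probs.
import tensorflow as tf
from edward.models import Normal

z = Normal(mu=tf.zeros(5), sigma=tf.ones(5))
sample = z.value()
scale = {z: tf.constant([1.0, 1.0, 1.0, 0.0, 0.0])}  # e.g. a mask over 5 entries

# One expression replaces the old `if z in scale: ...` branch.
scaled_log_prob = tf.reduce_sum(scale.get(z, 1.0) * z.log_prob(sample))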
14 changes: 4 additions & 10 deletions edward/inferences/map.py
@@ -123,20 +123,14 @@ def build_loss_and_gradients(self, var_list):
p_log_prob = 0.0
for z in six.iterkeys(self.latent_vars):
z_copy = copy(z, dict_swap, scope=scope)
z_log_prob = tf.reduce_sum(z_copy.log_prob(dict_swap[z]))
if z in self.scale:
z_log_prob *= self.scale[z]

p_log_prob += z_log_prob
p_log_prob += tf.reduce_sum(
self.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))

for x in six.iterkeys(self.data):
if isinstance(x, RandomVariable):
x_copy = copy(x, dict_swap, scope=scope)
x_log_prob = tf.reduce_sum(x_copy.log_prob(dict_swap[x]))
if x in self.scale:
x_log_prob *= self.scale[x]

p_log_prob += x_log_prob
p_log_prob += tf.reduce_sum(
self.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))
else:
x = self.data
p_log_prob = self.model_wrapper.log_prob(x, z_mode)
14 changes: 4 additions & 10 deletions edward/inferences/sghmc.py
@@ -125,20 +125,14 @@ def _log_joint(self, z_sample):
log_joint = 0.0
for z in six.iterkeys(self.latent_vars):
z_copy = copy(z, dict_swap, scope=scope)
z_log_prob = tf.reduce_sum(z_copy.log_prob(dict_swap[z]))
if z in self.scale:
z_log_prob *= self.scale[z]

log_joint += z_log_prob
log_joint += tf.reduce_sum(
self.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))

for x in six.iterkeys(self.data):
if isinstance(x, RandomVariable):
x_copy = copy(x, dict_swap, scope=scope)
x_log_prob = tf.reduce_sum(x_copy.log_prob(dict_swap[x]))
if x in self.scale:
x_log_prob *= self.scale[x]

log_joint += x_log_prob
log_joint += tf.reduce_sum(
self.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))
else:
x = self.data
log_joint = self.model_wrapper.log_prob(x, z_sample)
14 changes: 4 additions & 10 deletions edward/inferences/sgld.py
@@ -114,20 +114,14 @@ def _log_joint(self, z_sample):
log_joint = 0.0
for z in six.iterkeys(self.latent_vars):
z_copy = copy(z, dict_swap, scope=scope)
z_log_prob = tf.reduce_sum(z_copy.log_prob(dict_swap[z]))
if z in self.scale:
z_log_prob *= self.scale[z]

log_joint += z_log_prob
log_joint += tf.reduce_sum(
self.scale.get(z, 1.0) * z_copy.log_prob(dict_swap[z]))

for x in six.iterkeys(self.data):
if isinstance(x, RandomVariable):
x_copy = copy(x, dict_swap, scope=scope)
x_log_prob = tf.reduce_sum(x_copy.log_prob(dict_swap[x]))
if x in self.scale:
x_log_prob *= self.scale[x]

log_joint += x_log_prob
log_joint += tf.reduce_sum(
self.scale.get(x, 1.0) * x_copy.log_prob(dict_swap[x]))
else:
x = self.data
log_joint = self.model_wrapper.log_prob(x, z_sample)
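
In sghmc.py and sgld.py, ``_log_joint`` applies the same pattern, so a scalar N / M (or any broadcastable tensor) rescales the minibatch likelihood into an unbiased estimate of the full-data log-joint. A rough sketch of the arithmetic, with made-up numbers rather than anything from the commit:

# Illustrative only: scaling the minibatch likelihood term by N / M makes its
# expectation over uniformly drawn minibatches equal to the full-data term.
import tensorflow as tf

N, M = 10000, 100
log_prior = tf.constant(-1.3)                          # stand-in for log p(z)
minibatch_log_liks = tf.placeholder(tf.float32, [M])   # log p(x_m | z) per point

# E[(N / M) * sum_m log p(x_m | z)] = sum_{n=1}^{N} log p(x_n | z)
log_joint_estimate = log_prior + float(N) / M * tf.reduce_sum(minibatch_log_liks)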
18 changes: 17 additions & 1 deletion tests/test-inferences/test_scale.py
@@ -3,6 +3,7 @@
from __future__ import print_function

import edward as ed
import numpy as np
import tensorflow as tf

from edward.models import Normal
@@ -19,7 +20,7 @@ def log_prob(self, xs, zs):

class test_inference_scale_class(tf.test.TestCase):

def test_subgraph(self):
def test_scale_0d(self):
N = 10
M = 5
mu = Normal(mu=0.0, sigma=1.0)
@@ -33,6 +34,21 @@ def test_subgraph(self):
inference.initialize(scale={x: float(N) / M})
assert inference.scale[x] == float(N) / M

def test_scale_1d(self):
N = 10
M = 5
mu = Normal(mu=0.0, sigma=1.0)
x = Normal(mu=tf.ones(M) * mu, sigma=tf.ones(M))

qmu = Normal(mu=tf.Variable(0.0), sigma=tf.constant(1.0))

x_ph = tf.placeholder(tf.float32, [M])
data = {x: x_ph}
inference = ed.KLqp({mu: qmu}, data)
inference.initialize(scale={x: tf.cast(tf.range(M), tf.float32)})
with self.test_session():
self.assertAllClose(inference.scale[x].eval(), np.arange(M))

def test_minibatch(self):
N = 10
M = 5
