
Commit 4d7fea0

MechCoder authored and Copybara-Service committed

Set filter_size to 1 when the number of time-steps equals 1 when using 3-D convolutions. This avoids unnecessary padding across time for this special case.

PiperOrigin-RevId: 228946248

1 parent: 3b34470

2 files changed: +18 -6 lines

tensor2tensor/models/research/glow_ops.py (+13 -3)

@@ -421,6 +421,7 @@ def conv(name, x, output_channels, filter_size=None, stride=None,
 
   x_shape = common_layers.shape_list(x)
   is_2d = len(x_shape) == 4
+  num_steps = x_shape[1]
 
   # set filter_size, stride and in_channels
   if is_2d:
@@ -435,7 +436,10 @@ def conv(name, x, output_channels, filter_size=None, stride=None,
     conv_filter = tf.nn.conv2d
   else:
     if filter_size is None:
-      filter_size = [2, 3, 3]
+      if num_steps == 1:
+        filter_size = [1, 3, 3]
+      else:
+        filter_size = [2, 3, 3]
     if stride is None:
       stride = [1, 1, 1]
     if dilations is None:
@@ -489,11 +493,17 @@ def conv_block(name, x, mid_channels, dilations=None, activation="relu",
 
   x_shape = common_layers.shape_list(x)
   is_2d = len(x_shape) == 4
+  num_steps = x_shape[1]
   if is_2d:
     first_filter = [3, 3]
     second_filter = [1, 1]
   else:
-    first_filter = [2, 3, 3]
+    # special case when number of steps equal 1 to avoid
+    # padding.
+    if num_steps == 1:
+      first_filter = [1, 3, 3]
+    else:
+      first_filter = [2, 3, 3]
     second_filter = [1, 1, 1]
 
   # Edge Padding + conv2d + actnorm + relu:
@@ -1025,7 +1035,7 @@ def split(name, x, reverse=False, eps=None, eps_std=None, cond_latents=None,
     eps_std: Sample x2 with the provided eps_std.
     cond_latents: optionally condition x2 on cond_latents.
     hparams: next_frame_glow hparams.
-    state: tf.nn.rnn_cell.LSTMStateTuple. Current state of the LSTM over z_2.
+    state: tf.nn.rnn_cell.LSTMStateTuple.. Current state of the LSTM over z_2.
      Used only when hparams.latent_dist_encoder == "conv_lstm"
     condition: bool, Whether or not to condition the distribution on
       cond_latents.
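
As a rough illustration of the padding the commit message refers to: with only one time-step, a 3-D convolution whose filter spans two time-steps can only be applied by padding an extra frame along the time axis, whereas a time-dimension filter size of 1 needs no padding across time. The sketch below is standalone and illustrative, not code from glow_ops.py: the variable names (w_two, w_one) are made up, and it uses TensorFlow's "SAME" padding as a stand-in for the explicit edge padding that glow_ops performs before its convolutions.

# Standalone sketch (assumed names, TF1-style API as used in this repo).
import tensorflow as tf

with tf.Graph().as_default():
  # A single-frame 3-D input: (batch, time, height, width, channels).
  x = tf.random_normal((16, 1, 32, 32, 48))

  # filter_size = [2, 3, 3]: a depth-2 kernel cannot fit a single frame, so
  # "SAME" padding has to invent an extra all-zero frame along the time axis.
  w_two = tf.get_variable("w_two", [2, 3, 3, 48, 48])
  y_two = tf.nn.conv3d(x, w_two, strides=[1, 1, 1, 1, 1], padding="SAME")

  # filter_size = [1, 3, 3]: the kernel already fits one frame, so no
  # padding across time is needed.
  w_one = tf.get_variable("w_one", [1, 3, 3, 48, 48])
  y_one = tf.nn.conv3d(x, w_one, strides=[1, 1, 1, 1, 1], padding="SAME")

  # Both outputs keep the input shape, but y_two was computed against a
  # padded frame that carries no information.
  print(y_two.shape, y_one.shape)  # (16, 1, 32, 32, 48) in both cases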

tensor2tensor/models/research/glow_ops_test.py (+5 -3)

@@ -430,16 +430,18 @@ def test_actnorm_3d(self):
       ("dil_gatu", True, "gatu"), ("no_dil_gatu", False, "gatu"),
       ("dil_relu_drop", True, "relu", 0.1),
       ("dil_gatu_drop", True, "gatu", 0.1),
-      ("dil_gatu_drop_noise", True, "gatu", 0.1, 0.1))
+      ("dil_gatu_drop_noise", True, "gatu", 0.1, 0.1),
+      ("gatu_drop_single_step", False, "gatu", 0.1, 0.1, 1),
+      ("dil_gatu_drop_single_step", True, "gatu", 0.1, 0.1, 1),)
   def test_temporal_latent_to_dist(self, apply_dilation, activation,
-                                   dropout=0.0, noise=0.1):
+                                   dropout=0.0, noise=0.1, num_steps=5):
     with tf.Graph().as_default():
       hparams = self.get_glow_hparams()
       hparams.latent_apply_dilations = apply_dilation
       hparams.latent_activation = activation
       hparams.latent_dropout = dropout
       hparams.latent_noise = noise
-      latent_shape = (16, 5, 32, 32, 48)
+      latent_shape = (16, num_steps, 32, 32, 48)
       latents = tf.random_normal(latent_shape)
       dist = glow_ops.temporal_latent_to_dist(
           "tensor_to_dist", latents, hparams)
