 
 import tensorflow as tf
 
+from tensorflow_addons import options
 from tensorflow_addons.seq2seq import attention_wrapper
 from tensorflow_addons.seq2seq import decoder
 from tensorflow_addons.utils import keras_utils
 from tensorflow_addons.utils.resource_loader import LazySO
-from tensorflow_addons.utils.types import FloatTensorLike, TensorLike
+from tensorflow_addons.utils.types import FloatTensorLike, TensorLike, Number
 
 from typeguard import typechecked
 from typing import Callable, Optional
 
 _beam_search_so = LazySO("custom_ops/seq2seq/_beam_search_ops.so")
 
 
-def gather_tree(*args, **kwargs) -> tf.Tensor:
-    return _beam_search_so.ops.addons_gather_tree(*args, **kwargs)
-
-
 class BeamSearchDecoderState(
     collections.namedtuple(
         "BeamSearchDecoderState",
@@ -151,6 +148,107 @@ def tile_batch(t: TensorLike, multiplier: int, name: Optional[str] = None) -> tf
     return tf.nest.map_structure(lambda t_: _tile_batch(t_, multiplier), t)
 
 
+@tf.function(
+    input_signature=(
+        tf.TensorSpec([None, None, None], dtype=tf.int32),
+        tf.TensorSpec([None, None, None], dtype=tf.int32),
+        tf.TensorSpec([None], dtype=tf.int32),
+        tf.TensorSpec([], dtype=tf.int32),
+    )
+)
+def _gather_tree(step_ids, parent_ids, max_sequence_lengths, end_token):
+    input_shape = tf.shape(parent_ids)
+    max_time = input_shape[0]
+    beam_width = input_shape[2]
+    max_sequence_lengths = tf.math.minimum(max_sequence_lengths, max_time)
+    mask = tf.expand_dims(
+        tf.transpose(tf.sequence_mask(max_sequence_lengths, maxlen=max_time)), -1
+    )
+
+    # Mask out-of-range ids.
+    end_tokens = tf.fill(input_shape, end_token)
+    step_ids = tf.where(mask, x=step_ids, y=end_tokens)
+    parent_ids = tf.where(mask, x=parent_ids, y=tf.zeros_like(parent_ids))
+    assert_op = tf.debugging.Assert(
+        tf.math.reduce_all(
+            tf.math.logical_and(parent_ids >= 0, parent_ids < beam_width)
+        ),
+        ["All parent ids must be non-negative and less than beam_width"],
+    )
+
+    # Reverse all sequences as we need to gather from the end.
+    with tf.control_dependencies([assert_op]):
+        rev_step_ids = tf.reverse_sequence(
+            step_ids, max_sequence_lengths, seq_axis=0, batch_axis=1
+        )
+        rev_parent_ids = tf.reverse_sequence(
+            parent_ids, max_sequence_lengths, seq_axis=0, batch_axis=1
+        )
+
+    # Initialize the output ids and parent based on the last step.
+    output_ids = tf.TensorArray(step_ids.dtype, size=max_time, dynamic_size=False)
+    output_ids = output_ids.write(0, rev_step_ids[0])
+    parent = rev_parent_ids[0]
+
+    # For each step, gather ids based on the beam origin.
+    for t in tf.range(1, max_time):
+        ids = tf.gather(rev_step_ids[t], parent, batch_dims=1)
+        parent = tf.gather(rev_parent_ids[t], parent, batch_dims=1)
+        output_ids = output_ids.write(t, ids)
+
+    # Reverse the sequences back to their original order.
+    output_ids = output_ids.stack()
+    output_ids = tf.reverse_sequence(
+        output_ids, max_sequence_lengths, seq_axis=0, batch_axis=1
+    )
+
+    # Ensure that only end_token appears after the first end_token.
+    in_bound_steps = tf.math.cumsum(tf.cast(output_ids == end_token, tf.int32)) == 0
+    output_ids = tf.where(in_bound_steps, x=output_ids, y=end_tokens)
+    return output_ids
+
+
+def gather_tree(
+    step_ids: TensorLike,
+    parent_ids: TensorLike,
+    max_sequence_lengths: TensorLike,
+    end_token: Number,
+) -> tf.Tensor:
+    """Calculates the full beams from the per-step ids and parent beam ids.
+
+    For a given beam, past the time step containing the first decoded
+    `end_token`, all values are filled in with `end_token`.
+
+    Args:
+      step_ids: The predicted token IDs.
+        An `int32` `Tensor` of shape `[max_time, batch_size, beam_width]`.
+      parent_ids: The parent beam indices.
+        An `int32` `Tensor` of shape `[max_time, batch_size, beam_width]`.
+      max_sequence_lengths: The maximum sequence length of each batch entry.
+        An `int32` `Tensor` of shape `[batch_size]`.
+      end_token: The end token ID.
+
+    Returns:
+      The reordered token IDs based on `parent_ids`.
+
+    Raises:
+      InvalidArgumentError: if `parent_ids` contains an invalid index.
+    """
+    if not options.TF_ADDONS_PY_OPS:
+        try:
+            return _beam_search_so.ops.addons_gather_tree(
+                step_ids, parent_ids, max_sequence_lengths, end_token
+            )
+        except tf.errors.NotFoundError:
+            options.warn_fallback("gather_tree")
+
+    step_ids = tf.convert_to_tensor(step_ids, dtype=tf.int32)
+    parent_ids = tf.convert_to_tensor(parent_ids, dtype=tf.int32)
+    max_sequence_lengths = tf.convert_to_tensor(max_sequence_lengths, dtype=tf.int32)
+    end_token = tf.convert_to_tensor(end_token, dtype=tf.int32)
+    return _gather_tree(step_ids, parent_ids, max_sequence_lengths, end_token)
+
+
 def gather_tree_from_array(
     t: TensorLike, parent_ids: TensorLike, sequence_length: TensorLike
 ) -> tf.Tensor:
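
Both paths are meant to behave identically: the compiled kernel and the `tf.function` fallback each trace a final beam backwards through `parent_ids`. A small worked example of those semantics, assuming the surrounding module is `tensorflow_addons.seq2seq.beam_search_decoder` (which the symbols in this diff suggest), with tiny hand-built beams of `max_time=3`, `batch_size=1`, `beam_width=2`:

import tensorflow as tf
from tensorflow_addons.seq2seq import beam_search_decoder

# At the final step, beam 0 (token 5) descends from beam 1 at t=1,
# while beam 1 (token 6) descends from beam 0.
step_ids = tf.constant([[[1, 2]], [[3, 4]], [[5, 6]]], dtype=tf.int32)
parent_ids = tf.constant([[[0, 0]], [[0, 0]], [[1, 0]]], dtype=tf.int32)
max_sequence_lengths = tf.constant([3], dtype=tf.int32)

beams = beam_search_decoder.gather_tree(
    step_ids, parent_ids, max_sequence_lengths, end_token=7
)
# Backtracking yields beam 0 = [1, 4, 5] and beam 1 = [1, 3, 6],
# i.e. [[[1, 1]], [[4, 3]], [[5, 6]]] in time-major layout.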