Merge pull request #774 from philstahlfeld:feature/batched-observer-unbatching

PiperOrigin-RevId: 477196444
Change-Id: Ib8d322753a787fb3c9b0de2a73c758dd56d5ad66
Copybara-Service committed Sep 27, 2022
2 parents d74adbb + c8037a5 commit eef5c67
Showing 2 changed files with 209 additions and 0 deletions.
66 changes: 66 additions & 0 deletions tf_agents/utils/batched_observer_unbatching.py
@@ -0,0 +1,66 @@
# coding=utf-8
# Copyright 2020 The TF-Agents Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Adapter for using unbatched observers in batched contexts."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from typing import Callable

from tf_agents.trajectories import trajectory as trajectory_lib
from tf_agents.utils import nest_utils


class BatchedObserverUnbatching(object):
"""Creates an unbatching observer.
Creates an observer that takes batched trajectories, unbatches them, and
delegates them to multiple observers.
The unbatched trajectories are delegated to observers that don't support
batch dimensions (e.g. ReverbAddEpisodeObserver).
Note that the batch size is assumed to be fixed and it is not validated.
"""

  def __init__(self, create_delegated_observer_fn: Callable[[], Callable[
      [trajectory_lib.Trajectory], None]], batch_size: int):
    # Create one delegated (unbatched) observer per batch entry.
    self._delegated_observers = [
        create_delegated_observer_fn() for _ in range(batch_size)
    ]

def __call__(self, batched_trajectory: trajectory_lib.Trajectory):
unbatched_trajectories = nest_utils.unstack_nested_arrays(
batched_trajectory)
    for obs, traj in zip(self._delegated_observers, unbatched_trajectories):
      # This loop could be parallelized in the future by running the delegated
      # observers concurrently.
      obs(traj)
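
For context, a minimal usage sketch (editorial, not part of the commit): the observer factory and the numpy trajectory values below are illustrative assumptions, showing how the unbatcher delegates each batch entry to its own observer.

import numpy as np

from tf_agents.trajectories import trajectory as trajectory_lib
from tf_agents.utils import batched_observer_unbatching

# Each delegated observer appends the trajectories it receives to its own list.
per_env_trajectories = []


def make_observer():
  # Called once per batch entry by BatchedObserverUnbatching.__init__.
  per_env_trajectories.append([])
  return per_env_trajectories[-1].append


unbatcher = batched_observer_unbatching.BatchedObserverUnbatching(
    make_observer, batch_size=2)

# A batched trajectory with batch size 2: every field has a leading dim of 2.
batched = trajectory_lib.Trajectory(
    step_type=np.array([0, 0]),
    observation=np.array([[1.0], [2.0]]),
    action=np.array([0, 1]),
    policy_info=(),
    next_step_type=np.array([1, 1]),
    reward=np.array([0.5, 0.7]),
    discount=np.array([1.0, 1.0]),
)
unbatcher(batched)
# per_env_trajectories[0] now holds the trajectory for batch entry 0, and
# per_env_trajectories[1] the trajectory for batch entry 1.
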
143 changes: 143 additions & 0 deletions tf_agents/utils/batched_observer_unbatching_test.py
@@ -0,0 +1,143 @@
# coding=utf-8
# Copyright 2020 The TF-Agents Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for tf_agents.utils.batched_observer_unbatching."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import reverb
import tensorflow as tf # pylint: disable=g-explicit-tensorflow-version-import

from tf_agents.drivers import py_driver
from tf_agents.environments import parallel_py_environment
from tf_agents.environments import suite_gym
from tf_agents.policies import random_py_policy
from tf_agents.replay_buffers import reverb_utils
from tf_agents.specs import tensor_spec
from tf_agents.trajectories import trajectory as trajectory_lib
from tf_agents.utils import batched_observer_unbatching


class BatchedObserverUnbatchingTest(tf.test.TestCase):

def test_call(self):
trajectories = []

def observer(traj):
trajectories.append(traj)

def observer_fn():
return observer

unbatcher = batched_observer_unbatching.BatchedObserverUnbatching(
observer_fn, batch_size=2)

trajectory = trajectory_lib.Trajectory(
action=tf.constant([0, 1]),
discount=tf.constant([0, 0]),
next_step_type=tf.constant([1, 2]),
observation={
"a": tf.constant([24, 42]),
"b": tf.constant([100, 200]),
},
policy_info=tf.constant([500, 1000]),
reward=tf.constant([25, 50]),
step_type=tf.constant([13, 26]),
)
unbatcher(trajectory)

self.assertEqual(
trajectories,
[
trajectory_lib.Trajectory(
action=tf.constant([0]),
discount=tf.constant([0]),
next_step_type=tf.constant([1]),
observation={
"a": tf.constant([24]),
"b": tf.constant([100]),
},
policy_info=tf.constant([500]),
reward=tf.constant([25]),
step_type=tf.constant([13]),
),
trajectory_lib.Trajectory(
action=tf.constant([1]),
discount=tf.constant([0]),
next_step_type=tf.constant([2]),
observation={
"a": tf.constant([42]),
"b": tf.constant([200]),
},
policy_info=tf.constant([1000]),
reward=tf.constant([50]),
step_type=tf.constant([26]),
),
],
)

def test_reverb_integration(self):
num_envs = 3
env = parallel_py_environment.ParallelPyEnvironment(
[lambda: suite_gym.load("CartPole-v0")] * num_envs)

policy = random_py_policy.RandomPyPolicy(env.time_step_spec(),
env.action_spec())

replay_buffer_signature = tensor_spec.from_spec(policy.collect_data_spec)
replay_buffer_signature = tensor_spec.add_outer_dim(replay_buffer_signature)
table = reverb.Table(
"experience",
max_size=100,
sampler=reverb.selectors.Uniform(),
remover=reverb.selectors.Fifo(),
rate_limiter=reverb.rate_limiters.MinSize(1),
signature=replay_buffer_signature,
)
reverb_server = reverb.Server([table])

def create_add_episode_observer():
return reverb_utils.ReverbAddEpisodeObserver(
reverb_server.localhost_client(),
table_name="experience",
max_sequence_length=200,
)

rb_observer = batched_observer_unbatching.BatchedObserverUnbatching(
create_add_episode_observer, batch_size=num_envs)

driver = py_driver.PyDriver(
env, policy, observers=[rb_observer], max_episodes=30)
driver.run(env.reset())


if __name__ == "__main__":
tf.test.main()
