From 95db333261c99a330c61bd3d17a4ee6ff30af3fe Mon Sep 17 00:00:00 2001
From: TF-Agents Team
Date: Fri, 10 Mar 2023 02:57:15 -0800
Subject: [PATCH] Fixes `FalconRewardPredictionPolicyTest`

PiperOrigin-RevId: 515585162
Change-Id: I1b6485ad513fe6255d0b91e81ceba5afbc52df51
---
 .../bandits/policies/falcon_reward_prediction_policy_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tf_agents/bandits/policies/falcon_reward_prediction_policy_test.py b/tf_agents/bandits/policies/falcon_reward_prediction_policy_test.py
index 73d240e00..90ca054cd 100644
--- a/tf_agents/bandits/policies/falcon_reward_prediction_policy_test.py
+++ b/tf_agents/bandits/policies/falcon_reward_prediction_policy_test.py
@@ -151,7 +151,7 @@ def split_fn(obs):
         time_step_spec=self._time_step_with_mask_spec,
         action_spec=self._action_spec,
         reward_network=DummyNet(self._obs_spec),
-        exploitation_coefficient=0.0,
+        exploitation_coefficient=exploitation_coefficient,
         num_samples_list=num_samples_list,
         emit_policy_info=(utils.InfoFields.LOG_PROBABILITY,),
         observation_and_action_constraint_splitter=split_fn)