[BugFix] Fix MARL-DDPG tutorial and other MODE usages #2373

Merged · 12 commits · Aug 6, 2024
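Summary of the changes below: the older `ExplorationType.MODE` / `InteractionType.MODE` spellings are replaced by `DETERMINISTIC` across the README, tutorials, SOTA scripts and tests; CI and docs pin `dm_control<1.0.21` together with `mujoco<3.2.1`; the docs references are corrected to `BatchRenorm1d` and `CrossQLoss`; and the MARL-DDPG tutorial now calls `step()` on the exploration module itself. A minimal sketch of the exploration-type usage after this change (illustrative, not taken from the diff):

```python
from torchrl.envs.utils import ExplorationType, set_exploration_type

# After this PR, deterministic evaluation code selects DETERMINISTIC rather
# than the older MODE member:
with set_exploration_type(ExplorationType.DETERMINISTIC):
    ...  # policy calls inside this block take the deterministic action
```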
3 changes: 2 additions & 1 deletion .github/unittest/linux/scripts/environment.yml
@@ -24,7 +24,8 @@ dependencies:
- tensorboard
- imageio==2.26.0
- wandb
- dm_control
- dm_control<1.0.21
- mujoco<3.2.1
- mlflow
- av
- coverage
2 changes: 1 addition & 1 deletion .github/unittest/linux/scripts/run_all.sh
@@ -91,7 +91,7 @@ echo "installing gymnasium"
pip3 install "gymnasium"
pip3 install ale_py
pip3 install mo-gymnasium[mujoco] # requires here bc needs mujoco-py
pip3 install mujoco -U
pip3 install "mujoco<3.2.1" -U

# sanity check: remove?
python3 -c """
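A hedged companion to the truncated sanity check above: after installing with the pins, the installed versions can be checked directly (this snippet is illustrative and not part of `run_all.sh`; it assumes the `packaging` helper is available):

```python
# Verify the pins used above: dm_control < 1.0.21 and mujoco < 3.2.1.
from importlib.metadata import version
from packaging.version import Version

for pkg, bound in {"dm_control": "1.0.21", "mujoco": "3.2.1"}.items():
    installed = Version(version(pkg))
    print(pkg, installed)
    assert installed < Version(bound), f"{pkg} {installed} violates the <{bound} pin"
```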
3 changes: 2 additions & 1 deletion .github/unittest/linux_distributed/scripts/environment.yml
@@ -23,7 +23,8 @@ dependencies:
- tensorboard
- imageio==2.26.0
- wandb
- dm_control
- dm_control<1.0.21
- mujoco<3.2.1
- mlflow
- av
- coverage
3 changes: 2 additions & 1 deletion .github/unittest/linux_examples/scripts/environment.yml
@@ -21,7 +21,8 @@ dependencies:
- scipy
- hydra-core
- imageio==2.26.0
- dm_control
- dm_control<1.0.21
- mujoco<3.2.1
- mlflow
- av
- coverage
3 changes: 2 additions & 1 deletion .github/unittest/linux_libs/scripts_envpool/environment.yml
@@ -18,5 +18,6 @@ dependencies:
- expecttest
- pyyaml
- scipy
- dm_control
- dm_control<1.0.21
- mujoco<3.2.1
- coverage
@@ -22,6 +22,7 @@ dependencies:
- scipy
- hydra-core
- dm_control -e git+https://github.com/deepmind/dm_control.git@c053360edea6170acfd9c8f65446703307d9d352#egg={dm_control}
- mujoco<3.2.1
- patchelf
- pyopengl==3.1.4
- ray
4 changes: 2 additions & 2 deletions .github/workflows/benchmarks.yml
@@ -35,7 +35,7 @@ jobs:
python3 setup.py develop
python3 -m pip install pytest pytest-benchmark
python3 -m pip install "gym[accept-rom-license,atari]"
python3 -m pip install dm_control
python3 -m pip install "dm_control<1.0.21" "mujoco<3.2.1"
export TD_GET_DEFAULTS_TO_NONE=1
- name: Run benchmarks
run: |
@@ -97,7 +97,7 @@ jobs:
python3 setup.py develop
python3 -m pip install pytest pytest-benchmark
python3 -m pip install "gym[accept-rom-license,atari]"
python3 -m pip install dm_control
python3 -m pip install "dm_control<1.0.21" "mujoco<3.2.1"
export TD_GET_DEFAULTS_TO_NONE=1
- name: check GPU presence
run: |
4 changes: 2 additions & 2 deletions .github/workflows/benchmarks_pr.yml
@@ -34,7 +34,7 @@ jobs:
python3 setup.py develop
python3 -m pip install pytest pytest-benchmark
python3 -m pip install "gym[accept-rom-license,atari]"
python3 -m pip install dm_control
python3 -m pip install "dm_control<1.0.21" "mujoco<3.2.1"
export TD_GET_DEFAULTS_TO_NONE=1
- name: Setup benchmarks
run: |
@@ -108,7 +108,7 @@ jobs:
python3 setup.py develop
python3 -m pip install pytest pytest-benchmark
python3 -m pip install "gym[accept-rom-license,atari]"
python3 -m pip install dm_control
python3 -m pip install "dm_control<1.0.21" "mujoco<3.2.1"
export TD_GET_DEFAULTS_TO_NONE=1
- name: check GPU presence
run: |
2 changes: 1 addition & 1 deletion README.md
@@ -478,7 +478,7 @@ And it is `functorch` and `torch.compile` compatible!
policy_explore = EGreedyWrapper(policy)
with set_exploration_type(ExplorationType.RANDOM):
tensordict = policy_explore(tensordict) # will use eps-greedy
with set_exploration_type(ExplorationType.MODE):
with set_exploration_type(ExplorationType.DETERMINISTIC):
tensordict = policy_explore(tensordict) # will not use eps-greedy
```
</details>
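The same context manager governs stochastic policies: under `ExplorationType.DETERMINISTIC`, a `ProbabilisticActor` returns the distribution's deterministic sample instead of a random draw. A minimal, self-contained sketch (the toy network, keys and distribution are illustrative and not taken from the README):

```python
import torch
from tensordict import TensorDict
from tensordict.nn import NormalParamExtractor, TensorDictModule
from torchrl.envs.utils import ExplorationType, set_exploration_type
from torchrl.modules import ProbabilisticActor, TanhNormal

# Toy stochastic policy: a linear layer produces loc/scale for a TanhNormal.
net = torch.nn.Sequential(torch.nn.Linear(3, 8), NormalParamExtractor())
module = TensorDictModule(net, in_keys=["observation"], out_keys=["loc", "scale"])
actor = ProbabilisticActor(module, in_keys=["loc", "scale"], distribution_class=TanhNormal)

td = TensorDict({"observation": torch.randn(5, 3)}, batch_size=[5])
with set_exploration_type(ExplorationType.DETERMINISTIC):
    a1 = actor(td.clone())["action"]
    a2 = actor(td.clone())["action"]
torch.testing.assert_close(a1, a2)  # deterministic: identical actions

with set_exploration_type(ExplorationType.RANDOM):
    a3 = actor(td.clone())["action"]  # sampled: varies from call to call
```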
3 changes: 2 additions & 1 deletion docs/requirements.txt
@@ -14,7 +14,8 @@ docutils
sphinx_design

torchvision
dm_control
dm_control<1.0.21
mujoco<3.2.1
atari-py
ale-py
gym[classic_control,accept-rom-license]
2 changes: 1 addition & 1 deletion docs/source/reference/modules.rst
@@ -319,7 +319,7 @@ Regular modules
Conv3dNet
SqueezeLayer
Squeeze2dLayer
BatchRenorm
BatchRenorm1d

Algorithm-specific modules
~~~~~~~~~~~~~~~~~~~~~~~~~~
2 changes: 1 addition & 1 deletion docs/source/reference/objectives.rst
@@ -157,7 +157,7 @@ CrossQ
:toctree: generated/
:template: rl_template_noinherit.rst

CrossQ
CrossQLoss

IQL
----
2 changes: 1 addition & 1 deletion sota-implementations/crossq/crossq.py
@@ -203,7 +203,7 @@ def main(cfg: "DictConfig"): # noqa: F821

# Evaluation
if abs(collected_frames % eval_iter) < frames_per_batch:
with set_exploration_type(ExplorationType.MODE), torch.no_grad():
with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
eval_start = time.time()
eval_rollout = eval_env.rollout(
eval_rollout_steps,
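The evaluation gate above relies on `collected_frames` growing in steps of `frames_per_batch`, so `abs(collected_frames % eval_iter) < frames_per_batch` fires on the first batch that reaches or passes each multiple of `eval_iter`. A toy check with made-up numbers (the real config values are not shown in this diff):

```python
# collected_frames advances by frames_per_batch each iteration; the condition
# therefore triggers roughly once every eval_iter frames.
frames_per_batch, eval_iter, total_frames = 1000, 5000, 20000
eval_points = [
    f
    for f in range(frames_per_batch, total_frames + 1, frames_per_batch)
    if abs(f % eval_iter) < frames_per_batch
]
print(eval_points)  # [5000, 10000, 15000, 20000]
```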
2 changes: 1 addition & 1 deletion sota-implementations/td3_bc/td3_bc.py
@@ -128,7 +128,7 @@ def main(cfg: "DictConfig"): # noqa: F821

# evaluation
if i % evaluation_interval == 0:
with set_exploration_type(ExplorationType.MODE), torch.no_grad():
with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
eval_td = eval_env.rollout(
max_steps=eval_steps, policy=model[0], auto_cast_to_device=True
)
7 changes: 5 additions & 2 deletions test/test_exploration.py
@@ -644,7 +644,7 @@ def test_no_spec_error(self, device):
@pytest.mark.parametrize("safe", [True, False])
@pytest.mark.parametrize("device", get_default_devices())
@pytest.mark.parametrize(
"exploration_type", [InteractionType.RANDOM, InteractionType.MODE]
"exploration_type", [InteractionType.RANDOM, InteractionType.DETERMINISTIC]
)
def test_gsde(
state_dim, action_dim, gSDE, device, safe, exploration_type, batch=16, bound=0.1
@@ -708,7 +708,10 @@ def test_gsde(
with set_exploration_type(exploration_type):
action1 = module(td).get("action")
action2 = actor(td.exclude("action")).get("action")
if gSDE or exploration_type == InteractionType.MODE:
if gSDE or exploration_type in (
InteractionType.DETERMINISTIC,
InteractionType.MODE,
):
torch.testing.assert_close(action1, action2)
else:
with pytest.raises(AssertionError):
2 changes: 1 addition & 1 deletion test/test_tensordictmodules.py
@@ -189,7 +189,7 @@ def test_stateful(self, safe, spec_type, lazy):
@pytest.mark.parametrize("out_keys", [["loc", "scale"], ["loc_1", "scale_1"]])
@pytest.mark.parametrize("lazy", [True, False])
@pytest.mark.parametrize(
"exp_mode", [InteractionType.MODE, InteractionType.RANDOM, None]
"exp_mode", [InteractionType.DETERMINISTIC, InteractionType.RANDOM, None]
)
def test_stateful_probabilistic(self, safe, spec_type, lazy, exp_mode, out_keys):
torch.manual_seed(0)
1 change: 1 addition & 0 deletions torchrl/modules/__init__.py
@@ -20,6 +20,7 @@
TruncatedNormal,
)
from .models import (
BatchRenorm1d,
Conv3dNet,
ConvNet,
DdpgCnnActor,
2 changes: 0 additions & 2 deletions torchrl/objectives/__init__.py
@@ -29,5 +29,3 @@
SoftUpdate,
ValueEstimators,
)

# from .value import bellman_max, c_val, dv_val, vtrace, GAE, TDLambdaEstimate, TDEstimate
2 changes: 1 addition & 1 deletion tutorials/sphinx-tutorials/coding_dqn.py
@@ -672,7 +672,7 @@ def get_loss_module(actor, gamma):
frame_skip=1,
policy_exploration=actor_explore,
environment=test_env,
exploration_type=ExplorationType.MODE,
exploration_type=ExplorationType.DETERMINISTIC,
log_keys=[("next", "reward")],
out_keys={("next", "reward"): "rewards"},
log_pbar=True,
2 changes: 1 addition & 1 deletion tutorials/sphinx-tutorials/dqn_with_rnn.py
@@ -440,7 +440,7 @@
exploration_module.step(data.numel())
updater.step()

with set_exploration_type(ExplorationType.MODE), torch.no_grad():
with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
rollout = env.rollout(10000, stoch_policy)
traj_lens.append(rollout.get(("next", "step_count")).max().item())

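The `rollout.get(("next", "step_count"))` call above reads a nested key from the rollout; a tiny illustration of that access pattern on a hand-built TensorDict (shapes and values are made up):

```python
import torch
from tensordict import TensorDict

# A fake 10-step rollout carrying a nested ("next", "step_count") entry.
rollout = TensorDict(
    {"next": {"step_count": torch.arange(1, 11).view(10, 1)}}, batch_size=[10]
)
print(rollout.get(("next", "step_count")).max().item())  # 10
```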
4 changes: 2 additions & 2 deletions tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py
@@ -817,7 +817,7 @@ def process_batch(batch: TensorDictBase) -> TensorDictBase:
target_updaters[group].step()

# Exploration sigma anneal update
exploration_policies[group].step(current_frames)
exploration_policies[group][-1].step(current_frames)

# Stop training a certain group when a condition is met (e.g., number of training iterations)
if iteration == iteration_when_stop_training_evaders:
@@ -903,7 +903,7 @@ def process_batch(batch: TensorDictBase) -> TensorDictBase:
env_with_render = env_with_render.append_transform(
VideoRecorder(logger=video_logger, tag="vmas_rendered")
)
with set_exploration_type(ExplorationType.MODE):
with set_exploration_type(ExplorationType.DETERMINISTIC):
print("Rendering rollout...")
env_with_render.rollout(100, policy=agents_exploration_policy)
print("Saving the video...")
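The `[-1]` fix above reflects that each group's exploration policy in the tutorial is a `TensorDictSequential` whose last module is the exploration-noise module; the annealing counter lives on that module, so `step()` must be called on it rather than on the sequence. A minimal sketch of the structure, using an `EGreedyModule` for brevity (the tutorial itself uses an additive-Gaussian exploration module; the toy policy and spec below are illustrative):

```python
import torch
from tensordict.nn import TensorDictModule, TensorDictSequential
from torchrl.data import OneHotDiscreteTensorSpec
from torchrl.modules import EGreedyModule

# Toy deterministic policy followed by an exploration module, mirroring the
# assumed structure: the exploration module is the last element of the sequence.
policy = TensorDictModule(
    torch.nn.Linear(3, 4), in_keys=["observation"], out_keys=["action"]
)
exploration = EGreedyModule(
    spec=OneHotDiscreteTensorSpec(4), eps_init=1.0, eps_end=0.1, annealing_num_steps=1_000
)
exploration_policy = TensorDictSequential(policy, exploration)

# The annealing state lives on the exploration module, so index the last
# element of the sequence before calling step():
exploration_policy[-1].step(100)
print(exploration_policy[-1].eps)  # eps has annealed from 1.0 towards 0.1
```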
2 changes: 1 addition & 1 deletion tutorials/sphinx-tutorials/torchrl_demo.py
@@ -652,7 +652,7 @@ def exec_sequence(params, data):
td_module(td)
print("random:", td["action"])

with set_exploration_type(ExplorationType.MODE):
with set_exploration_type(ExplorationType.DETERMINISTIC):
td_module(td)
print("mode:", td["action"])
