diff --git a/.github/unittest/linux/scripts/environment.yml b/.github/unittest/linux/scripts/environment.yml index 30e01cfc4b5..2dca2a6e9ad 100644 --- a/.github/unittest/linux/scripts/environment.yml +++ b/.github/unittest/linux/scripts/environment.yml @@ -24,7 +24,8 @@ dependencies: - tensorboard - imageio==2.26.0 - wandb - - dm_control + - dm_control<1.0.21 + - mujoco<3.2.1 - mlflow - av - coverage diff --git a/.github/unittest/linux/scripts/run_all.sh b/.github/unittest/linux/scripts/run_all.sh index 38235043d3f..17a53648f8c 100755 --- a/.github/unittest/linux/scripts/run_all.sh +++ b/.github/unittest/linux/scripts/run_all.sh @@ -91,7 +91,7 @@ echo "installing gymnasium" pip3 install "gymnasium" pip3 install ale_py pip3 install mo-gymnasium[mujoco] # requires here bc needs mujoco-py -pip3 install mujoco -U +pip3 install "mujoco<3.2.1" -U # sanity check: remove? python3 -c """ diff --git a/.github/unittest/linux_distributed/scripts/environment.yml b/.github/unittest/linux_distributed/scripts/environment.yml index 6d27071791b..d7eabcdea4f 100644 --- a/.github/unittest/linux_distributed/scripts/environment.yml +++ b/.github/unittest/linux_distributed/scripts/environment.yml @@ -23,7 +23,8 @@ dependencies: - tensorboard - imageio==2.26.0 - wandb - - dm_control + - dm_control<1.0.21 + - mujoco<3.2.1 - mlflow - av - coverage diff --git a/.github/unittest/linux_examples/scripts/environment.yml b/.github/unittest/linux_examples/scripts/environment.yml index 688921f826a..e99d6133963 100644 --- a/.github/unittest/linux_examples/scripts/environment.yml +++ b/.github/unittest/linux_examples/scripts/environment.yml @@ -21,7 +21,8 @@ dependencies: - scipy - hydra-core - imageio==2.26.0 - - dm_control + - dm_control<1.0.21 + - mujoco<3.2.1 - mlflow - av - coverage diff --git a/.github/unittest/linux_libs/scripts_envpool/environment.yml b/.github/unittest/linux_libs/scripts_envpool/environment.yml index 9259a2a4a43..9ff3396056b 100644 --- a/.github/unittest/linux_libs/scripts_envpool/environment.yml +++ b/.github/unittest/linux_libs/scripts_envpool/environment.yml @@ -18,5 +18,6 @@ dependencies: - expecttest - pyyaml - scipy - - dm_control + - dm_control<1.0.21 + - mujoco<3.2.1 - coverage diff --git a/.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml b/.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml index d34011e7bdc..ba8567450c9 100644 --- a/.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml +++ b/.github/unittest/linux_olddeps/scripts_gym_0_13/environment.yml @@ -22,6 +22,7 @@ dependencies: - scipy - hydra-core - dm_control -e git+https://github.com/deepmind/dm_control.git@c053360edea6170acfd9c8f65446703307d9d352#egg={dm_control} + - mujoco<3.2.1 - patchelf - pyopengl==3.1.4 - ray diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 8008c8b5bbe..f698f67763f 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -35,7 +35,7 @@ jobs: python3 setup.py develop python3 -m pip install pytest pytest-benchmark python3 -m pip install "gym[accept-rom-license,atari]" - python3 -m pip install dm_control + python3 -m pip install "dm_control<1.0.21" "mujoco<3.2.1" export TD_GET_DEFAULTS_TO_NONE=1 - name: Run benchmarks run: | @@ -97,7 +97,7 @@ jobs: python3 setup.py develop python3 -m pip install pytest pytest-benchmark python3 -m pip install "gym[accept-rom-license,atari]" - python3 -m pip install dm_control + python3 -m pip install "dm_control<1.0.21" "mujoco<3.2.1" export TD_GET_DEFAULTS_TO_NONE=1 - name: check GPU presence run: | diff --git a/.github/workflows/benchmarks_pr.yml b/.github/workflows/benchmarks_pr.yml index e994e860b9c..5bec0f23d1e 100644 --- a/.github/workflows/benchmarks_pr.yml +++ b/.github/workflows/benchmarks_pr.yml @@ -34,7 +34,7 @@ jobs: python3 setup.py develop python3 -m pip install pytest pytest-benchmark python3 -m pip install "gym[accept-rom-license,atari]" - python3 -m pip install dm_control + python3 -m pip install "dm_control<1.0.21" "mujoco<3.2.1" export TD_GET_DEFAULTS_TO_NONE=1 - name: Setup benchmarks run: | @@ -108,7 +108,7 @@ jobs: python3 setup.py develop python3 -m pip install pytest pytest-benchmark python3 -m pip install "gym[accept-rom-license,atari]" - python3 -m pip install dm_control + python3 -m pip install "dm_control<1.0.21" "mujoco<3.2.1" export TD_GET_DEFAULTS_TO_NONE=1 - name: check GPU presence run: | diff --git a/README.md b/README.md index f82a8ff0c4c..9b812a21aa0 100644 --- a/README.md +++ b/README.md @@ -478,7 +478,7 @@ And it is `functorch` and `torch.compile` compatible! policy_explore = EGreedyWrapper(policy) with set_exploration_type(ExplorationType.RANDOM): tensordict = policy_explore(tensordict) # will use eps-greedy - with set_exploration_type(ExplorationType.MODE): + with set_exploration_type(ExplorationType.DETERMINISTIC): tensordict = policy_explore(tensordict) # will not use eps-greedy ``` diff --git a/docs/requirements.txt b/docs/requirements.txt index f6138cac30a..60c94749ee7 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -14,7 +14,8 @@ docutils sphinx_design torchvision -dm_control +dm_control<1.0.21 +mujoco<3.2.1 atari-py ale-py gym[classic_control,accept-rom-license] diff --git a/docs/source/reference/modules.rst b/docs/source/reference/modules.rst index c73ed5083fd..5b05fc32194 100644 --- a/docs/source/reference/modules.rst +++ b/docs/source/reference/modules.rst @@ -319,7 +319,7 @@ Regular modules Conv3dNet SqueezeLayer Squeeze2dLayer - BatchRenorm + BatchRenorm1d Algorithm-specific modules ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/reference/objectives.rst b/docs/source/reference/objectives.rst index 1d92c390a4e..36f0ebe36d9 100644 --- a/docs/source/reference/objectives.rst +++ b/docs/source/reference/objectives.rst @@ -157,7 +157,7 @@ CrossQ :toctree: generated/ :template: rl_template_noinherit.rst - CrossQ + CrossQLoss IQL ---- diff --git a/sota-implementations/crossq/crossq.py b/sota-implementations/crossq/crossq.py index df34d4ae68d..c5a1b88eea3 100644 --- a/sota-implementations/crossq/crossq.py +++ b/sota-implementations/crossq/crossq.py @@ -203,7 +203,7 @@ def main(cfg: "DictConfig"): # noqa: F821 # Evaluation if abs(collected_frames % eval_iter) < frames_per_batch: - with set_exploration_type(ExplorationType.MODE), torch.no_grad(): + with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad(): eval_start = time.time() eval_rollout = eval_env.rollout( eval_rollout_steps, diff --git a/sota-implementations/td3_bc/td3_bc.py b/sota-implementations/td3_bc/td3_bc.py index 7c43fdc1a12..b3e8ed3b880 100644 --- a/sota-implementations/td3_bc/td3_bc.py +++ b/sota-implementations/td3_bc/td3_bc.py @@ -128,7 +128,7 @@ def main(cfg: "DictConfig"): # noqa: F821 # evaluation if i % evaluation_interval == 0: - with set_exploration_type(ExplorationType.MODE), torch.no_grad(): + with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad(): eval_td = eval_env.rollout( max_steps=eval_steps, policy=model[0], auto_cast_to_device=True ) diff --git a/test/test_exploration.py b/test/test_exploration.py index 83ee4bc4220..b2fd97d986f 100644 --- a/test/test_exploration.py +++ b/test/test_exploration.py @@ -644,7 +644,7 @@ def test_no_spec_error(self, device): @pytest.mark.parametrize("safe", [True, False]) @pytest.mark.parametrize("device", get_default_devices()) @pytest.mark.parametrize( - "exploration_type", [InteractionType.RANDOM, InteractionType.MODE] + "exploration_type", [InteractionType.RANDOM, InteractionType.DETERMINISTIC] ) def test_gsde( state_dim, action_dim, gSDE, device, safe, exploration_type, batch=16, bound=0.1 @@ -708,7 +708,10 @@ def test_gsde( with set_exploration_type(exploration_type): action1 = module(td).get("action") action2 = actor(td.exclude("action")).get("action") - if gSDE or exploration_type == InteractionType.MODE: + if gSDE or exploration_type in ( + InteractionType.DETERMINISTIC, + InteractionType.MODE, + ): torch.testing.assert_close(action1, action2) else: with pytest.raises(AssertionError): diff --git a/test/test_tensordictmodules.py b/test/test_tensordictmodules.py index 38360a464e0..42e0880e6a4 100644 --- a/test/test_tensordictmodules.py +++ b/test/test_tensordictmodules.py @@ -189,7 +189,7 @@ def test_stateful(self, safe, spec_type, lazy): @pytest.mark.parametrize("out_keys", [["loc", "scale"], ["loc_1", "scale_1"]]) @pytest.mark.parametrize("lazy", [True, False]) @pytest.mark.parametrize( - "exp_mode", [InteractionType.MODE, InteractionType.RANDOM, None] + "exp_mode", [InteractionType.DETERMINISTIC, InteractionType.RANDOM, None] ) def test_stateful_probabilistic(self, safe, spec_type, lazy, exp_mode, out_keys): torch.manual_seed(0) diff --git a/torchrl/modules/__init__.py b/torchrl/modules/__init__.py index 0a06e5844a0..c246b553e95 100644 --- a/torchrl/modules/__init__.py +++ b/torchrl/modules/__init__.py @@ -20,6 +20,7 @@ TruncatedNormal, ) from .models import ( + BatchRenorm1d, Conv3dNet, ConvNet, DdpgCnnActor, diff --git a/torchrl/objectives/__init__.py b/torchrl/objectives/__init__.py index aa13a88c7e9..55e2160770b 100644 --- a/torchrl/objectives/__init__.py +++ b/torchrl/objectives/__init__.py @@ -29,5 +29,3 @@ SoftUpdate, ValueEstimators, ) - -# from .value import bellman_max, c_val, dv_val, vtrace, GAE, TDLambdaEstimate, TDEstimate diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index e9f2085d3df..2da1967e5ad 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -672,7 +672,7 @@ def get_loss_module(actor, gamma): frame_skip=1, policy_exploration=actor_explore, environment=test_env, - exploration_type=ExplorationType.MODE, + exploration_type=ExplorationType.DETERMINISTIC, log_keys=[("next", "reward")], out_keys={("next", "reward"): "rewards"}, log_pbar=True, diff --git a/tutorials/sphinx-tutorials/dqn_with_rnn.py b/tutorials/sphinx-tutorials/dqn_with_rnn.py index 28a9638c6f6..8931f483384 100644 --- a/tutorials/sphinx-tutorials/dqn_with_rnn.py +++ b/tutorials/sphinx-tutorials/dqn_with_rnn.py @@ -440,7 +440,7 @@ exploration_module.step(data.numel()) updater.step() - with set_exploration_type(ExplorationType.MODE), torch.no_grad(): + with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad(): rollout = env.rollout(10000, stoch_policy) traj_lens.append(rollout.get(("next", "step_count")).max().item()) diff --git a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py index 77574b765e7..fc1a22d50cf 100644 --- a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py +++ b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py @@ -817,7 +817,7 @@ def process_batch(batch: TensorDictBase) -> TensorDictBase: target_updaters[group].step() # Exploration sigma anneal update - exploration_policies[group].step(current_frames) + exploration_policies[group][-1].step(current_frames) # Stop training a certain group when a condition is met (e.g., number of training iterations) if iteration == iteration_when_stop_training_evaders: @@ -903,7 +903,7 @@ def process_batch(batch: TensorDictBase) -> TensorDictBase: env_with_render = env_with_render.append_transform( VideoRecorder(logger=video_logger, tag="vmas_rendered") ) - with set_exploration_type(ExplorationType.MODE): + with set_exploration_type(ExplorationType.DETERMINISTIC): print("Rendering rollout...") env_with_render.rollout(100, policy=agents_exploration_policy) print("Saving the video...") diff --git a/tutorials/sphinx-tutorials/torchrl_demo.py b/tutorials/sphinx-tutorials/torchrl_demo.py index 9d25da0a4cd..29192d1c10e 100644 --- a/tutorials/sphinx-tutorials/torchrl_demo.py +++ b/tutorials/sphinx-tutorials/torchrl_demo.py @@ -652,7 +652,7 @@ def exec_sequence(params, data): td_module(td) print("random:", td["action"]) -with set_exploration_type(ExplorationType.MODE): +with set_exploration_type(ExplorationType.DETERMINISTIC): td_module(td) print("mode:", td["action"])