
Commit

init
vmoens committed Jul 24, 2024
1 parent 1ca33a2 commit 65ac139
Showing 9 changed files with 25 additions and 25 deletions.
10 changes: 5 additions & 5 deletions tutorials/sphinx-tutorials/coding_ddpg.py
@@ -11,7 +11,7 @@
# Overview
# --------
#
- # TorchRL separates the training of RL sota-implementations in various pieces that will be
+ # TorchRL separates the training of RL algorithms in various pieces that will be
# assembled in your training script: the environment, the data collection and
# storage, the model and finally the loss function.
#
@@ -167,7 +167,7 @@
# the losses without it. However, we encourage its usage for the following
# reason.
#
- # The reason TorchRL does this is that RL sota-implementations often execute the same
+ # The reason TorchRL does this is that RL algorithms often execute the same
# model with different sets of parameters, called "trainable" and "target"
# parameters.
# The "trainable" parameters are those that the optimizer needs to fit. The
@@ -272,7 +272,7 @@ def make_value_estimator(self, value_type: ValueEstimators, **hyperparams):


###############################################################################
- # The ``make_value_estimator`` method can but does not need to be called: ifgg
+ # The ``make_value_estimator`` method can but does not need to be called: if
# not, the :class:`~torchrl.objectives.LossModule` will query this method with
# its default estimator.
#
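As a rough usage sketch, continuing the hypothetical loss_module above (the estimator choice and gamma are illustrative):

    from torchrl.objectives import ValueEstimators

    # optional: pick the value estimator explicitly and pass its hyperparameters
    loss_module.make_value_estimator(ValueEstimators.TD0, gamma=0.99)
    # if this call is omitted, the LossModule falls back to its default estimator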
@@ -406,7 +406,7 @@ class DDPGLoss(LossModule):
# Environment
# -----------
#
- # In most sota-implementations, the first thing that needs to be taken care of is the
+ # In most algorithms, the first thing that needs to be taken care of is the
# construction of the environment as it conditions the remainder of the
# training script.
#
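For instance, a bare-bones environment setup might look like this (env name and transform are illustrative, assuming Gym is installed):

    from torchrl.envs import GymEnv, StepCounter, TransformedEnv

    base_env = GymEnv("Pendulum-v1")
    env = TransformedEnv(base_env, StepCounter())  # transforms shape what the rest of the script sees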
@@ -1061,7 +1061,7 @@ def ceil_div(x, y):
# Target network updater
# ~~~~~~~~~~~~~~~~~~~~~~
#
- # Target networks are a crucial part of off-policy RL sota-implementations.
+ # Target networks are a crucial part of off-policy RL algorithms.
# Updating the target network parameters is made easy thanks to the
# :class:`~torchrl.objectives.HardUpdate` and :class:`~torchrl.objectives.SoftUpdate`
# classes. They're built with the loss module as argument, and the update is
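A minimal sketch of such an updater, assuming the loss_module from the earlier sketch (the eps value is illustrative):

    from torchrl.objectives import SoftUpdate

    target_updater = SoftUpdate(loss_module, eps=0.995)  # eps close to 1 means slowly moving targets
    # after each optimization step:
    target_updater.step()  # nudges the target parameters towards the trained ones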
8 changes: 4 additions & 4 deletions tutorials/sphinx-tutorials/coding_dqn.py
@@ -42,7 +42,7 @@
# estimated return;
# - how to collect data from your environment efficiently and store them
# in a replay buffer;
- # - how to use multi-step, a simple preprocessing step for off-policy sota-implementations;
+ # - how to use multi-step, a simple preprocessing step for off-policy algorithms;
# - and finally how to evaluate your model.
#
# **Prerequisites**: We encourage you to get familiar with torchrl through the
@@ -365,7 +365,7 @@ def make_model(dummy_env):
# Replay buffers
# ~~~~~~~~~~~~~~
#
- # Replay buffers play a central role in off-policy RL sota-implementations such as DQN.
+ # Replay buffers play a central role in off-policy RL algorithms such as DQN.
# They constitute the dataset we will be sampling from during training.
#
# Here, we will use a regular sampling strategy, although a prioritized RB
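As a rough sketch of such a buffer (capacity and batch size are illustrative, not the tutorial's values):

    from torchrl.data import LazyTensorStorage, TensorDictReplayBuffer

    replay_buffer = TensorDictReplayBuffer(
        storage=LazyTensorStorage(100_000),  # storage is allocated lazily on first write
        batch_size=256,
    )
    # replay_buffer.extend(data)       # `data`: a TensorDict batch from the collector
    # sample = replay_buffer.sample()  # a 256-element TensorDict fed to the loss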
@@ -471,13 +471,13 @@ def get_collector(
# Target parameters
# ~~~~~~~~~~~~~~~~~
#
- # Many off-policy RL sota-implementations use the concept of "target parameters" when it
+ # Many off-policy RL algorithms use the concept of "target parameters" when it
# comes to estimate the value of the next state or state-action pair.
# The target parameters are lagged copies of the model parameters. Because
# their predictions mismatch those of the current model configuration, they
# help learning by putting a pessimistic bound on the value being estimated.
# This is a powerful trick (known as "Double Q-Learning") that is ubiquitous
- # in similar sota-implementations.
+ # in similar algorithms.
#
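A bare-bones illustration of the idea in plain PyTorch (the network and tau are placeholders; TorchRL's updaters wrap this kind of logic):

    import copy

    import torch

    qnet = torch.nn.Linear(4, 2)        # stand-in for the Q-network
    target_qnet = copy.deepcopy(qnet)   # lagged copy used to bootstrap value targets
    tau = 0.005
    with torch.no_grad():               # soft update, run every few optimizer steps
        for p, p_tgt in zip(qnet.parameters(), target_qnet.parameters()):
            p_tgt.lerp_(p, tau)         # p_tgt <- (1 - tau) * p_tgt + tau * p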


2 changes: 1 addition & 1 deletion tutorials/sphinx-tutorials/coding_ppo.py
@@ -518,7 +518,7 @@
# Replay buffer
# -------------
#
- # Replay buffers are a common building piece of off-policy RL sota-implementations.
+ # Replay buffers are a common building piece of off-policy RL algorithms.
# In on-policy contexts, a replay buffer is refilled every time a batch of
# data is collected, and its data is repeatedly consumed for a certain number
# of epochs.
4 changes: 2 additions & 2 deletions tutorials/sphinx-tutorials/getting-started-1.py
@@ -117,7 +117,7 @@
# Probabilistic policies
# ----------------------
#
- # Policy-optimization sota-implementations like
+ # Policy-optimization algorithms like
# `PPO <https://arxiv.org/abs/1707.06347>`_ require the policy to be
# stochastic: unlike in the examples above, the module now encodes a map from
# the observation space to a parameter space encoding a distribution over the
@@ -161,7 +161,7 @@
#
# - Since we asked for it during the construction of the actor, the
# log-probability of the actions given the distribution at that time is
- # also written. This is necessary for sota-implementations like PPO.
+ # also written. This is necessary for algorithms like PPO.
# - The parameters of the distribution are returned within the output
# tensordict too under the ``"loc"`` and ``"scale"`` entries.
#
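A condensed sketch of such a stochastic policy (observation/action sizes are arbitrary; this mirrors, but is not, the tutorial's code):

    import torch
    from tensordict.nn import TensorDictModule
    from tensordict.nn.distributions import NormalParamExtractor
    from torchrl.modules import ProbabilisticActor, TanhNormal

    net = torch.nn.Sequential(torch.nn.Linear(3, 2), NormalParamExtractor())
    params_module = TensorDictModule(net, in_keys=["observation"], out_keys=["loc", "scale"])
    policy = ProbabilisticActor(
        params_module,
        in_keys=["loc", "scale"],
        distribution_class=TanhNormal,
        return_log_prob=True,  # writes the log-probability needed by PPO-style losses
    )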
6 changes: 3 additions & 3 deletions tutorials/sphinx-tutorials/getting-started-2.py
@@ -39,9 +39,9 @@
# ----------------------
#
# In RL, innovation typically involves the exploration of novel methods
- # for optimizing a policy (i.e., new sota-implementations), rather than focusing
+ # for optimizing a policy (i.e., new algorithms), rather than focusing
# on new architectures, as seen in other domains. Within TorchRL,
- # these sota-implementations are encapsulated within loss modules. A loss
+ # these algorithms are encapsulated within loss modules. A loss
# module orchestrates the various components of your algorithm and
# yields a set of loss values that can be backpropagated
# through to train the corresponding components.
@@ -145,7 +145,7 @@
# -----------------------------------------
#
# Another important aspect to consider is the presence of target parameters
- # in off-policy sota-implementations like DDPG. Target parameters typically represent
+ # in off-policy algorithms like DDPG. Target parameters typically represent
# a delayed or smoothed version of the parameters over time, and they play
# a crucial role in value estimation during policy training. Utilizing target
# parameters for policy training often proves to be significantly more
4 changes: 2 additions & 2 deletions tutorials/sphinx-tutorials/getting-started-3.py
@@ -29,7 +29,7 @@
# dataloaders are referred to as ``DataCollectors``. Most of the time,
# data collection does not stop at the collection of raw data,
# as the data needs to be stored temporarily in a buffer
- # (or equivalent structure for on-policy sota-implementations) before being consumed
+ # (or equivalent structure for on-policy algorithms) before being consumed
# by the :ref:`loss module <gs_optim>`. This tutorial will explore
# these two classes.
#
@@ -93,7 +93,7 @@

#################################
# Data collectors are very useful when it comes to coding state-of-the-art
- # sota-implementations, as performance is usually measured by the capability of a
+ # algorithms, as performance is usually measured by the capability of a
# specific technique to solve a problem in a given number of interactions with
# the environment (the ``total_frames`` argument in the collector).
# For this reason, most training loops in our examples look like this:
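A condensed sketch of that pattern (assuming Gym is installed; env name, frame counts and the use of a random policy are illustrative):

    from torchrl.collectors import SyncDataCollector
    from torchrl.envs import GymEnv

    env = GymEnv("CartPole-v1")
    collector = SyncDataCollector(
        env, policy=None, frames_per_batch=200, total_frames=2_000
    )
    for data in collector:  # stops after `total_frames` frames have been collected
        ...                 # extend the buffer, compute the loss, optimize, etc.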
4 changes: 2 additions & 2 deletions tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py
@@ -651,7 +651,7 @@
# Replay buffer
# -------------
#
- # Replay buffers are a common building piece of off-policy RL sota-implementations.
+ # Replay buffers are a common building piece of off-policy RL algorithms.
# There are many types of buffers, in this tutorial we use a basic buffer to store and sample tensordict
# data randomly.
#
@@ -925,7 +925,7 @@ def process_batch(batch: TensorDictBase) -> TensorDictBase:
#
# Now that you are proficient with multi-agent DDPG, you can check out all the TorchRL multi-agent implementations in the
# GitHub repository.
- # These are code-only scripts of many MARL sota-implementations such as the ones seen in this tutorial,
+ # These are code-only scripts of many MARL algorithms such as the ones seen in this tutorial,
# QMIX, MADDPG, IQL, and many more!
#
# Also do remember to check out our tutorial: :doc:`/tutorials/multiagent_ppo`.
6 changes: 3 additions & 3 deletions tutorials/sphinx-tutorials/multiagent_ppo.py
@@ -55,7 +55,7 @@
# the foundational policy-optimization algorithm. For more information, see the
# `Proximal Policy Optimization Algorithms <https://arxiv.org/abs/1707.06347>`_ paper.
#
- # This type of sota-implementations is usually trained *on-policy*. This means that, at every learning iteration, we have a
+ # This type of algorithms is usually trained *on-policy*. This means that, at every learning iteration, we have a
# **sampling** and a **training** phase. In the **sampling** phase of iteration :math:`t`, rollouts are collected
# form agents' interactions in the environment using the current policies :math:`\mathbf{\pi}_t`.
# In the **training** phase, all the collected rollouts are immediately fed to the training process to perform
@@ -551,7 +551,7 @@
# Replay buffer
# -------------
#
- # Replay buffers are a common building piece of off-policy RL sota-implementations.
+ # Replay buffers are a common building piece of off-policy RL algorithms.
# In on-policy contexts, a replay buffer is refilled every time a batch of
# data is collected, and its data is repeatedly consumed for a certain number
# of epochs.
@@ -780,7 +780,7 @@
#
# Now that you are proficient with multi-agent DDPG, you can check out all the TorchRL multi-agent implementations in the
# GitHub repository.
- # These are code-only scripts of many popular MARL sota-implementations such as the ones seen in this tutorial,
+ # These are code-only scripts of many popular MARL algorithms such as the ones seen in this tutorial,
# QMIX, MADDPG, IQL, and many more!
#
# You can also check out our other multi-agent tutorial on how to train competitive
6 changes: 3 additions & 3 deletions tutorials/sphinx-tutorials/torchrl_demo.py
@@ -162,13 +162,13 @@
# │ └── "trainers.py"
# └── "version.py"
#
- # Unlike other domains, RL is less about media than *sota-implementations*. As such, it
+ # Unlike other domains, RL is less about media than *algorithms*. As such, it
# is harder to make truly independent components.
#
# What TorchRL is not:
#
- # * a collection of sota-implementations: we do not intend to provide SOTA implementations of RL sota-implementations,
- # but we provide these sota-implementations only as examples of how to use the library.
+ # * a collection of algorithms: we do not intend to provide SOTA implementations of RL algorithms,
+ # but we provide these algorithms only as examples of how to use the library.
#
# * a research framework: modularity in TorchRL comes in two flavours. First, we try
# to build re-usable components, such that they can be easily swapped with each other.