sony · sbsekiguchi · Jul 21, 2023 · Jul 14, 2023
diff --git a/nnabla_rl/algorithms/a2c.py b/nnabla_rl/algorithms/a2c.py
@@ -523,7 +523,7 @@ def array_and_dtype(mp_arrays_item):
     def _compute_action(self, s, *, begin_of_episode=False):
         action, info = self._exploration_actor(s, begin_of_episode=begin_of_episode)
         if self._env_info.is_discrete_action_env():
-            return np.int(action), info
+            return np.int32(action), info
         else:
             return action, info
 

diff --git a/nnabla_rl/algorithms/ppo.py b/nnabla_rl/algorithms/ppo.py
@@ -638,7 +638,7 @@ def _compute_action(self, s, *, begin_of_episode=False):
         info = {}
         info['log_prob'] = log_prob
         if self._env_info.is_discrete_action_env():
-            return np.int(action), info
+            return np.int32(action), info
         else:
             return action, info
 

diff --git a/nnabla_rl/distributions/bernoulli.py b/nnabla_rl/distributions/bernoulli.py
@@ -41,7 +41,7 @@ def __init__(self, z):
         self._distribution = NF.concatenate(self._p, 1 - self._p)
         self._log_distribution = NF.concatenate(self._log_p, self._log_1_minus_p)
 
-        labels = np.array([1, 0], dtype=np.int)
+        labels = np.array([1, 0], dtype=np.int32)
         labels = nn.Variable.from_numpy_array(labels)
         self._labels = labels
         for size in reversed(z.shape[0:-1]):

diff --git a/nnabla_rl/distributions/softmax.py b/nnabla_rl/distributions/softmax.py
@@ -42,7 +42,7 @@ def __init__(self, z):
         self._num_class = z.shape[-1]
 
         labels = np.array(
-            [label for label in range(self._num_class)], dtype=np.int)
+            [label for label in range(self._num_class)], dtype=np.int32)
         self._labels = nn.Variable.from_numpy_array(labels)
         self._actions = self._labels
         for size in reversed(z.shape[0:-1]):

diff --git a/tests/algorithms/test_common_utils.py b/tests/algorithms/test_common_utils.py
@@ -1,5 +1,5 @@
 # Copyright 2020,2021 Sony Corporation.
-# Copyright 2021,2022 Sony Group Corporation.
+# Copyright 2021,2022,2023 Sony Group Corporation.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -55,7 +55,7 @@ def _collect_dummy_experience(self, num_episodes=1, episode_length=3, tupled_sta
                 r = np.ones(1, )
                 non_terminal = np.ones(1, )
                 if i == episode_length-1:
-                    non_terminal = 0
+                    non_terminal = np.zeros(1, )
                 experience.append((s_current, a, r, non_terminal, s_next))
         return experience