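Fix SAC noise sampling: draw z with the shape of mean in evaluate() and get_action() instead of a single scalar sample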

quantumiracle · quantumiracle · commit 97529f0f1b5f · 2020-05-18T15:25:44.000-04:00
diff --git a/examples/reinforcement_learning/tutorial_SAC.py b/examples/reinforcement_learning/tutorial_SAC.py
@@ -185,7 +185,7 @@ def evaluate(self, state, epsilon=1e-6):
         std = tf.math.exp(log_std)  # no clip in evaluation, clip affects gradients flow
 
         normal = Normal(0, 1)
-        z = normal.sample()
+        z = normal.sample(mean.shape)
         action_0 = tf.math.tanh(mean + std * z)  # TanhNormal distribution as actions; reparameterization trick
         action = self.action_range * action_0
         # according to original paper, with an extra last term for normalizing different action range
@@ -204,7 +204,7 @@ def get_action(self, state, greedy=False):
         std = tf.math.exp(log_std)
 
         normal = Normal(0, 1)
-        z = normal.sample()
+        z = normal.sample(mean.shape)
         action = self.action_range * tf.math.tanh(
             mean + std * z
         )  # TanhNormal distribution as actions; reparameterization trick