Skip to content

Commit 97529f0

Browse files
committed
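Fix bug in SAC: pass `mean.shape` to `normal.sample()` in `evaluate` and `get_action` so the sampled noise `z` has the same shape as `mean`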
1 parent b545a1a commit 97529f0

1 file changed

Lines changed: 2 additions & 2 deletions

File tree

examples/reinforcement_learning/tutorial_SAC.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ def evaluate(self, state, epsilon=1e-6):
185185
std = tf.math.exp(log_std) # no clip in evaluation, clip affects gradients flow
186186

187187
normal = Normal(0, 1)
188-
z = normal.sample()
188+
z = normal.sample(mean.shape)
189189
action_0 = tf.math.tanh(mean + std * z) # TanhNormal distribution as actions; reparameterization trick
190190
action = self.action_range * action_0
191191
# according to original paper, with an extra last term for normalizing different action range
@@ -204,7 +204,7 @@ def get_action(self, state, greedy=False):
204204
std = tf.math.exp(log_std)
205205

206206
normal = Normal(0, 1)
207-
z = normal.sample()
207+
z = normal.sample(mean.shape)
208208
action = self.action_range * tf.math.tanh(
209209
mean + std * z
210210
) # TanhNormal distribution as actions; reparameterization trick

0 commit comments

Comments
 (0)