Address review comment: document the per-sample mean loss reduction in loss_fn

susanbao · susanbao · commit c3f53b18a9b7 · 2025-10-03T22:01:58.000Z
diff --git a/src/maxdiffusion/trainers/wan_trainer.py b/src/maxdiffusion/trainers/wan_trainer.py
@@ -465,6 +465,7 @@ def loss_fn(params, latents, encoder_hidden_states, timesteps, rng):
     training_weight = jnp.expand_dims(scheduler.training_weight(scheduler_state, timesteps), axis=(1, 2, 3, 4))
     loss = (training_target - model_pred) ** 2
     loss = loss * training_weight
+    # Calculate the mean loss per sample across all non-batch dimensions.
     loss = loss.reshape(loss.shape[0], -1).mean(axis=1)
 
     return loss