Address review comments

susanbao · susanbao · commit e6f495e4e896 · 2025-10-03T21:56:35.000Z
diff --git a/src/maxdiffusion/configs/base_wan_14b.yml b/src/maxdiffusion/configs/base_wan_14b.yml
@@ -318,6 +318,6 @@ quantization_calibration_method: "absmax"
 eval_every: -1
 eval_data_dir: ""
 enable_generate_video_for_eval: False # This will increase the used TPU memory.
-eval_max_number_of_samples_in_bucket: 60
+eval_max_number_of_samples_in_bucket: 60 # Maximum number of evaluation samples per bucket, computed as num_eval_samples / len(considered_timesteps_list).
 
 enable_ssim: True
diff --git a/src/maxdiffusion/trainers/wan_trainer.py b/src/maxdiffusion/trainers/wan_trainer.py
@@ -472,6 +472,7 @@ def loss_fn(params, latents, encoder_hidden_states, timesteps, rng):
   # --- Key Difference from train_step ---
   # Directly compute the loss without calculating gradients.
   # The model's state.params are used but not updated.
+  # TODO(coolkp): Explore optimizing the creation of PRNGs in a vmap or statically outside of the loop
   bs = len(data["latents"])
   single_batch_size = config.global_batch_size_to_train_on
   losses = jnp.zeros(bs)