Skip to content

Commit eb7c473

Browse files
committed
remove print log
1 parent aaaa094 commit eb7c473

2 files changed

Lines changed: 5 additions & 11 deletions

File tree

src/maxdiffusion/configs/base_wan_14b.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,3 +319,4 @@ eval_every: -1
319319
eval_data_dir: ""
320320
enable_generate_video_for_eval: False # This will increase the used TPU memory.
321321
eval_max_number_of_samples_in_bucket: 60
322+
eval_max_processed_batch_size: 8 # This is the max batch size per device for eval step. If the global eval batch size is larger than this, the eval step will be run multiple times.

src/maxdiffusion/trainers/wan_trainer.py

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ def start_training(self):
212212

213213
pipeline = self.load_checkpoint()
214214
# Generate a sample before training to compare against generated sample after training.
215-
# pretrained_video_path = generate_sample(self.config, pipeline, filename_prefix="pre-training-")
215+
pretrained_video_path = generate_sample(self.config, pipeline, filename_prefix="pre-training-")
216216

217217
if self.config.eval_every == -1 or (not self.config.enable_generate_video_for_eval):
218218
# save some memory.
@@ -230,8 +230,8 @@ def start_training(self):
230230
# Returns pipeline with trained transformer state
231231
pipeline = self.training_loop(pipeline, optimizer, learning_rate_scheduler, train_data_iterator)
232232

233-
# posttrained_video_path = generate_sample(self.config, pipeline, filename_prefix="post-training-")
234-
# print_ssim(pretrained_video_path, posttrained_video_path)
233+
posttrained_video_path = generate_sample(self.config, pipeline, filename_prefix="post-training-")
234+
print_ssim(pretrained_video_path, posttrained_video_path)
235235

236236
def training_loop(self, pipeline, optimizer, learning_rate_scheduler, train_data_iterator):
237237
mesh = pipeline.mesh
@@ -440,11 +440,6 @@ def loss_fn(params, latents, encoder_hidden_states, timesteps):
440440
# Reconstruct the model from its definition and parameters
441441
model = nnx.merge(state.graphdef, params, state.rest_of_state)
442442

443-
# Prepare inputs
444-
# latents = data["latents"].astype(config.weights_dtype)
445-
# encoder_hidden_states = data["encoder_hidden_states"].astype(config.weights_dtype)
446-
# timesteps = data["timesteps"].astype("int64")
447-
448443
noise = jax.random.normal(key=new_rng, shape=latents.shape, dtype=latents.dtype)
449444
noisy_latents = scheduler.add_noise(scheduler_state, latents, noise, timesteps)
450445

@@ -469,12 +464,11 @@ def loss_fn(params, latents, encoder_hidden_states, timesteps):
469464
# Directly compute the loss without calculating gradients.
470465
# The model's state.params are used but not updated.
471466
bs = len(data["latents"])
472-
single_batch_size = min(8, config.global_batch_size_to_train_on)
467+
single_batch_size = min(config.eval_max_processed_batch_size, config.global_batch_size_to_train_on)
473468
losses = jnp.zeros(bs)
474469
for i in range(0, bs, single_batch_size):
475470
start = i
476471
end = min(i + single_batch_size, bs)
477-
jax.debug.print("Eval step processing samples {start} to {end}", start=start, end=end)
478472
latents= data["latents"][start:end, :].astype(config.weights_dtype)
479473
encoder_hidden_states = data["encoder_hidden_states"][start:end, :].astype(config.weights_dtype)
480474
timesteps = data["timesteps"][start:end].astype("int64")
@@ -483,7 +477,6 @@ def loss_fn(params, latents, encoder_hidden_states, timesteps):
483477

484478
# Structure the metrics for logging and aggregation
485479
metrics = {"scalar": {"learning/eval_loss": losses}}
486-
jax.debug.print("Eval step losses: {losses}", losses=losses)
487480

488481
# Return the computed metrics and the new RNG key for the next eval step
489482
return metrics, new_rng

0 commit comments

Comments (0)