Skip to content

Commit 049940e

Browse files
committed
fix cross attention
1 parent 7d7607a commit 049940e

2 files changed

Lines changed: 110 additions & 96 deletions

File tree

src/maxdiffusion/generate_wan_animate.py

Lines changed: 40 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,31 @@
11
# Copyright 2026 Google LLC
22
# Licensed under the Apache License, Version 2.0 (the "License");
33

4-
import jax
4+
"""Wan Animate inference entrypoint."""
5+
56
import os
67
import time
8+
79
from absl import app
8-
from maxdiffusion import pyconfig, max_logging, max_utils
9-
from maxdiffusion.train_utils import transformer_engine_context
10-
from maxdiffusion.utils import export_to_video
11-
from maxdiffusion.utils.loading_utils import load_image, load_video
1210
import flax
13-
from maxdiffusion.pipelines.wan.wan_pipeline_animate import WanAnimatePipeline
11+
import jax
1412
import numpy as np
1513
from PIL import Image
1614

15+
from maxdiffusion import max_logging, max_utils, pyconfig
16+
from maxdiffusion.pipelines.wan.wan_pipeline_animate import WanAnimatePipeline
17+
from maxdiffusion.train_utils import transformer_engine_context
18+
from maxdiffusion.utils import export_to_video
19+
from maxdiffusion.utils.loading_utils import load_image, load_video
20+
1721
jax.config.update("jax_use_shardy_partitioner", True)
1822

1923

2024
def _get_animate_inference_settings(config):
2125
"""Resolve animate-specific inference settings with upstream defaults."""
2226
return {
2327
"segment_frame_length": getattr(config, "segment_frame_length", 77),
24-
"prev_segment_conditioning_frames": getattr(config, "prev_segment_conditioning_frames", 1),
28+
"prev_segment_conditioning_frames": getattr(config, "prev_segment_conditioning_frames", 5),
2529
"motion_encode_batch_size": getattr(config, "motion_encode_batch_size", None),
2630
"guidance_scale": getattr(config, "animate_guidance_scale", 1.0),
2731
}
@@ -35,6 +39,7 @@ def _frame_summary(name, frames):
3539

3640

3741
def run(config):
42+
"""Run Wan Animate inference and write the generated videos to disk."""
3843
writer = max_utils.initialize_summary_writer(config)
3944
if jax.process_index() == 0 and writer:
4045
max_logging.log(f"TensorBoard logs will be written to: {config.tensorboard_dir}")
@@ -68,9 +73,7 @@ def run(config):
6873
motion_encoder_size = pipeline.transformer.config.motion_encoder_size
6974

7075
if pose_video_path and face_video_path:
71-
max_logging.log(
72-
f"Loading preprocessed videos from disk. pose_video={pose_video_path}, face_video={face_video_path}"
73-
)
76+
max_logging.log(f"Loading preprocessed videos from disk. pose_video={pose_video_path}, face_video={face_video_path}")
7477
pose_video = load_video(pose_video_path)
7578
face_video = load_video(face_video_path)
7679
num_frames = min(num_frames, len(pose_video), len(face_video))
@@ -85,7 +88,9 @@ def run(config):
8588
"For real outputs provide preprocessed pose_video_path and face_video_path."
8689
)
8790
pose_video = [Image.fromarray(np.zeros((height, width, 3), dtype=np.uint8)) for _ in range(num_frames)]
88-
face_video = [Image.fromarray(np.zeros((motion_encoder_size, motion_encoder_size, 3), dtype=np.uint8)) for _ in range(num_frames)]
91+
face_video = [
92+
Image.fromarray(np.zeros((motion_encoder_size, motion_encoder_size, 3), dtype=np.uint8)) for _ in range(num_frames)
93+
]
8994

9095
background_video = None
9196
mask_video = None
@@ -96,47 +101,37 @@ def run(config):
96101
mask_video = load_video(mask_video_path)[:num_frames]
97102

98103
max_logging.log(
99-
"Wan animate inputs: reference_image=%s, image_size=%s, pose_video_path=%s, face_video_path=%s, %s, %s"
100-
% (
101-
reference_image_source,
102-
getattr(image, "size", None),
103-
pose_video_path or "<dummy>",
104-
face_video_path or "<dummy>",
105-
_frame_summary("pose", pose_video),
106-
_frame_summary("face", face_video),
107-
)
104+
"Wan animate inputs: "
105+
f"reference_image={reference_image_source}, "
106+
f"image_size={getattr(image, 'size', None)}, "
107+
f"pose_video_path={pose_video_path or '<dummy>'}, "
108+
f"face_video_path={face_video_path or '<dummy>'}, "
109+
f"{_frame_summary('pose', pose_video)}, "
110+
f"{_frame_summary('face', face_video)}"
108111
)
109112
if mode == "replace":
110113
max_logging.log(
111-
"Wan replace inputs: background_video_path=%s, mask_video_path=%s, %s, %s"
112-
% (
113-
background_video_path,
114-
mask_video_path,
115-
_frame_summary("background", background_video),
116-
_frame_summary("mask", mask_video),
117-
)
114+
"Wan replace inputs: "
115+
f"background_video_path={background_video_path}, "
116+
f"mask_video_path={mask_video_path}, "
117+
f"{_frame_summary('background', background_video)}, "
118+
f"{_frame_summary('mask', mask_video)}"
118119
)
119120

120121
animate_settings = _get_animate_inference_settings(config)
121122
prompt = config.prompt
122123
negative_prompt = config.negative_prompt if animate_settings["guidance_scale"] > 1.0 else None
123124

124125
max_logging.log(
125-
"Num steps: %s, height: %s, width: %s, frames: %s, segment_frame_length: %s, "
126-
"prev_segment_conditioning_frames: %s, guidance_scale: %s"
127-
% (
128-
config.num_inference_steps,
129-
height,
130-
width,
131-
num_frames,
132-
animate_settings["segment_frame_length"],
133-
animate_settings["prev_segment_conditioning_frames"],
134-
animate_settings["guidance_scale"],
135-
)
126+
"Num steps: "
127+
f"{config.num_inference_steps}, height: {height}, width: {width}, frames: {num_frames}, "
128+
f"segment_frame_length: {animate_settings['segment_frame_length']}, "
129+
f"prev_segment_conditioning_frames: {animate_settings['prev_segment_conditioning_frames']}, "
130+
f"guidance_scale: {animate_settings['guidance_scale']}"
136131
)
137132

138133
s0 = time.perf_counter()
139-
134+
140135
# First pass (compile)
141136
videos = pipeline(
142137
image=image,
@@ -155,7 +150,7 @@ def run(config):
155150
num_inference_steps=config.num_inference_steps,
156151
mode=mode,
157152
)
158-
153+
159154
compile_time = time.perf_counter() - s0
160155
max_logging.log(f"compile_time: {compile_time}")
161156
if writer and jax.process_index() == 0:
@@ -179,17 +174,17 @@ def run(config):
179174
num_inference_steps=config.num_inference_steps,
180175
mode=mode,
181176
)
182-
177+
183178
generation_time = time.perf_counter() - s0
184179
max_logging.log(f"generation_time: {generation_time}")
185180
if writer and jax.process_index() == 0:
186181
writer.add_scalar("inference/generation_time", generation_time, global_step=0)
187182

188183
filename_prefix = "animate_"
189184
os.makedirs(config.output_dir, exist_ok=True)
190-
for i in range(len(videos)):
185+
for i, video in enumerate(videos):
191186
video_path = os.path.join(config.output_dir, f"{filename_prefix}wan_output_{config.seed}_{i}.mp4")
192-
export_to_video(videos[i], video_path, fps=config.fps)
187+
export_to_video(video, video_path, fps=config.fps)
193188
max_logging.log(f"Saved video to {video_path}")
194189

195190
if getattr(config, "enable_profiler", False):
@@ -220,6 +215,7 @@ def run(config):
220215

221216
return videos
222217

218+
223219
def main(argv) -> None:
224220
pyconfig.initialize(argv)
225221
try:
@@ -228,6 +224,7 @@ def main(argv) -> None:
228224
pass
229225
run(pyconfig.config)
230226

227+
231228
if __name__ == "__main__":
  # Entry point: wrap the whole app run in transformer_engine_context()
  # (from maxdiffusion.train_utils) so its setup/teardown brackets
  # flag parsing and inference; app.run parses absl flags then calls main.
  # NOTE(review): exact semantics of transformer_engine_context are defined
  # elsewhere in the package — confirm before relying on them here.
  with transformer_engine_context():
    app.run(main)

0 commit comments

Comments
 (0)