AI-Hypercomputer
diff --git a/‎examples/wan_animate/sample_inputs/animate/image.jpeg‎
118 KB b/‎examples/wan_animate/sample_inputs/animate/image.jpeg‎
118 KB
diff --git a/‎scripts/run_wan_animate_parallelism_sweep.sh‎
Lines changed: 254 additions & 0 deletions b/‎scripts/run_wan_animate_parallelism_sweep.sh‎
Lines changed: 254 additions & 0 deletions
diff --git a/‎src/maxdiffusion/configs/base_wan_animate_27b.yml‎
Lines changed: 10 additions & 15 deletions b/‎src/maxdiffusion/configs/base_wan_animate_27b.yml‎
Lines changed: 10 additions & 15 deletions
diff --git a/‎src/maxdiffusion/generate_wan_animate.py‎
Lines changed: 32 additions & 0 deletions b/‎src/maxdiffusion/generate_wan_animate.py‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎src/maxdiffusion/models/wan/autoencoder_kl_wan.py‎
Lines changed: 3 additions & 6 deletions b/‎src/maxdiffusion/models/wan/autoencoder_kl_wan.py‎
Lines changed: 3 additions & 6 deletions
@@ -0,0 +1,254 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+CONFIG="src/maxdiffusion/configs/base_wan_animate_27b.yml"
+VENV_PATH="/data/maxdiffusion-work/maxdiffusion-venv"
+OUTPUT_ROOT="/data/maxdiffusion-work/outputs/wan-animate-sweeps"
+RUN_PREFIX="wananimate-sweep"
+ENABLE_PROFILER="True"
+SKIP_JAX_DISTRIBUTED_SYSTEM="True"
+NUM_FRAMES_OVERRIDE=""
+DRY_RUN=0
+declare -a SCENARIOS=()
+declare -a EXTRA_OVERRIDES=()
+
+usage() {
+  cat <<'EOF'
+Usage:
+  scripts/run_wan_animate_parallelism_sweep.sh [options]
+
+Runs a sequence of WAN Animate inference jobs with different parallelism layouts.
+Each run gets its own output directory, TensorBoard trace directory, and log file.
+
+Default scenarios:
+  context8:1:1:8:1
+  context4_tensor2:1:1:4:2
+  context2_tensor4:1:1:2:4
+  context2_fsdp4:1:4:2:1
+  fsdp4_tensor2:1:4:1:2
+  cp2_fsdp2_tp2:1:2:2:2
+  fsdp8:1:8:1:1
+
+Scenario format:
+  name:data:fsdp:context:tensor
+
+Options:
+  --config <path>            Config file to run
+  --venv <path>              Virtualenv root containing bin/python
+  --output-root <path>       Root folder for the sweep session
+  --run-prefix <prefix>      Prefix used in run names and session directory
+  --num-frames <int>         Override num_frames for all runs
+  --scenario <spec>          Add a scenario; may be repeated
+  --extra-override <k=v>     Extra config override; may be repeated
+  --no-profiler              Disable profiler for all runs
+  --dry-run                  Print commands without executing them
+  -h, --help                 Show this help
+
+Example:
+  scripts/run_wan_animate_parallelism_sweep.sh \
+    --run-prefix wananimate-xprof \
+    --scenario context8:1:1:8:1 \
+    --scenario context4_tensor2:1:1:4:2
+EOF
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --config)
+      CONFIG="${2:-}"
+      shift 2
+      ;;
+    --venv)
+      VENV_PATH="${2:-}"
+      shift 2
+      ;;
+    --output-root)
+      OUTPUT_ROOT="${2:-}"
+      shift 2
+      ;;
+    --run-prefix)
+      RUN_PREFIX="${2:-}"
+      shift 2
+      ;;
+    --num-frames)
+      NUM_FRAMES_OVERRIDE="${2:-}"
+      shift 2
+      ;;
+    --scenario)
+      SCENARIOS+=("${2:-}")
+      shift 2
+      ;;
+    --extra-override)
+      EXTRA_OVERRIDES+=("${2:-}")
+      shift 2
+      ;;
+    --no-profiler)
+      ENABLE_PROFILER="False"
+      shift
+      ;;
+    --dry-run)
+      DRY_RUN=1
+      shift
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      echo "Unknown option: $1" >&2
+      usage
+      exit 1
+      ;;
+  esac
+done
+
+if [[ ${#SCENARIOS[@]} -eq 0 ]]; then
+  SCENARIOS=(
+    "context8:1:1:8:1"
+    "context4_tensor2:1:1:4:2"
+    "context2_tensor4:1:1:2:4"
+    "context2_fsdp4:1:4:2:1"
+    "fsdp4_tensor2:1:4:1:2"
+    "cp2_fsdp2_tp2:1:2:2:2"
+    "fsdp8:1:8:1:1"
+  )
+fi
+
+if [[ ! -f "${CONFIG}" ]]; then
+  echo "Config not found: ${CONFIG}" >&2
+  exit 1
+fi
+
+if [[ ! -x "${VENV_PATH}/bin/python" ]]; then
+  echo "Python not found in venv: ${VENV_PATH}/bin/python" >&2
+  exit 1
+fi
+
+# Every invocation works in its own directory, named after the run prefix and
+# a UTC timestamp.
+SESSION_ID="$(date -u +%Y%m%d-%H%M%S)"
+SESSION_ROOT="${OUTPUT_ROOT%/}/${RUN_PREFIX}-${SESSION_ID}"
+LOG_DIR="${SESSION_ROOT}/logs"
+SUMMARY_TSV="${SESSION_ROOT}/summary.tsv"
+COMMANDS_SH="${SESSION_ROOT}/commands.sh"
+mkdir -p "${LOG_DIR}"
+
+# Start the summary with its header row; one row per scenario is appended later.
+printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
+  scenario run_name status duration_seconds output_dir tensorboard_dir log_file \
+  > "${SUMMARY_TSV}"
+
+# Start the command list with a script preamble; the commands are appended later.
+printf '%s\n' \
+  '#!/usr/bin/env bash' \
+  'set -euo pipefail' \
+  '' \
+  '# Generated by scripts/run_wan_animate_parallelism_sweep.sh' \
+  "# Session root: ${SESSION_ROOT}" \
+  '' \
+  > "${COMMANDS_SH}"
+
+# Tell the operator where everything for this session lives.
+printf '%s\n' \
+  "Sweep session root: ${SESSION_ROOT}" \
+  "Summary file: ${SUMMARY_TSV}" \
+  "Commands file: ${COMMANDS_SH}" \
+  "TensorBoard root: ${SESSION_ROOT}" \
+  ''
+
+for scenario_spec in "${SCENARIOS[@]}"; do
+  IFS=":" read -r scenario_name data_parallelism fsdp_parallelism context_parallelism tensor_parallelism <<< "${scenario_spec}"
+  if [[ -z "${scenario_name}" || -z "${data_parallelism}" || -z "${fsdp_parallelism}" || -z "${context_parallelism}" || -z "${tensor_parallelism}" ]]; then
+    echo "Invalid scenario: ${scenario_spec}" >&2
+    echo "Expected format: name:data:fsdp:context:tensor" >&2
+    exit 1
+  fi
+
+  run_name="${RUN_PREFIX}-${scenario_name}"
+  run_output_dir="${SESSION_ROOT}/artifacts"
+  run_tensorboard_dir="${run_output_dir}/${run_name}/tensorboard"
+  log_file="${LOG_DIR}/${run_name}.log"
+
+  cmd=(
+    "${VENV_PATH}/bin/python"
+    "src/maxdiffusion/generate_wan_animate.py"
+    "${CONFIG}"
+    "run_name=${run_name}"
+    "output_dir=${run_output_dir}"
+    "enable_profiler=${ENABLE_PROFILER}"
+    "skip_jax_distributed_system=${SKIP_JAX_DISTRIBUTED_SYSTEM}"
+    "ici_data_parallelism=${data_parallelism}"
+    "ici_fsdp_parallelism=${fsdp_parallelism}"
+    "ici_context_parallelism=${context_parallelism}"
+    "ici_tensor_parallelism=${tensor_parallelism}"
+  )
+
+  if [[ -n "${NUM_FRAMES_OVERRIDE}" ]]; then
+    cmd+=("num_frames=${NUM_FRAMES_OVERRIDE}")
+  fi
+
+  for extra_override in "${EXTRA_OVERRIDES[@]}"; do
+    cmd+=("${extra_override}")
+  done
+
+  printf "%q " "${cmd[@]}" >> "${COMMANDS_SH}"
+  printf "\n\n" >> "${COMMANDS_SH}"
+
+  echo "========================================================================"
+  echo "Scenario: ${scenario_name}"
+  echo "Run name: ${run_name}"
+  echo "Parallelism: data=${data_parallelism}, fsdp=${fsdp_parallelism}, context=${context_parallelism}, tensor=${tensor_parallelism}"
+  echo "Output dir: ${run_output_dir}/${run_name}"
+  echo "TensorBoard dir: ${run_tensorboard_dir}"
+  echo "Log file: ${log_file}"
+  echo "========================================================================"
+
+  if [[ "${DRY_RUN}" -eq 1 ]]; then
+    printf "DRY RUN: "
+    printf "%q " "${cmd[@]}"
+    printf "\n\n"
+    printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" \
+      "${scenario_name}" \
+      "${run_name}" \
+      "DRY_RUN" \
+      "0" \
+      "${run_output_dir}/${run_name}" \
+      "${run_tensorboard_dir}" \
+      "${log_file}" >> "${SUMMARY_TSV}"
+    continue
+  fi
+
+  start_ts="$(date +%s)"
+  set +e
+  (
+    echo "[$(date -u +%Y-%m-%dT%H:%M:%SZ)] Starting ${run_name}"
+    printf "Command: "
+    printf "%q " "${cmd[@]}"
+    printf "\n"
+    "${cmd[@]}"
+  ) 2>&1 | tee "${log_file}"
+  exit_code=${PIPESTATUS[0]}
+  set -e
+  end_ts="$(date +%s)"
+  duration="$((end_ts - start_ts))"
+
+  if [[ "${exit_code}" -eq 0 ]]; then
+    status="OK"
+  else
+    status="FAIL(${exit_code})"
+  fi
+
+  printf "%s\t%s\t%s\t%s\t%s\t%s\t%s\n" \
+    "${scenario_name}" \
+    "${run_name}" \
+    "${status}" \
+    "${duration}" \
+    "${run_output_dir}/${run_name}" \
+    "${run_tensorboard_dir}" \
+    "${log_file}" >> "${SUMMARY_TSV}"
+
+  echo
+done
+
+chmod +x "${COMMANDS_SH}"
+
+echo "Sweep complete."
+echo "Summary: ${SUMMARY_TSV}"
+echo "Commands: ${COMMANDS_SH}"
+echo "To inspect traces tomorrow:"
+echo "  tensorboard --logdir=${SESSION_ROOT}/artifacts"
@@ -156,22 +156,17 @@ mesh_axes: ['data', 'fsdp', 'context', 'tensor']
 # conv_in : conv.shape[2] weight
 # conv_out : conv.shape[-1] weight
 logical_axis_rules: [
-                      ['batch', 'data'],
-                      ['activation_batch', 'data'],
+                      ['batch', ['data', 'fsdp']],
+                      ['activation_batch', ['data', 'fsdp']],
                       ['activation_self_attn_heads', ['context', 'tensor']],
-                      ['activation_self_attn_q_length', 'context'],
-                      ['activation_self_attn_kv_length', None],
-                      ['activation_cross_attn_q_length', 'context'],
-                      ['activation_cross_attn_heads', 'tensor'],
-                      ['activation_cross_attn_kv_length', None],
+                      ['activation_cross_attn_q_length', ['context', 'tensor']],
                       ['activation_length', 'context'],
                       ['activation_heads', 'tensor'],
-                      ['activation_kv', 'tensor'],
                       ['mlp','tensor'],
-                      ['embed', 'fsdp'],
+                      ['embed', ['context', 'fsdp']],
                       ['heads', 'tensor'],
                       ['norm', 'tensor'],
-                      ['conv_batch', ['data', 'context']],
+                      ['conv_batch', ['data', 'context', 'fsdp']],
                       ['out_channels', 'tensor'],
                       ['conv_out', 'context'],
                     ]
@@ -183,12 +178,12 @@ data_sharding: [['data', 'fsdp', 'context', 'tensor']]
 # and product of the ICI axes should equal number of devices per slice.
 dcn_data_parallelism: 1  # recommended DCN axis to be auto-sharded
 dcn_fsdp_parallelism: 1
-dcn_context_parallelism: 1
+dcn_context_parallelism: -1
 dcn_tensor_parallelism: 1
 ici_data_parallelism: 1
-ici_fsdp_parallelism: 4
-ici_context_parallelism: 1
-ici_tensor_parallelism: 2
+ici_fsdp_parallelism: 1
+ici_context_parallelism: -1  # recommended ICI axis to be auto-sharded
+ici_tensor_parallelism: 1
 
 allow_split_physical_axes: False
 
@@ -294,7 +289,7 @@ negative_prompt: "Bright tones, overexposed, static, blurred details, subtitles,
 do_classifier_free_guidance: True
 height: 720
 width: 1280
-num_frames: 81
+num_frames: 121
 flow_shift: 5.0
 
 # Reference for below guidance scale and boundary values: https://github.com/Wan-Video/Wan2.2/blob/main/wan/configs/wan_t2v_A14B.py
 
@@ -39,6 +39,14 @@ def _get_animate_inference_settings(config):
       "guidance_scale": getattr(config, "animate_guidance_scale", 1.0),
   }
 
+
+def _frame_summary(name, frames):
+  """Return a compact frame-count/size summary for logging."""
+  if not frames:
+    return f"{name}_frames=0"
+  return f"{name}_frames={len(frames)}, {name}_frame_size={getattr(frames[0], 'size', None)}"
+
+
 def run(config):
   writer = max_utils.initialize_summary_writer(config)
   if jax.process_index() == 0 and writer:
@@ -53,9 +61,11 @@ def run(config):
   reference_image_path = getattr(config, "reference_image_path", "")
   if reference_image_path:
     image = load_image(reference_image_path)
+    reference_image_source = reference_image_path
   else:
     image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/astronaut.jpg"
     image = load_image(image_url)
+    reference_image_source = image_url
 
   mode = getattr(config, "mode", "animate")
   pose_video_path = getattr(config, "pose_video_path", "")
@@ -98,6 +108,28 @@ def run(config):
     background_video = load_video(background_video_path)[:num_frames]
     mask_video = load_video(mask_video_path)[:num_frames]
 
+  max_logging.log(
+      "Wan animate inputs: reference_image=%s, image_size=%s, pose_video_path=%s, face_video_path=%s, %s, %s"
+      % (
+          reference_image_source,
+          getattr(image, "size", None),
+          pose_video_path or "<dummy>",
+          face_video_path or "<dummy>",
+          _frame_summary("pose", pose_video),
+          _frame_summary("face", face_video),
+      )
+  )
+  if mode == "replace":
+    max_logging.log(
+        "Wan replace inputs: background_video_path=%s, mask_video_path=%s, %s, %s"
+        % (
+            background_video_path,
+            mask_video_path,
+            _frame_summary("background", background_video),
+            _frame_summary("mask", mask_video),
+        )
+    )
+
   animate_settings = _get_animate_inference_settings(config)
   prompt = config.prompt
   negative_prompt = config.negative_prompt if animate_settings["guidance_scale"] > 1.0 else None
 
@@ -20,7 +20,6 @@
 import jax
 import jax.numpy as jnp
 from jax import tree_util
-from jax.sharding import NamedSharding, PartitionSpec as P
 from flax import nnx
 from ...configuration_utils import ConfigMixin
 from ..modeling_flax_utils import FlaxModelMixin, get_activation
@@ -100,10 +99,10 @@ def __init__(
 
     self.mesh = mesh
     # Set sharding dynamically based on out_channels.
-    num_vae_spatial_devices = mesh.shape["vae_spatial"]
+    num_context_axis_devices = mesh.shape["context"]
     kernel_sharding = (None, None, None, None, None)
-    if out_channels % num_vae_spatial_devices == 0:
-      kernel_sharding = (None, None, None, None, "vae_spatial")
+    if out_channels % num_context_axis_devices == 0:
+      kernel_sharding = (None, None, None, None, "conv_out")
 
     self.conv = nnx.Conv(
         in_features=in_channels,
@@ -120,8 +119,6 @@ def __init__(
     )
 
   def __call__(self, x: jax.Array, cache_x: Optional[jax.Array] = None, idx=-1) -> jax.Array:
-    # Shard the widest activation dimension across the dedicated VAE mesh.
-    x = jax.lax.with_sharding_constraint(x, NamedSharding(self.mesh, P(None, None, None, "vae_spatial", None)))
     current_padding = list(self._causal_padding)  # Mutable copy
     padding_needed = self._depth_padding_before